cmu-db
diff --git a/‎build_support/generate_flex.py
Lines changed: 24 additions & 0 deletions b/‎build_support/generate_flex.py
Lines changed: 24 additions & 0 deletions
diff --git a/‎build_support/generate_grammar.py
Lines changed: 326 additions & 0 deletions b/‎build_support/generate_grammar.py
Lines changed: 326 additions & 0 deletions
diff --git a/‎test/binder/binder_test.cpp
Lines changed: 2 additions & 2 deletions b/‎test/binder/binder_test.cpp
Lines changed: 2 additions & 2 deletions
diff --git a/‎third_party/libpg_query/grammar/statements/create.y
Lines changed: 1 addition & 1 deletion b/‎third_party/libpg_query/grammar/statements/create.y
Lines changed: 1 addition & 1 deletion
@@ -1,3 +1,25 @@
+# ===----------------------------------------------------------------------===//
+# Copyright 2018-2022 Stichting DuckDB Foundation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice (including the next paragraph)
+# shall be included in all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+# ===----------------------------------------------------------------------===//
+
 # use flex to generate the scanner file for the parser
-# the following version of bison is used:
+# the following version of flex is used:
 # flex 2.5.35 Apple(flex-32)
@@ -7,13 +29,15 @@
 from sys import platform
 import sys
 
+
 def open_utf8(fpath, flags):
     import sys
     if sys.version_info[0] < 3:
         return open(fpath, flags)
     else:
         return open(fpath, flags, encoding="utf8")
 
+
 flex_bin = 'flex'
 for arg in sys.argv[1:]:
     if arg.startswith("--flex="):
 
@@ -0,0 +1,326 @@
+# ===----------------------------------------------------------------------===//
+# Copyright 2018-2022 Stichting DuckDB Foundation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice (including the next paragraph)
+# shall be included in all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+# ===----------------------------------------------------------------------===//
+
+# use bison to generate the parser files
+# the following version of bison is used:
+# bison (GNU Bison) 2.3
+import os
+import subprocess
+import re
+import sys
+
+
+def open_utf8(fpath, flags):
+    """Open ``fpath`` in mode ``flags``, using UTF-8 encoding on Python 3.
+
+    On Python 2 the built-in ``open`` takes no ``encoding`` argument, so the
+    file is opened without one there.
+    """
+    import sys
+    running_py2 = sys.version_info[0] < 3
+    if running_py2:
+        return open(fpath, flags)
+    return open(fpath, flags, encoding="utf8")
+
+
+# Locations of the bison binary, the grammar inputs and the generated outputs.
+# The paths are relative, i.e. resolved against the current working directory.
+bison_location = "bison"
+base_dir = 'third_party/libpg_query/grammar'
+pg_dir = 'third_party/libpg_query'
+template_file = os.path.join(base_dir, 'grammar.y')
+target_file = os.path.join(base_dir, 'grammar.y.tmp')
+header_file = os.path.join(base_dir, 'grammar.hpp')
+source_file = os.path.join(base_dir, 'grammar.cpp')
+type_dir = os.path.join(base_dir, 'types')
+rule_dir = os.path.join(base_dir, 'statements')
+result_source = os.path.join(base_dir, 'grammar_out.cpp')
+result_header = os.path.join(base_dir, 'grammar_out.hpp')
+target_source_loc = os.path.join(pg_dir, 'src_backend_parser_gram.cpp')
+target_header_loc = os.path.join(pg_dir, 'include/parser/gram.hpp')
+kwlist_header = os.path.join(pg_dir, 'include/parser/kwlist.hpp')
+
+# command line flags:
+#   --bison=<path>      bison executable to invoke (default: "bison")
+#   --counterexamples   pass -Wcounterexamples to bison
+#   --update            pass --update to bison
+counterexamples = False
+run_update = False
+bison_flag = "--bison="
+for arg in sys.argv[1:]:
+    if arg.startswith(bison_flag):
+        # drop only the leading flag; the rest of the argument is the path
+        bison_location = arg[len(bison_flag):]
+    elif arg.startswith("--counterexamples"):
+        counterexamples = True
+    elif arg.startswith("--update"):
+        run_update = True
+    else:
+        raise Exception("Unrecognized argument: " + arg +
+                        ", expected --counterexamples, --update or --bison=/loc/to/bison")
+
+# parse the keyword lists
+
+
+def read_list_from_file(fname):
+    """Return the non-blank lines of ``fname``, each stripped of surrounding whitespace."""
+    with open_utf8(fname, 'r') as f:
+        stripped = (line.strip() for line in f.read().split('\n'))
+        return [entry for entry in stripped if len(entry) > 0]
+
+
+# each keyword category is read from its own .list file in the keywords directory
+kwdir = os.path.join(base_dir, 'keywords')
+(unreserved_keywords,
+ colname_keywords,
+ func_name_keywords,
+ type_name_keywords,
+ reserved_keywords) = [
+    read_list_from_file(os.path.join(kwdir, list_file))
+    for list_file in (
+        'unreserved_keywords.list',
+        'column_name_keywords.list',
+        'func_name_keywords.list',
+        'type_name_keywords.list',
+        'reserved_keywords.list',
+    )
+]
+
+
+def strip_p(x):
+    """Return ``x`` without a trailing ``_P``; any other name is returned unchanged."""
+    return x[:-2] if x.endswith("_P") else x
+
+
+# order every keyword list by keyword name, ignoring a trailing _P
+unreserved_keywords.sort(key=strip_p)
+colname_keywords.sort(key=strip_p)
+func_name_keywords.sort(key=strip_p)
+type_name_keywords.sort(key=strip_p)
+reserved_keywords.sort(key=strip_p)
+
+statements = read_list_from_file(os.path.join(base_dir, 'statements.list'))
+statements.sort()
+# a length is never negative, so the guard has to test for an empty list
+if not statements:
+    print("Need at least one statement")
+    sys.exit(1)
+
+# map every keyword to its category and build one big sorted list of keywords;
+# a keyword that appears in more than one list keeps the category assigned last
+kwdict = {}
+for category, keywords in (
+        ('UNRESERVED_KEYWORD', unreserved_keywords),
+        ('COL_NAME_KEYWORD', colname_keywords),
+        ('TYPE_FUNC_NAME_KEYWORD', func_name_keywords),
+        ('TYPE_FUNC_NAME_KEYWORD', type_name_keywords),
+        ('RESERVED_KEYWORD', reserved_keywords)):
+    for kw in keywords:
+        kwdict[kw] = category
+
+# (keyword, category) pairs ordered by keyword name without a trailing _P
+kwlist = sorted(kwdict.items(), key=lambda entry: strip_p(entry[0]))
+
+# now generate kwlist.hpp; every keyword becomes one line of the form
+# PG_KEYWORD("abort", ABORT_P, UNRESERVED_KEYWORD)
+kwtext = """
+namespace duckdb_libpgquery {
+#define PG_KEYWORD(a,b,c) {a,b,c},
+const PGScanKeyword ScanKeywords[] = {
+"""
+kwtext += "".join(
+    'PG_KEYWORD("%s", %s, %s)\n' % (strip_p(token).lower(), token, kw_category)
+    for token, kw_category in kwlist)
+kwtext += """
+};
+const int NumScanKeywords = lengthof(ScanKeywords);
+} // namespace duckdb_libpgquery
+"""
+
+with open_utf8(kwlist_header, 'w+') as f:
+    f.write(kwtext)
+
+
+# generate the final grammar.y.tmp file
+# first read the template file
+with open_utf8(template_file, 'r') as f:
+    text = f.read()
+
+# now perform a series of replacements in the file to construct the final yacc file
+
+
+def get_file_contents(fpath, add_line_numbers=False):
+    """Read ``fpath`` as text and return its contents.
+
+    When ``add_line_numbers`` is true the contents are preceded by a
+    ``#line 1 "<fpath>"`` line.
+    """
+    with open_utf8(fpath, 'r') as f:
+        contents = f.read()
+    if not add_line_numbers:
+        return contents
+    return '#line 1 "%s"\n' % (fpath,) + contents
+
+
+# grammar.hpp and grammar.cpp are spliced into the template, each preceded
+# by a #line marker
+for placeholder, section_file in (("{{{ GRAMMAR_HEADER }}}", header_file),
+                                  ("{{{ GRAMMAR_SOURCE }}}", source_file)):
+    text = text.replace(placeholder, get_file_contents(section_file, True))
+
+# keyword list: one %token declaration naming every keyword
+kw_token_list = "%token <keyword> " + " ".join(entry[0] for entry in kwlist)
+text = text.replace("{{{ KEYWORDS }}}", kw_token_list)
+
+# statements: one alternative per statement plus a final empty alternative
+stmt_alternatives = "\n\t| ".join(statements)
+stmt_list = "stmt: " + stmt_alternatives + "\n\t| /*EMPTY*/\n\t{ $$ = NULL; }\n"
+text = text.replace("{{{ STATEMENTS }}}", stmt_list)
+
+# keywords
+# a keyword is EITHER reserved, OR unreserved, OR in some combination of the
+# col_name / type_name / func_name lists, so duplicates are ONLY allowed
+# between those three lists. A keyword that is both reserved and unreserved
+# (or both reserved and col_name, for example) is an error.
+# verify that this is the case
+reserved_dict = {}
+unreserved_dict = {}
+other_dict = {}
+for reserved_kw in reserved_keywords:
+    if reserved_kw in reserved_dict:
+        print("Duplicate keyword " + reserved_kw + " in reserved keywords")
+        exit(1)
+    reserved_dict[reserved_kw] = True
+
+for unreserved_kw in unreserved_keywords:
+    if unreserved_kw in unreserved_dict:
+        print("Duplicate keyword " + unreserved_kw + " in unreserved keywords")
+        exit(1)
+    elif unreserved_kw in reserved_dict:
+        print("Keyword " + unreserved_kw + " is marked as both unreserved and reserved")
+        exit(1)
+    unreserved_dict[unreserved_kw] = True
+
+
+def add_to_other_keywords(kw, list_name):
+    """Record ``kw`` in ``other_dict``.
+
+    Prints a message and exits with status 1 when ``kw`` is already present in
+    ``unreserved_dict`` or ``reserved_dict``; ``list_name`` names the list
+    ``kw`` came from and is only used in that message.
+    """
+    # the unreserved clash is tested first, then the reserved one
+    for taken, label in ((unreserved_dict, "unreserved"),
+                         (reserved_dict, "reserved")):
+        if kw in taken:
+            print("Keyword " + kw + " is marked as both " + label + " and " + list_name)
+            exit(1)
+    other_dict[kw] = True
+
+
+# register the col_name / type_name / func_name keywords as "other" keywords
+for colname_kw in colname_keywords:
+    add_to_other_keywords(colname_kw, "colname")
+
+# type_name and func_name keywords are additionally collected together
+type_func_name_dict = {}
+for typename_kw in type_name_keywords:
+    add_to_other_keywords(typename_kw, "typename")
+    type_func_name_dict[typename_kw] = True
+
+for funcname_kw in func_name_keywords:
+    add_to_other_keywords(funcname_kw, "funcname")
+    type_func_name_dict[funcname_kw] = True
+
+type_func_name_keywords = sorted(type_func_name_dict)
+all_keywords = sorted(
+    list(reserved_dict) + list(unreserved_dict) + list(other_dict))
+other_keyword = sorted(other_dict)
+
+# one grammar rule per keyword category, its members separated by " | "
+kw_rules = (
+    ("unreserved_keyword", unreserved_keywords),
+    ("col_name_keyword", colname_keywords),
+    ("func_name_keyword", func_name_keywords),
+    ("type_name_keyword", type_name_keywords),
+    ("other_keyword", other_keyword),
+    ("type_func_name_keyword", type_func_name_keywords),
+    ("reserved_keyword", reserved_keywords),
+)
+kw_definitions = "".join(
+    rule_name + ": " + " | ".join(members) + "\n"
+    for rule_name, members in kw_rules)
+text = text.replace("{{{ KEYWORD_DEFINITIONS }}}", kw_definitions)
+
+# types
+
+
+def concat_dir(dname, extension, add_line_numbers=False):
+    """Concatenate every file under ``dname`` whose name ends with ``extension``.
+
+    Sub-directories are searched recursively. Entries are visited in sorted
+    name order: ``os.listdir`` returns them in arbitrary order, and the
+    generated grammar should not depend on the file system. When
+    ``add_line_numbers`` is true, every file (including those in
+    sub-directories) is read with ``get_file_contents(..., True)``.
+    """
+    pieces = []
+    for fname in sorted(os.listdir(dname)):
+        fpath = os.path.join(dname, fname)
+        if os.path.isdir(fpath):
+            # hand the flag down so nested files are treated like top-level ones
+            pieces.append(concat_dir(fpath, extension, add_line_numbers))
+        elif fname.endswith(extension):
+            pieces.append(get_file_contents(fpath, add_line_numbers))
+    return "".join(pieces)
+
+
+type_definitions = concat_dir(type_dir, ".yh")
+# add statement types as well: one %type declaration per statement
+type_definitions += "".join(
+    "%type <node> " + stmt_name + "\n" for stmt_name in statements)
+text = text.replace("{{{ TYPES }}}", type_definitions)
+
+# grammar rules, each file preceded by a #line marker
+grammar_rules = concat_dir(rule_dir, ".y", True)
+text = text.replace("{{{ GRAMMAR RULES }}}", grammar_rules)
+
+# finally write the yacc file into the target file
+with open_utf8(target_file, 'w+') as f:
+    f.write(text)
+
+# run bison on the assembled grammar
+cmd = [bison_location]
+if counterexamples:
+    print("Attempting to print counterexamples (-Wcounterexamples)")
+    cmd += ["-Wcounterexamples"]
+if run_update:
+    cmd += ["--update"]
+cmd += ["-o", result_source, "-d", target_file]
+print(' '.join(cmd))
+proc = subprocess.Popen(cmd, stderr=subprocess.PIPE)
+# communicate() drains stderr while waiting for bison to exit; wait() with a
+# piped stderr can deadlock once the child fills the OS pipe buffer
+_, stderr_data = proc.communicate()
+res = proc.returncode
+
+if res != 0:
+    # bison failed: show its diagnostics plus a hint for the common cases
+    text = stderr_data.decode('utf8')
+    print(text)
+    if 'shift/reduce' in text and not counterexamples:
+        print("---------------------------------------------------------------------")
+        print("In case of shift/reduce conflicts, try re-running with --counterexamples")
+        print("Note: this requires a more recent version of Bison (e.g. version 3.8)")
+        print("On a Macbook you can obtain this using \"brew install bison\"")
+    if counterexamples and 'time limit exceeded' in text:
+        print("---------------------------------------------------------------------")
+        print("The counterexamples time limit was exceeded. This likely means that no useful counterexample was generated.")
+        print("")
+        print("The counterexamples time limit can be increased by setting the TIME_LIMIT environment variable, e.g.:")
+        print("export TIME_LIMIT=100")
+    sys.exit(1)
+
+
+# move the bison output to its final location
+for generated, destination in ((result_source, target_source_loc),
+                               (result_header, target_header_loc)):
+    os.rename(generated, destination)
+
+# patch the generated source: include the header under its final path and
+# append "(void)yynerrs;" after the yynerrs initialisation (presumably to
+# silence an unused-variable warning)
+with open_utf8(target_source_loc, 'r') as f:
+    text = f.read()
+
+for old, new in (('#include "grammar_out.hpp"', '#include "include/parser/gram.hpp"'),
+                 ('yynerrs = 0;', 'yynerrs = 0; (void)yynerrs;')):
+    text = text.replace(old, new)
+
+with open_utf8(target_source_loc, 'w+') as f:
+    f.write(text)
@@ -173,8 +173,8 @@ TEST(BinderTest, BindBinaryOp) {
   PrintStatements(statements);
 }
 
-TEST(BinderTest, BindCopyFrom) {
-  auto statements = TryBind("copy from");
+// NOTE(review): GoogleTest only skips tests whose name starts with "DISABLED_";
+// the intent here appears to be to disable this test - confirm.
+TEST(BinderTest, DISABLED_BindCopyFrom) {
+  auto statements = TryBind("copy from 'a.csv'");
   PrintStatements(statements);
 }
 
 
@@ -222,7 +222,7 @@ GeneratedColumnType:
 			;
 
 opt_GeneratedColumnType:
-			GeneratedColumnType { $$ = $1 }
+			GeneratedColumnType { $$ = $1; }
 			| /* EMPTY */ { $$ = PG_CONSTR_GENERATED_VIRTUAL; }
 			;
Original file line number	Diff line number	Diff line change
`@@ -173,8 +173,8 @@ TEST(BinderTest, BindBinaryOp) {`
`173`	`173`	`PrintStatements(statements);`
`174`	`174`	`}`
`175`	`175`
`176`		`-TEST(BinderTest, BindCopyFrom) {`
`177`		`- auto statements = TryBind("copy from");`
	`176`	`+TEST(BinderTest, DISABLED_BindCopyFrom) {`
	`177`	`+ auto statements = TryBind("copy from 'a.csv'");`
`178`	`178`	`PrintStatements(statements);`
`179`	`179`	`}`
`180`	`180`
Original file line number	Diff line number	Diff line change
`@@ -222,7 +222,7 @@ GeneratedColumnType:`
`222`	`222`	`;`
`223`	`223`
`224`	`224`	`opt_GeneratedColumnType:`
`225`		`- GeneratedColumnType { $$ = $1 }`
	`225`	`+ GeneratedColumnType { $$ = $1; }`
`226`	`226`	`\| /* EMPTY */ { $$ = PG_CONSTR_GENERATED_VIRTUAL; }`
`227`	`227`	`;`
`228`	`228`