
Commit 85bef31

[fix] parser support copy from 'a.csv'
Signed-off-by: clundro <[email protected]>
1 parent e1d7df2 commit 85bef31

7 files changed, +20913 −21062 lines changed


build_support/generate_flex.py

Lines changed: 24 additions & 0 deletions
@@ -1,3 +1,25 @@
+# ===----------------------------------------------------------------------===//
+# Copyright 2018-2022 Stichting DuckDB Foundation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice (including the next paragraph)
+# shall be included in all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+# ===----------------------------------------------------------------------===//
+
 # use flex to generate the scanner file for the parser
 # the following version of bison is used:
 # flex 2.5.35 Apple(flex-32)
@@ -7,13 +29,15 @@
 from sys import platform
 import sys
 
+
 def open_utf8(fpath, flags):
     import sys
     if sys.version_info[0] < 3:
         return open(fpath, flags)
     else:
         return open(fpath, flags, encoding="utf8")
 
+
 flex_bin = 'flex'
 for arg in sys.argv[1:]:
     if arg.startswith("--flex="):
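
Based on the `--flex=` handling shown above, a hedged sketch of how the scanner regeneration might be driven from the repository root; the script path comes from this diff, while the wrapper itself and the default flex binary location are illustrative assumptions.

import subprocess
import sys

# Hypothetical convenience wrapper around build_support/generate_flex.py.
# It only uses the --flex= flag parsed in the diff above; the default of
# plain "flex" assumes the binary is on PATH.
def regenerate_scanner(flex_binary="flex"):
    cmd = [sys.executable, "build_support/generate_flex.py", "--flex=" + flex_binary]
    print(" ".join(cmd))
    subprocess.run(cmd, check=True)


if __name__ == "__main__":
    regenerate_scanner()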

build_support/generate_grammar.py

Lines changed: 326 additions & 0 deletions
@@ -0,0 +1,326 @@
+# ===----------------------------------------------------------------------===//
+# Copyright 2018-2022 Stichting DuckDB Foundation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice (including the next paragraph)
+# shall be included in all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+# ===----------------------------------------------------------------------===//
+
+# use bison to generate the parser files
+# the following version of bison is used:
+# bison (GNU Bison) 2.3
+import os
+import subprocess
+import re
+import sys
+
+
+def open_utf8(fpath, flags):
+    import sys
+    if sys.version_info[0] < 3:
+        return open(fpath, flags)
+    else:
+        return open(fpath, flags, encoding="utf8")
+
+
+bison_location = "bison"
+base_dir = 'third_party/libpg_query/grammar'
+pg_dir = 'third_party/libpg_query'
+template_file = os.path.join(base_dir, 'grammar.y')
+target_file = os.path.join(base_dir, 'grammar.y.tmp')
+header_file = os.path.join(base_dir, 'grammar.hpp')
+source_file = os.path.join(base_dir, 'grammar.cpp')
+type_dir = os.path.join(base_dir, 'types')
+rule_dir = os.path.join(base_dir, 'statements')
+result_source = os.path.join(base_dir, 'grammar_out.cpp')
+result_header = os.path.join(base_dir, 'grammar_out.hpp')
+target_source_loc = os.path.join(pg_dir, 'src_backend_parser_gram.cpp')
+target_header_loc = os.path.join(pg_dir, 'include/parser/gram.hpp')
+kwlist_header = os.path.join(pg_dir, 'include/parser/kwlist.hpp')
+
+counterexamples = False
+run_update = False
+for arg in sys.argv[1:]:
+    if arg.startswith("--bison="):
+        bison_location = arg.replace("--bison=", "")
+    elif arg.startswith("--counterexamples"):
+        counterexamples = True
+    elif arg.startswith("--update"):
+        run_update = True
+    else:
+        raise Exception("Unrecognized argument: " + arg +
+                        ", expected --counterexamples or --bison=/loc/to/bison")
+
+# parse the keyword lists
+
+
+def read_list_from_file(fname):
+    with open_utf8(fname, 'r') as f:
+        return [x.strip() for x in f.read().split('\n') if len(x.strip()) > 0]
+
+
+kwdir = os.path.join(base_dir, 'keywords')
+unreserved_keywords = read_list_from_file(
+    os.path.join(kwdir, 'unreserved_keywords.list'))
+colname_keywords = read_list_from_file(
+    os.path.join(kwdir, 'column_name_keywords.list'))
+func_name_keywords = read_list_from_file(
+    os.path.join(kwdir, 'func_name_keywords.list'))
+type_name_keywords = read_list_from_file(
+    os.path.join(kwdir, 'type_name_keywords.list'))
+reserved_keywords = read_list_from_file(
+    os.path.join(kwdir, 'reserved_keywords.list'))
+
+
+def strip_p(x):
+    if x.endswith("_P"):
+        return x[:-2]
+    else:
+        return x
+
+
+unreserved_keywords.sort(key=lambda x: strip_p(x))
+colname_keywords.sort(key=lambda x: strip_p(x))
+func_name_keywords.sort(key=lambda x: strip_p(x))
+type_name_keywords.sort(key=lambda x: strip_p(x))
+reserved_keywords.sort(key=lambda x: strip_p(x))
+
+statements = read_list_from_file(os.path.join(base_dir, 'statements.list'))
+statements.sort()
+if len(statements) < 0:
+    print("Need at least one statement")
+    exit(1)
+
+# verify there are no duplicate keywords and create big sorted list of keywords
+kwdict = {}
+for kw in unreserved_keywords:
+    kwdict[kw] = 'UNRESERVED_KEYWORD'
+
+for kw in colname_keywords:
+    kwdict[kw] = 'COL_NAME_KEYWORD'
+
+for kw in func_name_keywords:
+    kwdict[kw] = 'TYPE_FUNC_NAME_KEYWORD'
+
+for kw in type_name_keywords:
+    kwdict[kw] = 'TYPE_FUNC_NAME_KEYWORD'
+
+for kw in reserved_keywords:
+    kwdict[kw] = 'RESERVED_KEYWORD'
+
+kwlist = [(x, kwdict[x]) for x in kwdict.keys()]
+kwlist.sort(key=lambda x: strip_p(x[0]))
+
+# now generate kwlist.h
+# PG_KEYWORD("abort", ABORT_P, UNRESERVED_KEYWORD)
+kwtext = """
+namespace duckdb_libpgquery {
+#define PG_KEYWORD(a,b,c) {a,b,c},
+const PGScanKeyword ScanKeywords[] = {
+"""
+for tpl in kwlist:
+    kwtext += 'PG_KEYWORD("%s", %s, %s)\n' % (
+        strip_p(tpl[0]).lower(), tpl[0], tpl[1])
+kwtext += """
+};
+const int NumScanKeywords = lengthof(ScanKeywords);
+} // namespace duckdb_libpgquery
+"""
+
+with open_utf8(kwlist_header, 'w+') as f:
+    f.write(kwtext)
+
+
+# generate the final main.y.tmp file
+# first read the template file
+with open_utf8(template_file, 'r') as f:
+    text = f.read()
+
+# now perform a series of replacements in the file to construct the final yacc file
+
+
+def get_file_contents(fpath, add_line_numbers=False):
+    with open_utf8(fpath, 'r') as f:
+        result = f.read()
+    if add_line_numbers:
+        return '#line 1 "%s"\n' % (fpath,) + result
+    else:
+        return result
+
+
+# grammar.hpp
+text = text.replace("{{{ GRAMMAR_HEADER }}}",
+                    get_file_contents(header_file, True))
+
+# grammar.cpp
+text = text.replace("{{{ GRAMMAR_SOURCE }}}",
+                    get_file_contents(source_file, True))
+
+# keyword list
+kw_token_list = "%token <keyword> " + " ".join([x[0] for x in kwlist])
+
+text = text.replace("{{{ KEYWORDS }}}", kw_token_list)
+
+# statements
+stmt_list = "stmt: " + \
+    "\n\t| ".join(statements) + "\n\t| /*EMPTY*/\n\t{ $$ = NULL; }\n"
+text = text.replace("{{{ STATEMENTS }}}", stmt_list)
+
+# keywords
+# keywords can EITHER be reserved, unreserved, or some combination of (col_name, type_name, func_name)
+# that means duplicates are ONLY allowed between (col_name, type_name and func_name)
+# having a keyword be both reserved and unreserved is an error
+# as is having a keyword both reserved and col_name, for example
+# verify that this is the case
+reserved_dict = {}
+unreserved_dict = {}
+other_dict = {}
+for r in reserved_keywords:
+    if r in reserved_dict:
+        print("Duplicate keyword " + r + " in reserved keywords")
+        exit(1)
+    reserved_dict[r] = True
+
+for ur in unreserved_keywords:
+    if ur in unreserved_dict:
+        print("Duplicate keyword " + ur + " in unreserved keywords")
+        exit(1)
+    if ur in reserved_dict:
+        print("Keyword " + ur + " is marked as both unreserved and reserved")
+        exit(1)
+    unreserved_dict[ur] = True
+
+
+def add_to_other_keywords(kw, list_name):
+    global unreserved_dict
+    global reserved_dict
+    global other_dict
+    if kw in unreserved_dict:
+        print("Keyword " + kw + " is marked as both unreserved and " + list_name)
+        exit(1)
+    if kw in reserved_dict:
+        print("Keyword " + kw + " is marked as both reserved and " + list_name)
+        exit(1)
+    other_dict[kw] = True
+
+
+for cr in colname_keywords:
+    add_to_other_keywords(cr, "colname")
+
+type_func_name_dict = {}
+for tr in type_name_keywords:
+    add_to_other_keywords(tr, "typename")
+    type_func_name_dict[tr] = True
+
+for fr in func_name_keywords:
+    add_to_other_keywords(fr, "funcname")
+    type_func_name_dict[fr] = True
+
+type_func_name_keywords = list(type_func_name_dict.keys())
+type_func_name_keywords.sort()
+
+all_keywords = list(reserved_dict.keys()) + \
+    list(unreserved_dict.keys()) + list(other_dict.keys())
+all_keywords.sort()
+
+other_keyword = list(other_dict.keys())
+other_keyword.sort()
+
+kw_definitions = "unreserved_keyword: " + \
+    " | ".join(unreserved_keywords) + "\n"
+kw_definitions += "col_name_keyword: " + " | ".join(colname_keywords) + "\n"
+kw_definitions += "func_name_keyword: " + " | ".join(func_name_keywords) + "\n"
+kw_definitions += "type_name_keyword: " + " | ".join(type_name_keywords) + "\n"
+kw_definitions += "other_keyword: " + " | ".join(other_keyword) + "\n"
+kw_definitions += "type_func_name_keyword: " + \
+    " | ".join(type_func_name_keywords) + "\n"
+kw_definitions += "reserved_keyword: " + " | ".join(reserved_keywords) + "\n"
+text = text.replace("{{{ KEYWORD_DEFINITIONS }}}", kw_definitions)
+
+# types
+
+
+def concat_dir(dname, extension, add_line_numbers=False):
+    result = ""
+    for fname in os.listdir(dname):
+        fpath = os.path.join(dname, fname)
+        if os.path.isdir(fpath):
+            result += concat_dir(fpath, extension)
+        else:
+            if not fname.endswith(extension):
+                continue
+            result += get_file_contents(fpath, add_line_numbers)
+    return result
+
+
+type_definitions = concat_dir(type_dir, ".yh")
+# add statement types as well
+for stmt in statements:
+    type_definitions += "%type <node> " + stmt + "\n"
+
+text = text.replace("{{{ TYPES }}}", type_definitions)
+
+# grammar rules
+grammar_rules = concat_dir(rule_dir, ".y", True)
+
+text = text.replace("{{{ GRAMMAR RULES }}}", grammar_rules)
+
+# finally write the yacc file into the target file
+with open_utf8(target_file, 'w+') as f:
+    f.write(text)
+
+# generate the bison
+cmd = [bison_location]
+if counterexamples:
+    print("Attempting to print counterexamples (-Wcounterexamples)")
+    cmd += ["-Wcounterexamples"]
+if run_update:
+    cmd += ["--update"]
+cmd += ["-o", result_source, "-d", target_file]
+print(' '.join(cmd))
+proc = subprocess.Popen(cmd, stderr=subprocess.PIPE)
+res = proc.wait()
+
+if res != 0:
+    text = proc.stderr.read().decode('utf8')
+    print(text)
+    if 'shift/reduce' in text and not counterexamples:
+        print("---------------------------------------------------------------------")
+        print("In case of shift/reduce conflicts, try re-running with --counterexamples")
+        print("Note: this requires a more recent version of Bison (e.g. version 3.8)")
+        print("On a Macbook you can obtain this using \"brew install bison\"")
+    if counterexamples and 'time limit exceeded' in text:
+        print("---------------------------------------------------------------------")
+        print("The counterexamples time limit was exceeded. This likely means that no useful counterexample was generated.")
+        print("")
+        print("The counterexamples time limit can be increased by setting the TIME_LIMIT environment variable, e.g.:")
+        print("export TIME_LIMIT=100")
+    exit(1)
+
+
+os.rename(result_source, target_source_loc)
+os.rename(result_header, target_header_loc)
+
+with open_utf8(target_source_loc, 'r') as f:
+    text = f.read()
+
+text = text.replace('#include "grammar_out.hpp"',
+                    '#include "include/parser/gram.hpp"')
+text = text.replace('yynerrs = 0;', 'yynerrs = 0; (void)yynerrs;')
+
+with open_utf8(target_source_loc, 'w+') as f:
+    f.write(text)
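
In the same spirit, a hedged sketch of invoking this generator with the flags it parses (`--bison=`, `--counterexamples`, `--update`); the wrapper below is illustrative, assumes it is run from the repository root, and is not part of the commit.

import subprocess
import sys

# Hypothetical wrapper around build_support/generate_grammar.py, using only
# the command-line flags defined in the script above. Passing a custom bison
# path mirrors --bison=/loc/to/bison; --counterexamples needs a newer Bison
# (e.g. 3.8), as the script's own error hint explains.
def regenerate_grammar(bison_binary="bison", counterexamples=False, update=False):
    cmd = [sys.executable, "build_support/generate_grammar.py", "--bison=" + bison_binary]
    if counterexamples:
        cmd.append("--counterexamples")
    if update:
        cmd.append("--update")
    print(" ".join(cmd))
    subprocess.run(cmd, check=True)


if __name__ == "__main__":
    regenerate_grammar()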

test/binder/binder_test.cpp

Lines changed: 2 additions & 2 deletions
@@ -173,8 +173,8 @@ TEST(BinderTest, BindBinaryOp) {
   PrintStatements(statements);
 }
 
-TEST(BinderTest, BindCopyFrom) {
-  auto statements = TryBind("copy from");
+TEST(BinderTest, DIABLED_BindCopyFrom) {
+  auto statements = TryBind("copy from 'a.csv'");
   PrintStatements(statements);
 }
 

third_party/libpg_query/grammar/statements/create.y

Lines changed: 1 addition & 1 deletion
@@ -222,7 +222,7 @@ GeneratedColumnType:
     ;
 
 opt_GeneratedColumnType:
-    GeneratedColumnType { $$ = $1 }
+    GeneratedColumnType { $$ = $1; }
     | /* EMPTY */ { $$ = PG_CONSTR_GENERATED_VIRTUAL; }
     ;
 
