Skip to content

Commit cbad70b

Browse files
xpengahana, ethanyzhang, Archy-X
authored
Genddl (#27)
* Add sketch code * Add ddl generation (#11) * add genddl cmd * add generated examples * add analyze statement for unpartitioned hive schema --------- Co-authored-by: Xin Peng <[email protected]> --------- Co-authored-by: Yiqun (Ethan) Zhang <[email protected]> Co-authored-by: Archy-X <[email protected]>
1 parent 1a4d6aa commit cbad70b

File tree

70 files changed

+13268
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

70 files changed

+13268
-0
lines changed

.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,6 @@ pbench_*
3232

3333
# tarball
3434
*.tar.gz
35+
36+
# genddl output dir
37+
cmd/genddl/out/

.idea/.gitignore

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cmd/genddl.go

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
package cmd
2+
3+
import (
4+
"github.com/spf13/cobra"
5+
"path/filepath"
6+
"pbench/cmd/genddl"
7+
)
8+
9+
var genddlCmd = &cobra.Command{
10+
Use: `genddl [config file]`,
11+
DisableFlagsInUseLine: true,
12+
Run: genddl.Run,
13+
Args: cobra.ExactArgs(1),
14+
ValidArgsFunction: fileCompletion,
15+
Short: "Generate DDL scripts based on a config file",
16+
}
17+
18+
func init() {
19+
RootCmd.AddCommand(genddlCmd)
20+
}
21+
22+
func fileCompletion(_ *cobra.Command, _ []string, toComplete string) ([]string, cobra.ShellCompDirective) {
23+
matches, err := filepath.Glob(toComplete + "*")
24+
if err != nil {
25+
return nil, cobra.ShellCompDirectiveDefault
26+
}
27+
28+
return matches, cobra.ShellCompDirectiveDefault
29+
}

cmd/genddl/aws_s3_cp.sh.tmpl

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{{- /*gotype:pbench/cmd/genddl.Schema*/ -}}
{{- /*
  For every entry of .Tables whose .Partitioned is true, emits one
  `aws s3 cp --recursive` command that copies the
  `<last column>=__HIVE_DEFAULT_PARTITION__/` directory to
  `<last column>=null/` under
  s3://presto-workload-v2/<PartIcebergName>/<table name>/data/.
*/ -}}
2+
{{- range .Tables }}
3+
{{- if .Partitioned -}}
4+
aws s3 cp --recursive s3://presto-workload-v2/{{ $.PartIcebergName }}/{{ .Name }}/data/{{ .LastColumn.Name }}=__HIVE_DEFAULT_PARTITION__/ s3://presto-workload-v2/{{ $.PartIcebergName }}/{{ .Name }}/data/{{ .LastColumn.Name }}=null/
5+
{{ end -}}
6+
{{ end -}}

cmd/genddl/aws_s3_mv.sh.tmpl

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{{- /*gotype:pbench/cmd/genddl.Schema*/ -}}
{{- /*
  For every entry of .Tables where both the table's .Partitioned and the
  root-level $.Partitioned are true, emits one `aws s3 mv --recursive`
  command that moves the `<last column>=null/` directory to
  `<last column>=__HIVE_DEFAULT_PARTITION__/` under
  s3://presto-workload-v2/<PartIcebergName>/<table name>/data/.
  NOTE(review): aws_s3_cp.sh.tmpl checks only the table's .Partitioned;
  confirm the extra $.Partitioned condition here is intended.
*/ -}}
2+
{{- range .Tables }}
3+
{{- if and .Partitioned $.Partitioned -}}
4+
aws s3 mv --recursive s3://presto-workload-v2/{{ $.PartIcebergName }}/{{ .Name }}/data/{{ .LastColumn.Name }}=null/ s3://presto-workload-v2/{{ $.PartIcebergName }}/{{ .Name }}/data/{{ .LastColumn.Name }}=__HIVE_DEFAULT_PARTITION__/
5+
{{ end }}
6+
{{- end -}}

cmd/genddl/call_analyze.sql.tmpl

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{{- /*gotype:pbench/cmd/genddl.Schema*/ -}}
{{- /*
  Emits `USE hive.<SchemaName>;`, then one
  system.sync_partition_metadata(<SchemaName>, <table name>, 'FULL') call for
  each entry of .Tables whose .Partitioned is true, and finally an ANALYZE
  statement for every entry of .Tables.
*/ -}}
2+
USE hive.{{ .SchemaName }};
3+
4+
{{ range .Tables }}
5+
{{- if .Partitioned -}}
6+
CALL system.sync_partition_metadata('{{ $.SchemaName }}', '{{ .Name }}', 'FULL');
7+
{{ end }}
8+
{{- end }}
9+
{{ range .Tables -}}
10+
ANALYZE {{ .Name }};
11+
{{ end -}}

cmd/genddl/config.json

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"scale_factor": "10",
3+
"file_format": "parquet",
4+
"compression_method": "uncompressed"
5+
}

cmd/genddl/create_table.sql.tmpl

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
{{- /*gotype:pbench/cmd/genddl.Schema*/ -}}
{{- /*
  Renders the DDL script for one schema, in this order:
    1. a SET SESSION statement per entry of .SessionVariables;
    2. CREATE SCHEMA IF NOT EXISTS in the iceberg or hive catalog (chosen by
       .Iceberg) located at s3a://presto-workload-v2/<LocationName>/,
       followed by USE on the same catalog and schema;
    3. one iceberg.system.register_table call per entry of .RegisterTables;
    4. CREATE TABLE IF NOT EXISTS per entry of .Tables, with a
       comma-separated column list, format = 'PARQUET', and a
       partitioning/location property chosen from $.Partitioned, $.Iceberg
       and the table's own .Partitioned;
    5. ANALYZE per entry of .InsertTables, only when neither .Iceberg nor
       .Partitioned is set.
  NOTE(review): in the $.Partitioned + $.Iceberg branch every table gets
  `partitioning` on its last column without checking the table's own
  .Partitioned; confirm this is intended.
*/ -}}
2+
{{ range $key, $value := .SessionVariables -}}
3+
SET SESSION {{ $key }}='{{ $value }}';
4+
{{ end }}
5+
CREATE SCHEMA IF NOT EXISTS {{ if .Iceberg }}iceberg.{{ .SchemaName }}
6+
{{- else }}hive.{{ .SchemaName }}
7+
{{- end }}
8+
WITH (
9+
location = 's3a://presto-workload-v2/{{ .LocationName }}/'
10+
);
11+
{{ if .Iceberg }}
12+
USE iceberg.{{ .SchemaName }};
13+
{{- else }}
14+
USE hive.{{ .SchemaName }};
15+
{{- end }}
16+
17+
{{- if .RegisterTables }}
18+
{{ range .RegisterTables }}
19+
CALL iceberg.system.register_table('{{ $.SchemaName }}', '{{ .TableName }}', 's3a://presto-workload-v2/{{ .ExternalLocation }}/{{ .TableName }}/metadata');
20+
{{- end }}
21+
{{- end }}
22+
{{ range .Tables }}
23+
CREATE TABLE IF NOT EXISTS {{ .Name }} (
24+
{{- $first := true }}
25+
{{- range .Columns }}
26+
{{- if $first }}
27+
{{- $first = false }}
28+
{{- else -}}
29+
,
30+
{{- end }}
31+
{{ .Name }} {{ .Type }}
32+
{{- end }}
33+
)
34+
WITH (
35+
format = 'PARQUET',
36+
{{- if $.Partitioned }}
37+
{{- if $.Iceberg}}
38+
partitioning = array['{{ .LastColumn.Name }}']
39+
{{- else if .Partitioned }}
40+
partitioned_by = array['{{ .LastColumn.Name }}'],
41+
external_location = 's3a://presto-workload-v2/{{ $.PartIcebergName }}/{{ .Name }}/data'
42+
{{- else }}
43+
external_location = 's3a://presto-workload-v2/{{ $.IcebergLocationName }}/{{ .Name }}/data/'
44+
{{- end }}
45+
{{- else if $.Iceberg }}
46+
location = 's3a://presto-workload-v2/{{ $.LocationName }}/{{ .Name }}'
47+
{{- else }}
48+
external_location = 's3a://presto-workload-v2/{{ $.IcebergLocationName }}/{{ .Name }}/data/'
49+
{{- end }}
50+
);
51+
{{ end }}
52+
{{- if (and (not .Iceberg) (not .Partitioned)) }}
53+
{{- range .InsertTables -}}
54+
ANALYZE {{ .Name }};
55+
{{ end -}}
56+
{{ end }}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
{
2+
"name": "call_center",
3+
"columns": [
4+
{
5+
"name": "cc_call_center_sk",
6+
"type": "INT"
7+
},
8+
{
9+
"name": "cc_call_center_id",
10+
"type": "VARCHAR(16)"
11+
},
12+
{
13+
"name": "cc_rec_start_date",
14+
"type": "DATE"
15+
},
16+
{
17+
"name": "cc_rec_end_date",
18+
"type": "DATE"
19+
},
20+
{
21+
"name": "cc_closed_date_sk",
22+
"type": "INT"
23+
},
24+
{
25+
"name": "cc_open_date_sk",
26+
"type": "INT"
27+
},
28+
{
29+
"name": "cc_name",
30+
"type": "VARCHAR(50)"
31+
},
32+
{
33+
"name": "cc_class",
34+
"type": "VARCHAR(50)"
35+
},
36+
{
37+
"name": "cc_employees",
38+
"type": "INT"
39+
},
40+
{
41+
"name": "cc_sq_ft",
42+
"type": "INT"
43+
},
44+
{
45+
"name": "cc_hours",
46+
"type": "VARCHAR(20)"
47+
},
48+
{
49+
"name": "cc_manager",
50+
"type": "VARCHAR(40)"
51+
},
52+
{
53+
"name": "cc_mkt_id",
54+
"type": "INT"
55+
},
56+
{
57+
"name": "cc_mkt_class",
58+
"type": "VARCHAR(50)"
59+
},
60+
{
61+
"name": "cc_mkt_desc",
62+
"type": "VARCHAR(100)"
63+
},
64+
{
65+
"name": "cc_market_manager",
66+
"type": "VARCHAR(40)"
67+
},
68+
{
69+
"name": "cc_division",
70+
"type": "INT"
71+
},
72+
{
73+
"name": "cc_division_name",
74+
"type": "VARCHAR(50)"
75+
},
76+
{
77+
"name": "cc_company",
78+
"type": "INT"
79+
},
80+
{
81+
"name": "cc_company_name",
82+
"type": "VARCHAR(50)"
83+
},
84+
{
85+
"name": "cc_street_number",
86+
"type": "VARCHAR(10)"
87+
},
88+
{
89+
"name": "cc_street_name",
90+
"type": "VARCHAR(60)"
91+
},
92+
{
93+
"name": "cc_street_type",
94+
"type": "VARCHAR(15)"
95+
},
96+
{
97+
"name": "cc_suite_number",
98+
"type": "VARCHAR(10)"
99+
},
100+
{
101+
"name": "cc_city",
102+
"type": "VARCHAR(60)"
103+
},
104+
{
105+
"name": "cc_county",
106+
"type": "VARCHAR(30)"
107+
},
108+
{
109+
"name": "cc_state",
110+
"type": "VARCHAR(2)"
111+
},
112+
{
113+
"name": "cc_zip",
114+
"type": "VARCHAR(10)"
115+
},
116+
{
117+
"name": "cc_country",
118+
"type": "VARCHAR(20)"
119+
},
120+
{
121+
"name": "cc_gmt_offset",
122+
"type": "DECIMAL(5,2)"
123+
},
124+
{
125+
"name": "cc_tax_percentage",
126+
"type": "DECIMAL(5,2)"
127+
}
128+
]
129+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
{
2+
"name": "catalog_page",
3+
"columns": [
4+
{
5+
"name": "cp_catalog_page_sk",
6+
"type": "INT"
7+
},
8+
{
9+
"name": "cp_catalog_page_id",
10+
"type": "VARCHAR(16)"
11+
},
12+
{
13+
"name": "cp_start_date_sk",
14+
"type": "INT"
15+
},
16+
{
17+
"name": "cp_end_date_sk",
18+
"type": "INT"
19+
},
20+
{
21+
"name": "cp_department",
22+
"type": "VARCHAR(50)"
23+
},
24+
{
25+
"name": "cp_catalog_number",
26+
"type": "INT"
27+
},
28+
{
29+
"name": "cp_catalog_page_number",
30+
"type": "INT"
31+
},
32+
{
33+
"name": "cp_description",
34+
"type": "VARCHAR(100)"
35+
},
36+
{
37+
"name": "cp_type",
38+
"type": "VARCHAR(100)"
39+
}
40+
]
41+
}

0 commit comments

Comments
 (0)