Skip to content

Commit 775ffdf

Browse files
suxiaogang223Doris-Extras
authored and committed
[docker](hive) add hive3 docker compose and modify scripts (#33115)
add hive3 docker compose from: big-data-europe/docker-hive#56
1 parent 8640246 commit 775ffdf

20 files changed

+304
-121
lines changed
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one or more
3+
# contributor license agreements. See the NOTICE file distributed with
4+
# this work for additional information regarding copyright ownership.
5+
# The ASF licenses this file to You under the Apache License, Version 2.0
6+
# (the "License"); you may not use this file except in compliance with
7+
# the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
18+
HIVE_SITE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://hive-metastore-postgresql/metastore
19+
HIVE_SITE_CONF_javax_jdo_option_ConnectionDriverName=org.postgresql.Driver
20+
HIVE_SITE_CONF_javax_jdo_option_ConnectionUserName=hive
21+
HIVE_SITE_CONF_javax_jdo_option_ConnectionPassword=hive
22+
HIVE_SITE_CONF_datanucleus_autoCreateSchema=false
23+
HIVE_SITE_CONF_hive_metastore_uris=thrift://hive-metastore:9083
24+
HIVE_SITE_CONF_hive_server2_thrift_bind_host=0.0.0.0
25+
HIVE_SITE_CONF_hive_server2_thrift_port=10000
26+
HIVE_SITE_CONF_metastore_storage_schema_reader_impl=org.apache.hadoop.hive.metastore.SerDeStorageSchemaReader
27+
28+
CORE_CONF_fs_defaultFS=hdfs://${IP_HOST}:${FS_PORT}
29+
CORE_CONF_hadoop_http_staticuser_user=root
30+
CORE_CONF_hadoop_proxyuser_hue_hosts=*
31+
CORE_CONF_hadoop_proxyuser_hue_groups=*
32+
33+
HDFS_CONF_dfs_webhdfs_enabled=true
34+
HDFS_CONF_dfs_permissions_enabled=false
35+
HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false
36+
37+
YARN_CONF_yarn_log___aggregation___enable=true
38+
YARN_CONF_yarn_resourcemanager_recovery_enabled=true
39+
YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore
40+
YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate
41+
YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs
42+
YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/
43+
YARN_CONF_yarn_timeline___service_enabled=true
44+
YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true
45+
YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true
46+
YARN_CONF_yarn_resourcemanager_hostname=resourcemanager
47+
YARN_CONF_yarn_timeline___service_hostname=historyserver
48+
YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032
49+
YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030
50+
# A triple underscore encodes "-" (as in yarn_log___aggregation___enable), so this sets yarn.resourcemanager.resource-tracker.address.
YARN_CONF_yarn_resourcemanager_resource___tracker_address=resourcemanager:8031

docker/thirdparties/docker-compose/hive/hadoop-hive.env.tpl.tpl renamed to docker/thirdparties/docker-compose/hive/hadoop-hive.env

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,25 +15,24 @@
1515
# limitations under the License.
1616
#
1717

18-
HIVE_SITE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://externalEnvIp:5432/metastore
18+
HIVE_SITE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://hive-metastore-postgresql/metastore
1919
HIVE_SITE_CONF_javax_jdo_option_ConnectionDriverName=org.postgresql.Driver
2020
HIVE_SITE_CONF_javax_jdo_option_ConnectionUserName=hive
2121
HIVE_SITE_CONF_javax_jdo_option_ConnectionPassword=hive
2222
HIVE_SITE_CONF_datanucleus_autoCreateSchema=false
23-
HIVE_SITE_CONF_hive_metastore_uris=thrift://externalEnvIp:9083
24-
HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false
23+
HIVE_SITE_CONF_hive_metastore_uris=thrift://hive-metastore:9083
2524
HIVE_SITE_CONF_hive_server2_thrift_bind_host=0.0.0.0
2625
HIVE_SITE_CONF_hive_server2_thrift_port=10000
27-
HIVE_SITE_CONF_hive_compactor_initiator_on=true
28-
HIVE_SITE_CONF_hive_compactor_worker_threads=2
26+
HIVE_SITE_CONF_metastore_storage_schema_reader_impl=org.apache.hadoop.hive.metastore.SerDeStorageSchemaReader
2927

28+
CORE_CONF_fs_defaultFS=hdfs://namenode:8020
3029
CORE_CONF_hadoop_http_staticuser_user=root
3130
CORE_CONF_hadoop_proxyuser_hue_hosts=*
3231
CORE_CONF_hadoop_proxyuser_hue_groups=*
33-
CORE_CONF_hadoop_proxyuser_hive_hosts=*
3432

3533
HDFS_CONF_dfs_webhdfs_enabled=true
3634
HDFS_CONF_dfs_permissions_enabled=false
35+
HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false
3736

3837
YARN_CONF_yarn_log___aggregation___enable=true
3938
YARN_CONF_yarn_resourcemanager_recovery_enabled=true

docker/thirdparties/docker-compose/hive/hive-2x.yaml.tpl

Lines changed: 33 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -19,87 +19,83 @@
1919
version: "3.8"
2020

2121
services:
22-
doris--namenode:
22+
namenode:
2323
image: bde2020/hadoop-namenode:2.0.0-hadoop2.7.4-java8
2424
environment:
2525
- CLUSTER_NAME=test
2626
env_file:
2727
- ./hadoop-hive.env
28-
container_name: doris--namenode
29-
expose:
30-
- "50070"
31-
- "8020"
32-
- "9000"
28+
container_name: ${CONTAINER_UID}hadoop2-namenode
29+
ports:
30+
- "${FS_PORT}:8020"
3331
healthcheck:
3432
test: [ "CMD", "curl", "http://localhost:50070/" ]
3533
interval: 5s
3634
timeout: 120s
3735
retries: 120
38-
network_mode: "host"
3936

40-
doris--datanode:
37+
datanode:
4138
image: bde2020/hadoop-datanode:2.0.0-hadoop2.7.4-java8
4239
env_file:
4340
- ./hadoop-hive.env
4441
environment:
45-
SERVICE_PRECONDITION: "externalEnvIp:50070"
46-
container_name: doris--datanode
47-
expose:
48-
- "50075"
42+
SERVICE_PRECONDITION: "namenode:50070"
43+
container_name: ${CONTAINER_UID}hadoop2-datanode
4944
healthcheck:
5045
test: [ "CMD", "curl", "http://localhost:50075" ]
5146
interval: 5s
5247
timeout: 60s
5348
retries: 120
54-
network_mode: "host"
5549

56-
doris--hive-server:
50+
hive-server:
5751
image: bde2020/hive:2.3.2-postgresql-metastore
5852
env_file:
59-
- ./hadoop-hive.env
53+
- ./hadoop-hive-metastore.env
6054
environment:
61-
HIVE_CORE_CONF_javax_jdo_option_ConnectionURL: "jdbc:postgresql://externalEnvIp:5432/metastore"
62-
SERVICE_PRECONDITION: "externalEnvIp:9083"
63-
container_name: doris--hive-server
64-
expose:
65-
- "10000"
55+
HIVE_CORE_CONF_javax_jdo_option_ConnectionURL: "jdbc:postgresql://hive-metastore/metastore"
56+
SERVICE_PRECONDITION: "hive-metastore:9083"
57+
container_name: ${CONTAINER_UID}hive2-server
58+
ports:
59+
- "${HS_PORT}:10000"
6660
depends_on:
67-
- doris--datanode
68-
- doris--namenode
61+
- datanode
62+
- namenode
6963
healthcheck:
7064
test: beeline -u "jdbc:hive2://127.0.0.1:10000/default" -n health_check -e "show databases;"
7165
interval: 10s
7266
timeout: 120s
7367
retries: 120
74-
network_mode: "host"
7568

7669

77-
doris--hive-metastore:
70+
hive-metastore:
7871
image: bde2020/hive:2.3.2-postgresql-metastore
7972
env_file:
80-
- ./hadoop-hive.env
73+
- ./hadoop-hive-metastore.env
8174
command: /bin/bash /mnt/scripts/hive-metastore.sh
8275
# command: /opt/hive/bin/hive --service metastore
8376
environment:
84-
SERVICE_PRECONDITION: "externalEnvIp:50070 externalEnvIp:50075 externalEnvIp:5432"
85-
container_name: doris--hive-metastore
86-
expose:
87-
- "9083"
77+
SERVICE_PRECONDITION: "namenode:50070 datanode:50075 hive-metastore-postgresql:5432"
78+
container_name: ${CONTAINER_UID}hive2-metastore
79+
ports:
80+
- "${HMS_PORT}:9083"
8881
volumes:
8982
- ./scripts:/mnt/scripts
9083
depends_on:
91-
- doris--hive-metastore-postgresql
92-
network_mode: "host"
84+
- hive-metastore-postgresql
9385

94-
doris--hive-metastore-postgresql:
86+
hive-metastore-postgresql:
9587
image: bde2020/hive-metastore-postgresql:2.3.0
96-
restart: always
97-
container_name: doris--hive-metastore-postgresql
98-
expose:
99-
- "5432"
88+
container_name: ${CONTAINER_UID}hive2-metastore-postgresql
89+
ports:
90+
- "${PG_PORT}:5432"
10091
healthcheck:
10192
test: ["CMD-SHELL", "pg_isready -U postgres"]
10293
interval: 5s
10394
timeout: 60s
10495
retries: 120
105-
network_mode: "host"
96+
97+
# Name the default network explicitly (with "-" instead of "_") to solve this HiveServer2 connect error:
98+
# java.net.URISyntaxException Illegal character in hostname :thrift://${CONTAINER_UID}hive2_default:9083
99+
networks:
100+
default:
101+
name: ${CONTAINER_UID}hive2-default

docker/thirdparties/docker-compose/hive/gen_env.sh.tpl renamed to docker/thirdparties/docker-compose/hive/hive-2x_settings.env

Lines changed: 7 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -16,26 +16,11 @@
1616
# specific language governing permissions and limitations
1717
# under the License.
1818

19-
####################################################################
20-
# This script will generate hadoop-hive.env from hadoop-hive.env.tpl
21-
####################################################################
19+
# Change this to a specific string.
20+
# Do not use "_" or other special characters; use only digits and letters.
21+
# NOTICE: changing this uid will modify hive-*.yaml
2222

23-
set -eo pipefail
24-
25-
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
26-
FS_PORT=8020
27-
HMS_PORT=9083
28-
29-
cp "${ROOT}"/hadoop-hive.env.tpl "${ROOT}"/hadoop-hive.env
30-
# Need to set hostname of container to same as host machine's.
31-
# Otherwise, the doris process can not connect to namenode directly.
32-
HOST_NAME="doris--"
33-
34-
{
35-
echo "FS_PORT=${FS_PORT}"
36-
echo "HMS_PORT=${HMS_PORT}"
37-
echo "CORE_CONF_fs_defaultFS=hdfs://${externalEnvIp}:${FS_PORT}"
38-
echo "HOST_NAME=${HOST_NAME}"
39-
echo "externalEnvIp=${externalEnvIp}"
40-
41-
} >>"${ROOT}"/hadoop-hive.env
23+
export FS_PORT=8220 #should be same in regression-conf.groovy
24+
export HMS_PORT=9283 #should be same in regression-conf.groovy
25+
export HS_PORT=12000 #should be same in regression-conf.groovy
26+
export PG_PORT=5632 #should be same in regression-conf.groovy
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one or more
3+
# contributor license agreements. See the NOTICE file distributed with
4+
# this work for additional information regarding copyright ownership.
5+
# The ASF licenses this file to You under the Apache License, Version 2.0
6+
# (the "License"); you may not use this file except in compliance with
7+
# the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
18+
19+
version: "3.8"
20+
21+
services:
22+
namenode:
23+
image: bde2020/hadoop-namenode:2.0.0-hadoop3.2.1-java8
24+
environment:
25+
- CLUSTER_NAME=test
26+
env_file:
27+
- ./hadoop-hive.env
28+
container_name: ${CONTAINER_UID}hadoop3-namenode
29+
ports:
30+
- "${FS_PORT}:8020"
31+
healthcheck:
32+
test: [ "CMD", "curl", "http://localhost:9870/" ]
33+
interval: 5s
34+
timeout: 120s
35+
retries: 120
36+
37+
datanode:
38+
image: bde2020/hadoop-datanode:2.0.0-hadoop3.2.1-java8
39+
env_file:
40+
- ./hadoop-hive.env
41+
environment:
42+
SERVICE_PRECONDITION: "namenode:9870"
43+
container_name: ${CONTAINER_UID}hadoop3-datanode
44+
healthcheck:
45+
test: [ "CMD", "curl", "http://localhost:9864" ]
46+
interval: 5s
47+
timeout: 60s
48+
retries: 120
49+
50+
hive-server:
51+
image: lishizhen/hive:3.1.2-postgresql-metastore
52+
env_file:
53+
- ./hadoop-hive-metastore.env
54+
environment:
55+
HIVE_CORE_CONF_javax_jdo_option_ConnectionURL: "jdbc:postgresql://hive-metastore/metastore"
56+
SERVICE_PRECONDITION: "hive-metastore:9083"
57+
container_name: ${CONTAINER_UID}hive3-server
58+
ports:
59+
- "${HS_PORT}:10000"
60+
depends_on:
61+
- datanode
62+
- namenode
63+
healthcheck:
64+
test: beeline -u "jdbc:hive2://127.0.0.1:10000/default" -n health_check -e "show databases;"
65+
interval: 10s
66+
timeout: 120s
67+
retries: 120
68+
69+
70+
hive-metastore:
71+
image: lishizhen/hive:3.1.2-postgresql-metastore
72+
env_file:
73+
- ./hadoop-hive-metastore.env
74+
command: /bin/bash /mnt/scripts/hive-metastore.sh
75+
# command: /opt/hive/bin/hive --service metastore
76+
environment:
77+
SERVICE_PRECONDITION: "namenode:9870 datanode:9864 hive-metastore-postgresql:5432"
78+
container_name: ${CONTAINER_UID}hive3-metastore
79+
ports:
80+
- "${HMS_PORT}:9083"
81+
volumes:
82+
- ./scripts:/mnt/scripts
83+
depends_on:
84+
- hive-metastore-postgresql
85+
86+
hive-metastore-postgresql:
87+
image: bde2020/hive-metastore-postgresql:3.1.0
88+
container_name: ${CONTAINER_UID}hive3-metastore-postgresql
89+
ports:
90+
- "${PG_PORT}:5432"
91+
healthcheck:
92+
test: ["CMD-SHELL", "pg_isready -U postgres"]
93+
interval: 5s
94+
timeout: 60s
95+
retries: 120
96+
97+
# Name the default network explicitly (with "-" instead of "_") to solve this HiveServer2 connect error:
98+
# java.net.URISyntaxException Illegal character in hostname :thrift://${CONTAINER_UID}hive3_default:9083
99+
100+
networks:
101+
default:
102+
name: ${CONTAINER_UID}hive3-default
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
#!/bin/bash
2+
# Licensed to the Apache Software Foundation (ASF) under one
3+
# or more contributor license agreements. See the NOTICE file
4+
# distributed with this work for additional information
5+
# regarding copyright ownership. The ASF licenses this file
6+
# to you under the Apache License, Version 2.0 (the
7+
# "License"); you may not use this file except in compliance
8+
# with the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing,
13+
# software distributed under the License is distributed on an
14+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
# KIND, either express or implied. See the License for the
16+
# specific language governing permissions and limitations
17+
# under the License.
18+
19+
# Change this to a specific string.
20+
# Do not use "_" or other special characters; use only digits and letters.
21+
# NOTICE: changing this uid will modify hive-*.yaml
22+
23+
export FS_PORT=8020 #should be same in regression-conf.groovy
24+
export HMS_PORT=9083 #should be same in regression-conf.groovy
25+
export HS_PORT=10000 #should be same in regression-conf.groovy
26+
export PG_PORT=5432 #should be same in regression-conf.groovy

docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -601,6 +601,8 @@ CREATE TABLE `unsupported_type_table`(
601601
k6 int
602602
);
603603

604+
set hive.stats.column.autogather=false;
605+
604606
CREATE TABLE `schema_evo_test_text`(
605607
id int,
606608
name string
@@ -628,6 +630,8 @@ insert into `schema_evo_test_orc` select 1, "kaka";
628630
alter table `schema_evo_test_orc` ADD COLUMNS (`ts` timestamp);
629631
insert into `schema_evo_test_orc` select 2, "messi", from_unixtime(to_unix_timestamp('20230101 13:01:03','yyyyMMdd HH:mm:ss'));
630632

633+
set hive.stats.column.autogather=true;
634+
631635
-- Currently docker is hive 2.x version. Hive 2.x versioned full-acid tables need to run major compaction.
632636
SET hive.support.concurrency=true;
633637
SET hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;

0 commit comments

Comments
 (0)