.PHONY: init prepare-docker-build-context create-docker-image enable-localstack create-buckets start-history-server sparkpi stop

namespace = spark-playground
service_account_name = spark
spark_version = 3.3.0
hadoop_version = 3.3.4
image_repo_name = mlobo
image_name = spark-base
image_tag = v$(spark_version)
docker_image = $(image_repo_name)/$(image_name):$(image_tag)
history_image_name = spark-history-server
history_server_docker_image = $(image_repo_name)/$(history_image_name):$(image_tag)
localstack_compose_file = localstack-compose.yml
jars_bucket = spark-jars
logs_bucket = spark-logs
minikube_internal_host = 192.168.59.1
# A /24 CIDR block for minikube's host-only network, not a host:port pair.
minikube_internal_host_cidr = "$(minikube_internal_host)/24"
init:
	minikube start --driver virtualbox --host-only-cidr $(minikube_internal_host_cidr) --cpus 4 --memory 8192
	kubectl create namespace $(namespace)
	kubectl config set-context --current --namespace=$(namespace)
	kubectl create serviceaccount $(service_account_name)
	kubectl create clusterrolebinding spark-role --clusterrole=edit --serviceaccount=$(namespace):$(service_account_name) --namespace=$(namespace)
# kubectl proxy blocks; it exposes the API server at http://127.0.0.1:8001, which `sparkpi` uses as its master URL
	kubectl proxy
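
# Sketch of a sanity check, not part of the original workflow: confirms that
# the namespace, service account, and context created by `init` are in place.
.PHONY: check-cluster
check-cluster:
	kubectl get serviceaccount $(service_account_name) --namespace=$(namespace)
	kubectl cluster-info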
# TODO: start using git submodules instead
prepare-docker-build-context:
	mkdir -p tmp_docker_image/hadoop
	mkdir -p tmp_docker_image/spark
	cp -r ${HADOOP_HOME}/* tmp_docker_image/hadoop
	cp -r ${SPARK_HOME}/* tmp_docker_image/spark
create-docker-image:
	wget -nc https://dlcdn.apache.org/spark/spark-$(spark_version)/spark-$(spark_version)-bin-without-hadoop.tgz -P docker-image/stage-0
	wget -nc https://dlcdn.apache.org/hadoop/common/hadoop-$(hadoop_version)/hadoop-$(hadoop_version).tar.gz -P docker-image/stage-0
	docker build --build-arg SPARK_VERSION=$(spark_version) --build-arg HADOOP_VERSION=$(hadoop_version) -t tmp/spark-image:stage-0 docker-image/stage-0
	docker build -t $(docker_image) docker-image/stage-1
	minikube image load $(docker_image)
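
# Hedged helper (assumes a minikube version that has the `image ls`
# subcommand): verify that the image was actually loaded into minikube.
.PHONY: verify-image
verify-image:
	minikube image ls | grep $(image_name)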
enable-localstack:
	docker-compose -f $(localstack_compose_file) up
create-buckets:
	awslocal s3api create-bucket --bucket $(jars_bucket)
	awslocal s3api create-bucket --bucket $(logs_bucket)
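
# Convenience sketch, not in the original Makefile: list the LocalStack
# buckets to confirm that `create-buckets` succeeded.
.PHONY: list-buckets
list-buckets:
	awslocal s3api list-buckets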
start-history-server:
	kubectl apply -f history-server.yaml
	echo "In a few seconds, you'll be able to access the history server at the following URL:"
	minikube service spark-history-server-service -n $(namespace) --url
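
# Hedged status check (the Service name is taken from the `minikube service`
# call above; the Deployment name inside history-server.yaml is not known here).
.PHONY: history-server-status
history-server-status:
	kubectl get service spark-history-server-service --namespace=$(namespace)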
# TODO: move the spark-submit invocation into a shell script
sparkpi:
	echo "Don't forget to copy hadoop-env.sh and spark-env.sh to your local setup"
	spark-submit \
		--master k8s://http://127.0.0.1:8001 \
		--deploy-mode cluster \
		--class org.apache.spark.examples.SparkPi \
		--conf spark.hadoop.fs.s3a.endpoint=http://$(minikube_internal_host):4566 \
		--conf spark.hadoop.fs.s3a.fast.upload=true \
		--conf spark.hadoop.fs.s3a.access.key=foobar \
		--conf spark.hadoop.fs.s3a.secret.key=foobar \
		--conf spark.executor.instances=3 \
		--conf spark.executor.memory=1g \
		--conf spark.executor.cores=1 \
		--conf spark.kubernetes.file.upload.path=s3a://$(jars_bucket)/ \
		--conf spark.kubernetes.container.image=$(docker_image) \
		--conf spark.kubernetes.container.image.pullPolicy=Never \
		--conf spark.kubernetes.namespace=$(namespace) \
		--conf spark.kubernetes.authenticate.driver.serviceAccountName=$(service_account_name) \
		--conf spark.eventLog.enabled=true \
		--conf spark.eventLog.dir=s3a://$(logs_bucket)/ \
		file://${SPARK_HOME}/examples/jars/spark-examples_2.12-$(spark_version).jar 10
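
# Hedged helper: Spark on Kubernetes labels driver pods with spark-role=driver
# by default, so this should follow the SparkPi driver's log output.
.PHONY: driver-logs
driver-logs:
	kubectl logs -f -l spark-role=driver --namespace=$(namespace)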
stop:
	minikube delete
	docker-compose -f $(localstack_compose_file) down
# TODO: document that this build follows the "Hadoop provided" setup:
# https://spark.apache.org/docs/latest/hadoop-provided.html
# Also needed: `export SPARK_DIST_CLASSPATH=$SPARK_DIST_CLASSPATH:$HADOOP_HOME/share/hadoop/tools/lib/*`
# TODO: enable ZGC (the Z Garbage Collector)
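
# Sketch of the "Hadoop provided" environment the TODOs above refer to;
# these exports belong in $SPARK_HOME/conf/spark-env.sh, not in this Makefile:
#   export SPARK_DIST_CLASSPATH=$(hadoop classpath)
#   export SPARK_DIST_CLASSPATH=$SPARK_DIST_CLASSPATH:$HADOOP_HOME/share/hadoop/tools/lib/*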