 
 # hbase-connectors-builder: Build the Spark HBase connector and copy
 # required JARs into /stackable/spark/jars
-FROM local-image/java-devel AS hbase-connectors-builder
-
-ARG PRODUCT_VERSION
-ARG RELEASE_VERSION
-ARG HADOOP_HADOOP_VERSION
-# Reassign the arg to `HADOOP_VERSION` for better readability.
-ENV HADOOP_VERSION=${HADOOP_HADOOP_VERSION}
-ARG HBASE_VERSION
-ARG HBASE_CONNECTOR_VERSION
-ARG STACKABLE_USER_UID
-
-WORKDIR /stackable
-
-# Copy the pom.xml file from the patched Spark source code to read the
-# versions used by Spark. The pom.xml defines child modules which are
-# not required and not copied, therefore mvn must be called with the
-# parameter --non-recursive.
-COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-source-builder \
-    /stackable/src/spark-k8s/patchable-work/worktree/${PRODUCT_VERSION}/pom.xml \
-    spark/
-
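A note on the `--non-recursive` hint in the comment above: because only the parent `pom.xml` is copied, a recursive Maven invocation would try to load the missing child modules and abort. A minimal sketch of the safe invocation (the `help:evaluate` goal and the `project.version` expression are illustrative, not part of this build):

```bash
# Read a single value from the lone pom.xml without descending into the
# child modules it declares (they were never copied into this stage).
# A recursive run would abort with an error like:
#   [ERROR] Child module .../common of .../pom.xml does not exist
mvn --non-recursive --file spark/pom.xml \
  help:evaluate -Dexpression=project.version -q -DforceStdout
```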
-# Patch the hbase-connectors source code
-WORKDIR /stackable
-
-COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patches/patchable.toml /stackable/src/spark-k8s/hbase-connectors/stackable/patches/patchable.toml
-COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR_VERSION} /stackable/src/spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR_VERSION}
-
-RUN <<EOF
-
-# IMPORTANT: HBase connectors don't support Spark 4 yet, so we skip the build.
-# Watch this PR for updates: https://github.com/apache/hbase-connectors/pull/130
-if [[ "${PRODUCT_VERSION}" == 4* ]]; then
-  # Create this empty directory so that following COPY layers succeed.
-  mkdir -p /stackable/spark/jars
-  # Create a dummy tarball to satisfy the build process for Spark 3.
-  touch hbase-connector-${HBASE_CONNECTOR_VERSION}-stackable${RELEASE_VERSION}-src.tar.gz
-  exit 0
-fi
-
-cd "$(/stackable/patchable --images-repo-root=src checkout spark-k8s/hbase-connectors ${HBASE_CONNECTOR_VERSION})/spark"
-
-NEW_VERSION="${HBASE_CONNECTOR_VERSION}-stackable${RELEASE_VERSION}"
-
-mvn versions:set -DnewVersion=$NEW_VERSION
-
-# Create snapshot of the source code including custom patches
-tar -czf /stackable/hbase-connector-${HBASE_CONNECTOR_VERSION}-stackable${RELEASE_VERSION}-src.tar.gz .
-
-# Building the hbase-connectors with JDK 17 is not yet supported, see
-# https://github.com/apache/hbase-connectors/pull/132.
-# As there are no JDK profiles, access to the non-public elements must
-# be enabled with --add-opens, see https://openjdk.org/jeps/403 and
-# https://openjdk.org/jeps/261#Breaking-encapsulation.
-export JDK_JAVA_OPTIONS="\
-  --add-opens java.base/java.lang=ALL-UNNAMED \
-  --add-opens java.base/java.util=ALL-UNNAMED"
-
-# Get the Scala version used by Spark
-SCALA_VERSION=$(grep "scala.version" /stackable/spark/pom.xml | head -n1 | awk -F '[<>]' '{print $3}')
-
-# Get the Scala binary version used by Spark
-SCALA_BINARY_VERSION=$(grep "scala.binary.version" /stackable/spark/pom.xml | head -n1 | awk -F '[<>]' '{print $3}')
-
-# Build the Spark HBase connector
-# Skip the tests because the MiniHBaseCluster does not get ready for
-# whatever reason:
-#   Caused by: java.lang.RuntimeException: Master not active after 30000ms
-#     at org.apache.hadoop.hbase.util.JVMClusterUtil.waitForEvent(JVMClusterUtil.java:221)
-#     at org.apache.hadoop.hbase.util.JVMClusterUtil.startup(JVMClusterUtil.java:177)
-#     at org.apache.hadoop.hbase.LocalHBaseCluster.startup(LocalHBaseCluster.java:407)
-#     at org.apache.hadoop.hbase.MiniHBaseCluster.init(MiniHBaseCluster.java:250)
-mvn \
-  --batch-mode \
-  --no-transfer-progress \
-  --define spark.version="${PRODUCT_VERSION}" \
-  --define scala.version="${SCALA_VERSION}" \
-  --define scala.binary.version="${SCALA_BINARY_VERSION}" \
-  --define hadoop-three.version="${HADOOP_VERSION}" \
-  --define hbase.version="${HBASE_VERSION}" \
-  --define skipTests \
-  --define maven.test.skip=true \
-  clean package
-
-mkdir -p /stackable/spark/jars
-ln -s "$(pwd)/hbase-spark/target/hbase-spark-${HBASE_CONNECTOR_VERSION}-stackable${RELEASE_VERSION}.jar" /stackable/spark/jars/hbase-spark-${HBASE_CONNECTOR_VERSION}-stackable${RELEASE_VERSION}.jar
-
-cd /stackable/spark/jars
-
-# Download log4j-slf4j-impl-x.x.x.jar containing the StaticLoggerBinder
-# which is required by the connector.
-# Spark contains only log4j-slf4j2-impl-x.x.x.jar but not
-# log4j-slf4j-impl-x.x.x.jar. It is okay to have both JARs in the
-# classpath as long as they have the same version.
-mvn --non-recursive --file /stackable/spark/pom.xml \
-  dependency:copy \
-  -Dartifact=org.apache.logging.log4j:log4j-slf4j-impl:'${log4j.version}' \
-  -DoutputDirectory=./jars
-chmod g=u /stackable/hbase-connector-${HBASE_CONNECTOR_VERSION}-stackable${RELEASE_VERSION}-src.tar.gz .
-EOF
-
+FROM local-image/spark-k8s/hbase-connectors AS hbase-connectors-builder
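The removed stage scraped `scala.version` and `scala.binary.version` out of Spark's `pom.xml` with `grep`/`awk`, taking the third field of the first literal match. A hedged alternative sketch that has Maven resolve the properties instead (not what the Dockerfile does; `help:evaluate` is the assumed goal):

```bash
# Sketch: resolve the properties through Maven so that inherited or
# interpolated values are picked up, not just the first literal match.
SCALA_VERSION=$(mvn --non-recursive --file /stackable/spark/pom.xml \
  help:evaluate -Dexpression=scala.version -q -DforceStdout)
SCALA_BINARY_VERSION=$(mvn --non-recursive --file /stackable/spark/pom.xml \
  help:evaluate -Dexpression=scala.binary.version -q -DforceStdout)
echo "Scala ${SCALA_VERSION} (binary ${SCALA_BINARY_VERSION})"
```

Inside a hermetic image build the `grep` variant is a defensible choice: it needs no plugin resolution and no network access.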
 
 # spark-builder: Build Spark into /stackable/spark-${PRODUCT_VERSION}/dist,
 # download additional JARs and perform checks
@@ -173,26 +74,11 @@ RUN <<EOF
 MAVEN_BIN="/usr/bin/mvn"
 export MAVEN_OPTS="-Xss64m -Xmx2g -XX:ReservedCodeCacheSize=1g"
 
-case "${PRODUCT_VERSION}" in
-  4*)
-    # The Spark 4 script has a --connect option which is not available in Spark 3.
-    # This option is required to build Spark Connect.
-    # Also this option breaks the Spark 3 build so we ensure it's only provided here.
-    ./dev/make-distribution.sh \
-      --mvn "${MAVEN_BIN}" \
-      --connect \
-      -Dhadoop.version="${HADOOP_VERSION}-stackable${RELEASE_VERSION}" \
-      -DskipTests \
-      -P'hadoop-3' -Pkubernetes -Phive -Phive-thriftserver
-    ;;
-  *)
-    ./dev/make-distribution.sh \
-      --mvn "${MAVEN_BIN}" \
-      -Dhadoop.version="${HADOOP_VERSION}-stackable${RELEASE_VERSION}" \
-      -DskipTests \
-      -P'hadoop-3' -Pkubernetes -Phive -Phive-thriftserver
-    ;;
-esac
+./dev/make-distribution.sh \
+  --mvn "${MAVEN_BIN}" \
+  -Dhadoop.version="${HADOOP_VERSION}-stackable${RELEASE_VERSION}" \
+  -DskipTests \
+  -P'hadoop-3' -Pkubernetes -Phive -Phive-thriftserver
 
 sed -i "s/${NEW_VERSION}/${ORIGINAL_VERSION}/g" assembly/target/bom.json
 EOF
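The `sed` call above rewrites the `-stackable`-suffixed version in the generated SBOM back to the upstream version string. A self-contained sketch of that substitution (file content and versions are illustrative):

```bash
# Sketch of the bom.json rewrite with illustrative versions.
ORIGINAL_VERSION="3.5.6"
NEW_VERSION="${ORIGINAL_VERSION}-stackable25.7.0"
printf '{"metadata":{"component":{"version":"%s"}}}\n' "${NEW_VERSION}" > bom.json
sed -i "s/${NEW_VERSION}/${ORIGINAL_VERSION}/g" bom.json
cat bom.json   # -> {"metadata":{"component":{"version":"3.5.6"}}}
```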
@@ -206,18 +92,9 @@ RUN <<EOF
 mkdir -p dist/connect
 cd dist/connect
 
-case "${PRODUCT_VERSION}" in
-  4*)
-    cp "/stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/sql/connect/server/target/spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}.jar" .
-    cp "/stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/sql/connect/common/target/spark-connect-common_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}.jar" .
-    cp "/stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/sql/connect/client/jvm/target/spark-connect-client-jvm_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}.jar" .
-    ;;
-  *)
-    cp "/stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/connector/connect/server/target/spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}.jar" .
-    cp "/stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/connector/connect/common/target/spark-connect-common_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}.jar" .
-    cp "/stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/connector/connect/client/jvm/target/spark-connect-client-jvm_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}.jar" .
-    ;;
-esac
+cp "/stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/connector/connect/server/target/spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}.jar" .
+cp "/stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/connector/connect/common/target/spark-connect-common_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}.jar" .
+cp "/stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/connector/connect/client/jvm/target/spark-connect-client-jvm_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}.jar" .
 
 # This link is needed by the operator and is kept for backwards compatibility.
 # TODO: remove it at some time in the future.
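After the three `cp` calls, `dist/connect` should contain exactly the server, common, and client JVM artifacts. A small hedged sanity check, not part of the Dockerfile (naming pattern taken from the copy commands above):

```bash
# Sketch: fail the build early if any of the three Connect JARs is missing.
for name in spark-connect spark-connect-common spark-connect-client-jvm; do
  jar="${name}_${SCALA_BINARY_VERSION}-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}.jar"
  [[ -f "${jar}" ]] || { echo "missing ${jar}" >&2; exit 1; }
done
```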
@@ -272,12 +149,17 @@ WORKDIR /stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/dist/extra-jars
 
 RUN <<EOF
 # Download jackson-dataformat-xml, stax2-api, and woodstox-core which are required for logging.
-curl --fail https://repo.stackable.tech/repository/packages/jackson-dataformat-xml/jackson-dataformat-xml-${JACKSON_DATAFORMAT_XML_VERSION}.jar \
-  -o /stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/dist/extra-jars/jackson-dataformat-xml-${JACKSON_DATAFORMAT_XML_VERSION}.jar
-curl --fail https://repo.stackable.tech/repository/packages/stax2-api/stax2-api-${STAX2_API_VERSION}.jar \
-  -o /stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/dist/extra-jars/stax2-api-${STAX2_API_VERSION}.jar
-curl --fail https://repo.stackable.tech/repository/packages/woodstox-core/woodstox-core-${WOODSTOX_CORE_VERSION}.jar \
-  -o /stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/dist/extra-jars/woodstox-core-${WOODSTOX_CORE_VERSION}.jar
+mvn dependency:get -Dartifact=com.fasterxml.jackson.dataformat:jackson-dataformat-xml:${JACKSON_DATAFORMAT_XML_VERSION}
+cp /root/.m2/repository/com/fasterxml/jackson/dataformat/jackson-dataformat-xml/${JACKSON_DATAFORMAT_XML_VERSION}/jackson-dataformat-xml-${JACKSON_DATAFORMAT_XML_VERSION}.jar \
+  /stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/dist/extra-jars/jackson-dataformat-xml-${JACKSON_DATAFORMAT_XML_VERSION}.jar
+
+mvn dependency:get -Dartifact=org.codehaus.woodstox:stax2-api:${STAX2_API_VERSION}
+cp /root/.m2/repository/org/codehaus/woodstox/stax2-api/${STAX2_API_VERSION}/stax2-api-${STAX2_API_VERSION}.jar \
+  /stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/dist/extra-jars/stax2-api-${STAX2_API_VERSION}.jar
+
+mvn dependency:get -Dartifact=com.fasterxml.woodstox:woodstox-core:${WOODSTOX_CORE_VERSION}
+cp /root/.m2/repository/com/fasterxml/woodstox/woodstox-core/${WOODSTOX_CORE_VERSION}/woodstox-core-${WOODSTOX_CORE_VERSION}.jar \
+  /stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/dist/extra-jars/woodstox-core-${WOODSTOX_CORE_VERSION}.jar
 
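`mvn dependency:get` only downloads an artifact into the local Maven repository, which is why every call above is paired with a `cp` out of the repository's `groupId/artifactId/version` directory layout. A sketch of that layout with an illustrative version (`/root/.m2` matching the paths used above):

```bash
# Sketch: dependency:get populates the local repository; the on-disk path
# mirrors groupId (dots become slashes), artifactId, and version.
mvn dependency:get -Dartifact=org.codehaus.woodstox:stax2-api:4.2.2
ls /root/.m2/repository/org/codehaus/woodstox/stax2-api/4.2.2/stax2-api-4.2.2.jar
```

The earlier hbase-connectors stage achieves the same effect in one step with `dependency:copy` and `-DoutputDirectory`.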
 # Get the correct `tini` binary for our architecture.
 curl --fail "https://repo.stackable.tech/repository/packages/tini/tini-${TINI_VERSION}-${TARGETARCH}" \