# syntax=docker/dockerfile:1.16.0@sha256:e2dd261f92e4b763d789984f6eab84be66ab4f5f08052316d8eb8f173593acf7
# check=error=true

- FROM stackable/image/java-devel AS hadoop-builder
-
- ARG PRODUCT
- ARG RELEASE
- ARG ASYNC_PROFILER
- ARG JMX_EXPORTER
- ARG PROTOBUF
- ARG TARGETARCH
- ARG TARGETOS
- ARG STACKABLE_USER_UID
-
- WORKDIR /stackable
-
- COPY --chown=${STACKABLE_USER_UID}:0 shared/protobuf/stackable/patches/patchable.toml /stackable/src/shared/protobuf/stackable/patches/patchable.toml
- COPY --chown=${STACKABLE_USER_UID}:0 shared/protobuf/stackable/patches/${PROTOBUF} /stackable/src/shared/protobuf/stackable/patches/${PROTOBUF}
-
- RUN <<EOF
- rpm --install --replacepkgs https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm
- microdnf update
- # boost is a build dependency starting in Hadoop 3.4.0 if compiling native code
- # automake and libtool are required to build protobuf
- microdnf install boost1.78-devel automake libtool
- microdnf clean all
- rm -rf /var/cache/yum
- mkdir /opt/protobuf
- chown ${STACKABLE_USER_UID}:0 /opt/protobuf
- EOF
-
- USER ${STACKABLE_USER_UID}
- # This Protobuf version is the exact version as used in the Hadoop Dockerfile
- # See https://github.com/apache/hadoop/blob/trunk/dev-support/docker/pkg-resolver/install-protobuf.sh
- # (this was hardcoded in the Dockerfile in earlier versions of Hadoop, make sure to look at the exact version in Github)
- RUN <<EOF
- cd "$(/stackable/patchable --images-repo-root=src checkout shared/protobuf ${PROTOBUF})"
-
- # Create snapshot of the source code including custom patches
- tar -czf /stackable/protobuf-${PROTOBUF}-src.tar.gz .
-
- ./autogen.sh
- ./configure --prefix=/opt/protobuf
- make "-j$(nproc)"
- make install
- (cd .. && rm -r ${PROTOBUF})
- EOF
-
- ENV PROTOBUF_HOME=/opt/protobuf
- ENV PATH="${PATH}:/opt/protobuf/bin"
-
- RUN <<EOF
- # async-profiler
- ARCH="${TARGETARCH/amd64/x64}"
- curl "https://repo.stackable.tech/repository/packages/async-profiler/async-profiler-${ASYNC_PROFILER}-${TARGETOS}-${ARCH}.tar.gz" | tar -xzC .
- ln -s "/stackable/async-profiler-${ASYNC_PROFILER}-${TARGETOS}-${ARCH}" /stackable/async-profiler
-
- # JMX Exporter
- mkdir /stackable/jmx
- curl "https://repo.stackable.tech/repository/packages/jmx-exporter/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" -o "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar"
- chmod -x "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar"
- ln -s "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" /stackable/jmx/jmx_prometheus_javaagent.jar
- EOF
-
- WORKDIR /build
- COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/patches/patchable.toml /build/src/hadoop/stackable/patches/patchable.toml
- COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/patches/${PRODUCT} /build/src/hadoop/stackable/patches/${PRODUCT}
- COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/fuse_dfs_wrapper /build
- COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/jmx /stackable/jmx
- USER ${STACKABLE_USER_UID}
- # Hadoop Pipes requires libtirpc to build, whose headers are not packaged in RedHat UBI, so skip building this module
- # Build from source to enable FUSE module, and to apply custom patches.
- # Also skip building the yarn, mapreduce and minicluster modules: this will result in the modules being excluded but not all
- # jar files will be stripped if they are needed elsewhere e.g. share/hadoop/yarn will not be part of the build, but yarn jars
- # will still exist in share/hadoop/tools as they would be needed by the resource estimator tool. Such jars are removed in a later step.
- RUN <<EOF
- cd "$(/stackable/patchable --images-repo-root=src checkout hadoop ${PRODUCT})"
-
- ORIGINAL_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)
- NEW_VERSION=${PRODUCT}-stackable${RELEASE}
-
- mvn versions:set -DnewVersion=${NEW_VERSION}
-
- # Since we skip building the hadoop-pipes module, we need to set the version to the original version so it can be pulled from Maven Central
- sed -e '/<artifactId>hadoop-pipes<\/artifactId>/,/<\/dependency>/ { s/<version>.*<\/version>/<version>'"$ORIGINAL_VERSION"'<\/version>/ }' -i hadoop-tools/hadoop-tools-dist/pom.xml
-
- # Create snapshot of the source code including custom patches
- tar -czf /stackable/hadoop-${NEW_VERSION}-src.tar.gz .
-
- mvn \
- --batch-mode \
- --no-transfer-progress \
- clean package install \
- -Pdist,native \
- -pl '!hadoop-tools/hadoop-pipes' \
- -Dhadoop.version=${NEW_VERSION} \
- -Drequire.fuse=true \
- -DskipTests \
- -Dmaven.javadoc.skip=true
-
- mkdir -p /stackable/patched-libs/maven/org/apache
- cp -r /stackable/.m2/repository/org/apache/hadoop /stackable/patched-libs/maven/org/apache
-
- cp -r hadoop-dist/target/hadoop-${NEW_VERSION} /stackable/hadoop-${NEW_VERSION}
- sed -i "s/${NEW_VERSION}/${ORIGINAL_VERSION}/g" hadoop-dist/target/bom.json
- mv hadoop-dist/target/bom.json /stackable/hadoop-${NEW_VERSION}/hadoop-${NEW_VERSION}.cdx.json
-
- # HDFS fuse-dfs is not part of the regular dist output, so we need to copy it in ourselves
- cp hadoop-hdfs-project/hadoop-hdfs-native-client/target/main/native/fuse-dfs/fuse_dfs /stackable/hadoop-${NEW_VERSION}/bin
-
- # Remove source code
- (cd .. && rm -r ${PRODUCT})
-
- ln -s /stackable/hadoop-${NEW_VERSION} /stackable/hadoop
-
- mv /build/fuse_dfs_wrapper /stackable/hadoop/bin
-
- # Remove unneeded binaries:
- # - code sources
- # - mapreduce/yarn binaries that were built as cross-project dependencies
- # - minicluster (only used for testing) and test .jars
- # - json-io: this is a transitive dependency pulled in by cedarsoft/java-utils/json-io and is excluded in 3.4.0. See CVE-2023-34610.
- rm -rf /stackable/hadoop/share/hadoop/common/sources/
- rm -rf /stackable/hadoop/share/hadoop/hdfs/sources/
- rm -rf /stackable/hadoop/share/hadoop/tools/sources/
- rm -rf /stackable/hadoop/share/hadoop/tools/lib/json-io-*.jar
- rm -rf /stackable/hadoop/share/hadoop/tools/lib/hadoop-mapreduce-client-*.jar
- rm -rf /stackable/hadoop/share/hadoop/tools/lib/hadoop-yarn-server*.jar
- find /stackable/hadoop -name 'hadoop-minicluster-*.jar' -type f -delete
- find /stackable/hadoop -name 'hadoop-client-minicluster-*.jar' -type f -delete
- find /stackable/hadoop -name 'hadoop-*tests.jar' -type f -delete
- rm -rf /stackable/.m2
-
- # Set correct groups; make sure only required artifacts for the final image are located in /stackable
- chmod -R g=u /stackable
- EOF
+ FROM stackable/image/hadoop/hadoop AS hadoop-builder

FROM stackable/image/java-devel AS hdfs-utils-builder

ARG HDFS_UTILS
ARG PRODUCT
+ ARG RELEASE
ARG STACKABLE_USER_UID
+ ARG HADOOP_HADOOP
+ # Reassign the arg to `HADOOP_VERSION` for better readability.
+ # It is passed as `HADOOP_HADOOP` because versions.py has to contain `hadoop/hadoop` to establish a dependency on the Hadoop builder.
+ # The value of `hadoop/hadoop` is transformed by `bake` and automatically passed as the `HADOOP_HADOOP` arg.
+ ENV HADOOP_VERSION=${HADOOP_HADOOP}

# Starting with hdfs-utils 0.4.0 we need to use Java 17 for compilation.
# We can not simply use java-devel with Java 17, as it is also used to compile Hadoop in this
@@ -161,25 +35,31 @@ WORKDIR /stackable
COPY --chown=${STACKABLE_USER_UID}:0 hadoop/hdfs-utils/stackable/patches/patchable.toml /stackable/src/hadoop/hdfs-utils/stackable/patches/patchable.toml
COPY --chown=${STACKABLE_USER_UID}:0 hadoop/hdfs-utils/stackable/patches/${HDFS_UTILS} /stackable/src/hadoop/hdfs-utils/stackable/patches/${HDFS_UTILS}

+ COPY --from=hadoop-builder --chown=${STACKABLE_USER_UID}:0 /stackable/patched-libs /stackable/patched-libs
+
# The Stackable HDFS utils contain an OPA authorizer, group mapper & topology provider.
# The topology provider provides rack awareness functionality for HDFS by allowing users to specify Kubernetes
# labels to build a rackID from.
# Starting with hdfs-utils version 0.3.0 the topology provider is not a standalone jar anymore and included in hdfs-utils.
RUN <<EOF
cd "$(/stackable/patchable --images-repo-root=src checkout hadoop/hdfs-utils ${HDFS_UTILS})"

+ # Make Maven aware of custom Stackable libraries
+ mkdir -p /stackable/.m2/repository
+ cp -r /stackable/patched-libs/maven/* /stackable/.m2/repository
+
# Create snapshot of the source code including custom patches
tar -czf /stackable/hdfs-utils-${HDFS_UTILS}-src.tar.gz .

mvn \
--batch-mode \
--no-transfer-progress \
clean package \
- -P hadoop-${PRODUCT} \
+ -P hadoop-${HADOOP_VERSION} \
+ -Dhadoop.version=${HADOOP_VERSION}-stackable${RELEASE} \
-DskipTests \
-Dmaven.javadoc.skip=true

- mkdir -p /stackable
cp target/hdfs-utils-$HDFS_UTILS.jar /stackable/hdfs-utils-${HDFS_UTILS}.jar
rm -rf hdfs-utils-main

@@ -191,11 +71,15 @@ FROM stackable/image/java-base AS final

ARG PRODUCT
ARG RELEASE
- ARG TARGETARCH
- ARG TARGETOS
+ ARG HADOOP_HADOOP
+ # Reassign the arg to `HADOOP_VERSION` for better readability.
+ ENV HADOOP_VERSION=${HADOOP_HADOOP}
ARG HDFS_UTILS
- ARG ASYNC_PROFILER
ARG STACKABLE_USER_UID
+ ARG ASYNC_PROFILER
+ ARG JMX_EXPORTER
+ ARG TARGETARCH
+ ARG TARGETOS

LABEL \
name="Apache Hadoop" \
@@ -206,17 +90,13 @@ LABEL \
summary="The Stackable image for Apache Hadoop." \
description="This image is deployed by the Stackable Operator for Apache Hadoop / HDFS."

+ COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE} /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE}
+ COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/*-src.tar.gz /stackable

- COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/hadoop-${PRODUCT}-stackable${RELEASE} /stackable/hadoop-${PRODUCT}-stackable${RELEASE}
- COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/hadoop-${PRODUCT}-stackable${RELEASE}-src.tar.gz /stackable/
- COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/async-profiler-${ASYNC_PROFILER}-* /stackable/async-profiler-${ASYNC_PROFILER}
- COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/jmx /stackable/jmx
- COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/protobuf-*-src.tar.gz /stackable/
- COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/patched-libs /stackable/patched-libs
-
- COPY --chown=${STACKABLE_USER_UID}:0 --from=hdfs-utils-builder /stackable/hdfs-utils-${HDFS_UTILS}.jar /stackable/hadoop-${PRODUCT}-stackable${RELEASE}/share/hadoop/common/lib/hdfs-utils-${HDFS_UTILS}.jar
+ COPY --chown=${STACKABLE_USER_UID}:0 --from=hdfs-utils-builder /stackable/hdfs-utils-${HDFS_UTILS}.jar /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE}/share/hadoop/common/lib/hdfs-utils-${HDFS_UTILS}.jar
COPY --chown=${STACKABLE_USER_UID}:0 --from=hdfs-utils-builder /stackable/hdfs-utils-${HDFS_UTILS}-src.tar.gz /stackable

+ COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/jmx /stackable/jmx
COPY --chown=${STACKABLE_USER_UID}:0 hadoop/licenses /licenses

# fuse is required for fusermount (called by fuse_dfs)
@@ -241,21 +121,24 @@ rm -rf /var/cache/yum
# It is so non-root users (as we are) can mount a FUSE device and let other users access it
echo "user_allow_other" > /etc/fuse.conf

- ln -s "/stackable/hadoop-${PRODUCT}-stackable${RELEASE}" /stackable/hadoop
- chown --no-dereference "${STACKABLE_USER_UID}:0" /stackable/hadoop
- chmod g=u "/stackable/hadoop-${PRODUCT}-stackable${RELEASE}"
- chmod g=u /stackable/*-src.tar.gz
+ ln -s "/stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE}" /stackable/hadoop

+ # async-profiler
ARCH="${TARGETARCH/amd64/x64}"
- mv /stackable/async-profiler-${ASYNC_PROFILER}* "/stackable/async-profiler-${ASYNC_PROFILER-}-${TARGETOS}-${ARCH}"
- chmod g=u "/stackable/async-profiler-${ASYNC_PROFILER-}-${TARGETOS}-${ARCH}"
+ curl "https://repo.stackable.tech/repository/packages/async-profiler/async-profiler-${ASYNC_PROFILER}-${TARGETOS}-${ARCH}.tar.gz" | tar -xzC /stackable
ln -s "/stackable/async-profiler-${ASYNC_PROFILER}-${TARGETOS}-${ARCH}" /stackable/async-profiler
- chown --no-dereference "${STACKABLE_USER_UID}:0" /stackable/async-profiler

- chmod g=u /stackable/jmx
- chmod g=u /stackable/patched-libs
+ # JMX Exporter
+ curl "https://repo.stackable.tech/repository/packages/jmx-exporter/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" -o "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar"
+ chmod -x "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar"
+ ln -s "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" /stackable/jmx/jmx_prometheus_javaagent.jar

+ # Set correct permissions and ownerships
+ chown --recursive ${STACKABLE_USER_UID}:0 /stackable/hadoop /stackable/jmx /stackable/async-profiler "/stackable/async-profiler-${ASYNC_PROFILER}-${TARGETOS}-${ARCH}"
+ chmod --recursive g=u /stackable/jmx /stackable/async-profiler "/stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE}"
+ EOF

+ RUN <<EOF
# ----------------------------------------
# Checks
# This section is to run final checks to ensure the created final images