diff --git a/dev/Dockerfile b/dev/Dockerfile index 1cc70beda5..baa3e09747 100644 --- a/dev/Dockerfile +++ b/dev/Dockerfile @@ -37,9 +37,9 @@ RUN mkdir -p ${HADOOP_HOME} && mkdir -p ${SPARK_HOME} && mkdir -p /home/iceberg/ WORKDIR ${SPARK_HOME} # Remember to also update `tests/conftest`'s spark setting -ENV SPARK_VERSION=3.5.3 -ENV ICEBERG_SPARK_RUNTIME_VERSION=3.5_2.12 -ENV ICEBERG_VERSION=1.6.0 +ENV SPARK_VERSION=3.5.4 +ENV ICEBERG_SPARK_RUNTIME_VERSION=3.5_4.12 +ENV ICEBERG_VERSION=1.8.0 ENV PYICEBERG_VERSION=0.8.1 RUN curl --retry 5 -s -C - https://dlcdn.apache.org/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop3.tgz -o spark-${SPARK_VERSION}-bin-hadoop3.tgz \ @@ -47,11 +47,10 @@ RUN curl --retry 5 -s -C - https://dlcdn.apache.org/spark/spark-${SPARK_VERSION} && rm -rf spark-${SPARK_VERSION}-bin-hadoop3.tgz # Download iceberg spark runtime -RUN curl --retry 5 -s https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}/${ICEBERG_VERSION}/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar -Lo iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar \ - && mv iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar /opt/spark/jars +RUN curl --retry 5 -s https://repository.apache.org/content/groups/snapshots/org/apache/iceberg/iceberg-spark-runtime-3.5_2.12/1.8.0-SNAPSHOT/iceberg-spark-runtime-3.5_2.12-1.8.0-20250115.001733-68.jar -Lo /opt/spark/jars/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar # Download AWS bundle -RUN curl --retry 5 -s https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-aws-bundle/${ICEBERG_VERSION}/iceberg-aws-bundle-${ICEBERG_VERSION}.jar -Lo /opt/spark/jars/iceberg-aws-bundle-${ICEBERG_VERSION}.jar +RUN curl --retry 5 -s https://repository.apache.org/content/groups/snapshots/org/apache/iceberg/iceberg-aws-bundle/1.8.0-SNAPSHOT/iceberg-aws-bundle-1.8.0-20250115.002654-137.jar -Lo /opt/spark/jars/iceberg-aws-bundle-${ICEBERG_VERSION}.jar COPY spark-defaults.conf /opt/spark/conf ENV PATH="/opt/spark/sbin:/opt/spark/bin:${PATH}" diff --git a/dev/provision.py b/dev/provision.py index b358da6593..47c950f6f8 100644 --- a/dev/provision.py +++ b/dev/provision.py @@ -401,3 +401,43 @@ ) spark.sql(f"ALTER TABLE {catalog_name}.default.test_empty_scan_ordered_str WRITE ORDERED BY id") spark.sql(f"INSERT INTO {catalog_name}.default.test_empty_scan_ordered_str VALUES 'a', 'c'") + + + spark.sql( + f""" + CREATE OR REPLACE TABLE {catalog_name}.default.test_deletion_vectors ( + dt date, + number integer, + letter string + ) + USING iceberg + TBLPROPERTIES ( + 'write.delete.mode'='merge-on-read', + 'write.update.mode'='merge-on-read', + 'write.merge.mode'='merge-on-read', + 'format-version'='3' + ); + """ + ) + + + spark.sql( + f""" + INSERT INTO {catalog_name}.default.test_deletion_vectors + VALUES + (CAST('2023-03-01' AS date), 1, 'a'), + (CAST('2023-03-02' AS date), 2, 'b'), + (CAST('2023-03-03' AS date), 3, 'c'), + (CAST('2023-03-04' AS date), 4, 'd'), + (CAST('2023-03-05' AS date), 5, 'e'), + (CAST('2023-03-06' AS date), 6, 'f'), + (CAST('2023-03-07' AS date), 7, 'g'), + (CAST('2023-03-08' AS date), 8, 'h'), + (CAST('2023-03-09' AS date), 9, 'i'), + (CAST('2023-03-10' AS date), 10, 'j'), + (CAST('2023-03-11' AS date), 11, 'k'), + (CAST('2023-03-12' AS date), 12, 'l'); + """ + ) + + spark.sql(f"DELETE FROM {catalog_name}.default.test_deletion_vectors WHERE number = 9")