
Commit e3dc967

add default tag integration test (#28)
* add default tag integration test
1 parent ae70ca5 commit e3dc967

12 files changed: +103 -9 lines changed

Makefile

Lines changed: 15 additions & 0 deletions
@@ -84,13 +84,28 @@ test-sagemaker: install-sdk build-tests
 	# History server tests can't run in parallel since they use the same container name.
 	pytest -s -vv test/integration/history \
 	--repo=$(DEST_REPO) --tag=$(VERSION) --durations=0 \
+	--spark-version=$(SPARK_VERSION) \
+	--framework-version=$(FRAMEWORK_VERSION) \
 	--role $(ROLE) \
 	--image_uri $(IMAGE_URI) \
 	--region ${REGION} \
 	--domain ${AWS_DOMAIN}
 	# OBJC_DISABLE_INITIALIZE_FORK_SAFETY: https://github.com/ansible/ansible/issues/32499#issuecomment-341578864
 	OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES pytest --workers auto -s -vv test/integration/sagemaker \
 	--repo=$(DEST_REPO) --tag=$(VERSION) --durations=0 \
+	--spark-version=$(SPARK_VERSION) \
+	--framework-version=$(FRAMEWORK_VERSION) \
+	--role $(ROLE) \
+	--image_uri $(IMAGE_URI) \
+	--region ${REGION} \
+	--domain ${AWS_DOMAIN}
+
+# This is a separate target because it runs only in the prod stage.
+test-prod:
+	pytest -s -vv test/integration/tag \
+	--repo=$(DEST_REPO) --tag=$(VERSION) --durations=0 \
+	--spark-version=$(SPARK_VERSION) \
+	--framework-version=$(FRAMEWORK_VERSION) \
 	--role $(ROLE) \
 	--image_uri $(IMAGE_URI) \
 	--region ${REGION} \
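
The new target is driven entirely by make variables, so a prod-stage pipeline can invoke it the same way as test-sagemaker. A minimal illustrative invocation — every value below is a placeholder, not taken from this commit:

    make test-prod \
        DEST_REPO=<ecr-repo> VERSION=<image-tag> \
        SPARK_VERSION=2.4 FRAMEWORK_VERSION=py37 \
        ROLE=<execution-role-arn> IMAGE_URI=<image-uri> \
        REGION=us-west-2 AWS_DOMAIN=amazonaws.com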

test/integration/conftest.py

Lines changed: 14 additions & 0 deletions
@@ -23,6 +23,8 @@ def pytest_addoption(parser) -> str:
     parser.addoption("--region", default="us-west-2")
     parser.addoption("--repo")
     parser.addoption("--tag")
+    parser.addoption("--spark-version")
+    parser.addoption("--framework-version")
     parser.addoption("--domain", default="amazonaws.com")
 
 
@@ -60,6 +62,18 @@ def tag(request) -> str:
     return request.config.getoption("--tag")
 
 
+@pytest.fixture(scope="session")
+def spark_version(request) -> str:
+    """Return the Spark version of the Docker image to use in tests."""
+    return request.config.getoption("--spark-version")
+
+
+@pytest.fixture(scope="session")
+def framework_version(request) -> str:
+    """Return the Docker image framework_version to use in tests."""
+    return request.config.getoption("--framework-version")
+
+
 @pytest.fixture(scope="session")
 def domain(request) -> str:
     """Return AWS domain"""

test/integration/local/test_multinode_container.py

Lines changed: 1 addition & 0 deletions
@@ -11,6 +11,7 @@
 # ANY KIND, either express or implied. See the License for the specific
 # language governing permissions and limitations under the License.
 import subprocess
+
 import pytest
 
 
test/integration/tag/… (new file; the Makefile's prod-only tag suite directory — exact filename not shown in this view)

Lines changed: 60 additions & 0 deletions

@@ -0,0 +1,60 @@
+# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from datetime import datetime
+
+from sagemaker.s3 import S3Downloader, S3Uploader
+from sagemaker.spark.processing import PySparkProcessor
+
+
+def test_sagemaker_spark_processor_default_tag(spark_version, role, sagemaker_session, sagemaker_client):
+    """Test that the Spark processor works with the default image tag."""
+    spark = PySparkProcessor(
+        base_job_name="sm-spark-py",
+        framework_version=spark_version,
+        role=role,
+        instance_count=1,
+        instance_type="ml.c5.xlarge",
+        max_runtime_in_seconds=1200,
+        sagemaker_session=sagemaker_session,
+    )
+    bucket = spark.sagemaker_session.default_bucket()
+    timestamp = datetime.now().isoformat()
+    output_data_uri = "s3://{}/spark/output/sales/{}".format(bucket, timestamp)
+    spark_event_logs_key_prefix = "spark/spark-events/{}".format(timestamp)
+    spark_event_logs_s3_uri = "s3://{}/{}".format(bucket, spark_event_logs_key_prefix)
+
+    with open("test/resources/data/files/data.jsonl") as data:
+        body = data.read()
+        input_data_uri = "s3://{}/spark/input/data.jsonl".format(bucket)
+        S3Uploader.upload_string_as_file_body(
+            body=body, desired_s3_uri=input_data_uri, sagemaker_session=sagemaker_session
+        )
+
+    spark.run(
+        submit_app="test/resources/code/python/hello_py_spark/hello_py_spark_app.py",
+        submit_py_files=["test/resources/code/python/hello_py_spark/hello_py_spark_udfs.py"],
+        arguments=["--input", input_data_uri, "--output", output_data_uri],
+        spark_event_logs_s3_uri=spark_event_logs_s3_uri,
+        wait=True,
+    )
+
+    processing_job = spark.latest_job
+    waiter = sagemaker_client.get_waiter("processing_job_completed_or_stopped")
+    waiter.wait(
+        ProcessingJobName=processing_job.job_name,
+        # Poll every 15 seconds; time out after 15 minutes.
+        WaiterConfig={"Delay": 15, "MaxAttempts": 60},
+    )
+
+    output_contents = S3Downloader.list(output_data_uri, sagemaker_session=sagemaker_session)
+    assert len(output_contents) != 0
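
Because no image_uri is passed to PySparkProcessor here, the SageMaker SDK resolves the container image from framework_version alone — so this test only passes once an image with the default tag has actually been published, which is why the Makefile runs the tag suite solely in the prod stage. The sagemaker_client used for the waiter is presumably a plain boto3 client; a sketch of such a fixture under that assumption (the real fixture lives in conftest.py and is not shown in this commit):

    import boto3
    import pytest

    @pytest.fixture(scope="session")
    def sagemaker_client(region):
        # "processing_job_completed_or_stopped", used by the test above, is a
        # standard botocore waiter available on this client.
        return boto3.client("sagemaker", region_name=region)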

test/integration/sagemaker/test_spark.py

Lines changed: 3 additions & 6 deletions
@@ -91,11 +91,10 @@ def configuration() -> list:
     return configuration
 
 
-def test_sagemaker_pyspark_multinode(tag, role, image_uri, configuration, sagemaker_session, region, sagemaker_client):
+def test_sagemaker_pyspark_multinode(role, image_uri, configuration, sagemaker_session, region, sagemaker_client):
     """Test that basic multinode case works on 32KB of data"""
     spark = PySparkProcessor(
         base_job_name="sm-spark-py",
-        framework_version=tag,
         image_uri=image_uri,
         role=role,
         instance_count=2,
@@ -168,11 +167,10 @@ def test_sagemaker_pyspark_multinode(tag, role, image_uri, configuration, sagemaker_session, region, sagemaker_client):
 # TODO: similar integ test case for SSE-KMS. This would require test infrastructure bootstrapping a KMS key.
 # Currently, Spark jobs can read data encrypted with SSE-KMS (assuming the execution role has permission),
 # however our Hadoop version (2.8.5) does not support writing data with SSE-KMS (enabled in version 3.0.0).
-def test_sagemaker_pyspark_sse_s3(tag, role, image_uri, sagemaker_session, region, sagemaker_client):
+def test_sagemaker_pyspark_sse_s3(role, image_uri, sagemaker_session, region, sagemaker_client):
     """Test that Spark container can read and write S3 data encrypted with SSE-S3 (default AES256 encryption)"""
     spark = PySparkProcessor(
         base_job_name="sm-spark-py",
-        framework_version=tag,
         image_uri=image_uri,
         role=role,
         instance_count=2,
@@ -212,11 +210,10 @@ def test_sagemaker_pyspark_sse_s3(tag, role, image_uri, sagemaker_session, region, sagemaker_client):
     assert len(output_contents) != 0
 
 
-def test_sagemaker_scala_jar_multinode(tag, role, image_uri, configuration, sagemaker_session, sagemaker_client):
+def test_sagemaker_scala_jar_multinode(role, image_uri, configuration, sagemaker_session, sagemaker_client):
     """Test SparkJarProcessor using Scala application jar with external runtime dependency jars staged by SDK"""
     spark = SparkJarProcessor(
         base_job_name="sm-spark-scala",
-        framework_version=tag,
         image_uri=image_uri,
         role=role,
         instance_count=2,
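
These three tests pin the exact container via image_uri, so also forwarding the repo tag as framework_version was redundant — and misleading, since a repo tag need not be a valid framework version. With the tag fixture dropped, image selection rests entirely on image_uri. For reference, a sketch of how such a fixture could compose the existing options (hypothetical; the actual fixture in conftest.py is not shown in this commit):

    import pytest

    @pytest.fixture(scope="session")
    def image_uri(repo, tag):
        # Hypothetical composition of the --repo and --tag options into a
        # fully qualified image reference, e.g. "<ecr-repo>:<tag>".
        return "{}:{}".format(repo, tag)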

The remaining unit-test files pick up the same import regrouping — stdlib first, then third-party (pytest, click), then first-party smspark modules, separated by blank lines:

test/unit/test_bootstrapper.py

Lines changed: 1 addition & 0 deletions

@@ -14,6 +14,7 @@
 from unittest.mock import MagicMock, Mock, PropertyMock, call, mock_open, patch
 
 import pytest
+
 from smspark.bootstrapper import Bootstrapper
 from smspark.config import Configuration
 from smspark.defaults import default_resource_config

test/unit/test_cli.py

Lines changed: 1 addition & 0 deletions

@@ -19,6 +19,7 @@
 import click
 import pytest
 from click.testing import CliRunner
+
 from smspark.cli import submit, submit_main
 from smspark.errors import InputError
 from smspark.job import ProcessingJobManager

test/unit/test_history_server_cli.py

Lines changed: 4 additions & 3 deletions

@@ -10,15 +10,16 @@
 # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
 # ANY KIND, either express or implied. See the License for the specific
 # language governing permissions and limitations under the License.
-import pytest
-
 from dataclasses import dataclass
 from typing import Type, Union
 from unittest.mock import patch
+
+import pytest
 from click.testing import CliRunner
+
 from smspark.cli import submit
-from smspark.job import ProcessingJobManager
 from smspark.history_server_cli import run_history_server
+from smspark.job import ProcessingJobManager
 
 ARGS_FORMAT = "--event-logs-s3-uri {} --remote-domain-name {}"
 EVENT_LOGS_S3_URI = "s3://bucket"

test/unit/test_history_server_utils.py

Lines changed: 1 addition & 0 deletions

@@ -16,6 +16,7 @@
 from unittest.mock import MagicMock, call, mock_open, patch
 
 import pytest
+
 from smspark.bootstrapper import Bootstrapper
 from smspark.errors import InputError
 from smspark.history_server_utils import (

test/unit/test_spark_event_logs_publisher.py

Lines changed: 1 addition & 0 deletions

@@ -15,6 +15,7 @@
 from unittest.mock import call, mock_open, patch
 
 import pytest
+
 from smspark.spark_event_logs_publisher import (
     CONFIG_ENABLE_EVENT_LOG,
     CONFIG_EVENT_LOG_DIR_FORMAT,
