-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Add insta
/ snapshot testing to CLI & set up AWS mock
#13672
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
2887bb9
813d634
c3de620
0e11d36
cb7da8c
7c2b3fe
9146e4b
9d856c3
0574ab8
2045495
06c013d
4870219
a02625e
65809a7
05a562f
36f8550
921f229
107c515
2d430a0
4b56506
1581e7a
ad9734e
0b24daa
5f29c00
a3826d4
9cb1c99
53c9c51
eb87f60
855315f
fd3240d
17e8486
7d4050d
fe2d345
c037577
7293add
680b474
3c2ead2
7922db7
348e881
4238b41
a880c42
5f80368
5cafb77
7e3ed3f
194ce80
631a7bd
fc81ac9
83f81c5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
<!--- | ||
Licensed to the Apache Software Foundation (ASF) under one | ||
or more contributor license agreements. See the NOTICE file | ||
distributed with this work for additional information | ||
regarding copyright ownership. The ASF licenses this file | ||
to you under the Apache License, Version 2.0 (the | ||
"License"); you may not use this file except in compliance | ||
with the License. You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, | ||
software distributed under the License is distributed on an | ||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
KIND, either express or implied. See the License for the | ||
specific language governing permissions and limitations | ||
under the License. | ||
--> | ||
|
||
# Development instructions | ||
|
||
## Running Tests | ||
|
||
Tests can be run using `cargo` | ||
|
||
```shell | ||
cargo test | ||
``` | ||
|
||
## Running Storage Integration Tests | ||
|
||
By default, storage integration tests are not run. To run them you will need to set `TEST_STORAGE_INTEGRATION=1` and | ||
then provide the necessary configuration for that object store. | ||
|
||
For some of the tests, [snapshots](https://datafusion.apache.org/contributor-guide/testing.html#snapshot-testing) are used. | ||
|
||
### AWS | ||
|
||
To test the S3 integration against [Minio](https://github.com/minio/minio) | ||
|
||
First start up a container with Minio and load test files. | ||
|
||
```shell | ||
docker run -d \ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Did we consider having the tests themselves start the containers they need? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it’s a nice improvement if tests just handle the setup automatically! Created an issue to discuss: #15092 |
||
--name datafusion-test-minio \ | ||
-p 9000:9000 \ | ||
-e MINIO_ROOT_USER=TEST-DataFusionLogin \ | ||
-e MINIO_ROOT_PASSWORD=TEST-DataFusionPassword \ | ||
-v $(pwd)/../datafusion/core/tests/data:/source \ | ||
quay.io/minio/minio server /data | ||
|
||
docker exec datafusion-test-minio /bin/sh -c "\ | ||
mc ready local | ||
mc alias set localminio http://localhost:9000 TEST-DataFusionLogin TEST-DataFusionPassword && \ | ||
mc mb localminio/data && \ | ||
mc cp -r /source/* localminio/data" | ||
``` | ||
|
||
Setup environment | ||
|
||
```shell | ||
export TEST_STORAGE_INTEGRATION=1 | ||
export AWS_ACCESS_KEY_ID=TEST-DataFusionLogin | ||
export AWS_SECRET_ACCESS_KEY=TEST-DataFusionPassword | ||
export AWS_ENDPOINT=http://127.0.0.1:9000 | ||
export AWS_ALLOW_HTTP=true | ||
``` | ||
|
||
Note that `AWS_ENDPOINT` is set without slash at the end. | ||
|
||
Run tests | ||
|
||
```shell | ||
cargo test | ||
``` |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,46 +17,131 @@ | |
|
||
use std::process::Command; | ||
|
||
use assert_cmd::prelude::{CommandCargoExt, OutputAssertExt}; | ||
use predicates::prelude::predicate; | ||
use rstest::rstest; | ||
|
||
use insta::{glob, Settings}; | ||
use insta_cmd::{assert_cmd_snapshot, get_cargo_bin}; | ||
use std::{env, fs}; | ||
|
||
fn cli() -> Command { | ||
Command::new(get_cargo_bin("datafusion-cli")) | ||
} | ||
|
||
fn make_settings() -> Settings { | ||
let mut settings = Settings::clone_current(); | ||
settings.set_prepend_module_to_snapshot(false); | ||
settings.add_filter(r"Elapsed .* seconds\.", "[ELAPSED]"); | ||
settings.add_filter(r"DataFusion CLI v.*", "[CLI_VERSION]"); | ||
settings | ||
} | ||
|
||
#[cfg(test)] | ||
#[ctor::ctor] | ||
fn init() { | ||
// Enable RUST_LOG logging configuration for tests | ||
let _ = env_logger::try_init(); | ||
} | ||
|
||
// Disabled due to https://github.com/apache/datafusion/issues/10793 | ||
#[cfg(not(target_family = "windows"))] | ||
#[rstest] | ||
#[case::exec_from_commands( | ||
["--command", "select 1", "--format", "json", "-q"], | ||
"[{\"Int64(1)\":1}]\n" | ||
)] | ||
#[case::exec_multiple_statements( | ||
["--command", "select 1; select 2;", "--format", "json", "-q"], | ||
"[{\"Int64(1)\":1}]\n[{\"Int64(2)\":2}]\n" | ||
"statements", | ||
["--command", "select 1; select 2;", "-q"], | ||
)] | ||
#[case::exec_backslash( | ||
["--file", "tests/data/backslash.txt", "--format", "json", "-q"], | ||
"[{\"Utf8(\\\"\\\\\\\")\":\"\\\\\",\"Utf8(\\\"\\\\\\\\\\\")\":\"\\\\\\\\\",\"Utf8(\\\"\\\\\\\\\\\\\\\\\\\\\\\")\":\"\\\\\\\\\\\\\\\\\\\\\",\"Utf8(\\\"dsdsds\\\\\\\\\\\\\\\\\\\")\":\"dsdsds\\\\\\\\\\\\\\\\\",\"Utf8(\\\"\\\\t\\\")\":\"\\\\t\",\"Utf8(\\\"\\\\0\\\")\":\"\\\\0\",\"Utf8(\\\"\\\\n\\\")\":\"\\\\n\"}]\n" | ||
"backslash", | ||
["--file", "tests/sql/backslash.sql", "--format", "json", "-q"], | ||
)] | ||
#[case::exec_from_files( | ||
["--file", "tests/data/sql.txt", "--format", "json", "-q"], | ||
"[{\"Int64(1)\":1}]\n" | ||
"files", | ||
["--file", "tests/sql/select.sql", "-q"], | ||
)] | ||
#[case::set_batch_size( | ||
["--command", "show datafusion.execution.batch_size", "--format", "json", "-q", "-b", "1"], | ||
"[{\"name\":\"datafusion.execution.batch_size\",\"value\":\"1\"}]\n" | ||
"batch_size", | ||
["--command", "show datafusion.execution.batch_size", "-q", "-b", "1"], | ||
)] | ||
#[test] | ||
fn cli_quick_test<'a>( | ||
#[case] snapshot_name: &'a str, | ||
#[case] args: impl IntoIterator<Item = &'a str>, | ||
#[case] expected: &str, | ||
) { | ||
let mut cmd = Command::cargo_bin("datafusion-cli").unwrap(); | ||
let mut settings = make_settings(); | ||
settings.set_snapshot_suffix(snapshot_name); | ||
let _bound = settings.bind_to_scope(); | ||
|
||
let mut cmd = cli(); | ||
cmd.args(args); | ||
cmd.assert().stdout(predicate::eq(expected)); | ||
|
||
assert_cmd_snapshot!(cmd); | ||
} | ||
|
||
#[rstest] | ||
#[case("csv")] | ||
#[case("tsv")] | ||
#[case("table")] | ||
#[case("json")] | ||
#[case("nd-json")] | ||
#[case("automatic")] | ||
#[test] | ||
fn test_cli_format<'a>(#[case] format: &'a str) { | ||
blaginin marked this conversation as resolved.
Show resolved
Hide resolved
|
||
let mut settings = make_settings(); | ||
settings.set_snapshot_suffix(format); | ||
let _bound = settings.bind_to_scope(); | ||
|
||
let mut cmd = cli(); | ||
cmd.args(["--command", "select 1", "-q", "--format", format]); | ||
|
||
assert_cmd_snapshot!(cmd); | ||
} | ||
|
||
#[tokio::test] | ||
async fn test_cli() { | ||
if env::var("TEST_STORAGE_INTEGRATION").is_err() { | ||
eprintln!("Skipping external storages integration tests"); | ||
return; | ||
} | ||
|
||
let settings = make_settings(); | ||
let _bound = settings.bind_to_scope(); | ||
|
||
glob!("sql/integration/*.sql", |path| { | ||
let input = fs::read_to_string(path).unwrap(); | ||
assert_cmd_snapshot!(cli().pass_stdin(input)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. FWIW we have had really nice luck in influxdb_iox using insta, and instead of using the external snap shot testing we used inline snapshots (so the expected results are inline with the test code) It looks like this: fn test_union_not_nested() {
let plan = Arc::new(UnionExec::new(vec![other_node()]));
let opt = NestedUnion;
insta::assert_yaml_snapshot!(
OptimizationTest::new(plan, opt),
@r#"
input:
- " UnionExec"
- " EmptyExec"
output:
Ok:
- " UnionExec"
- " EmptyExec"
"#
);
} Is that possible with assert_cmt_snapshot? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is absolutely possible! My idea was to make it closer to However, I'm happy to make changes if you'd prefer it to be inline? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I personally prefer it to be inline, but perhaps we can do that as a follow on PR (the way we do this in influxdb_iox is that we have a test function that returns a string and then compare the string with the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
FWIW I think .slt keeps the tests setup and expected output together (the slt driver is separate for sure, but most of the tests are self contained in the .slt file |
||
}); | ||
} | ||
|
||
#[tokio::test] | ||
async fn test_aws_options() { | ||
// Separate test is needed to pass aws as options in sql and not via env | ||
|
||
if env::var("TEST_STORAGE_INTEGRATION").is_err() { | ||
eprintln!("Skipping external storages integration tests"); | ||
return; | ||
} | ||
|
||
let settings = make_settings(); | ||
let _bound = settings.bind_to_scope(); | ||
|
||
let access_key_id = | ||
env::var("AWS_ACCESS_KEY_ID").expect("AWS_ACCESS_KEY_ID is not set"); | ||
let secret_access_key = | ||
env::var("AWS_SECRET_ACCESS_KEY").expect("AWS_SECRET_ACCESS_KEY is not set"); | ||
let endpoint_url = env::var("AWS_ENDPOINT").expect("AWS_ENDPOINT is not set"); | ||
|
||
let input = format!( | ||
r#"CREATE EXTERNAL TABLE CARS | ||
STORED AS CSV | ||
LOCATION 's3://data/cars.csv' | ||
OPTIONS( | ||
'aws.access_key_id' '{}', | ||
'aws.secret_access_key' '{}', | ||
'aws.endpoint' '{}', | ||
'aws.allow_http' 'true' | ||
); | ||
|
||
SELECT * FROM CARS limit 1; | ||
"#, | ||
access_key_id, secret_access_key, endpoint_url | ||
); | ||
|
||
assert_cmd_snapshot!(cli().env_clear().pass_stdin(input)); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
--- | ||
source: tests/cli_integration.rs | ||
info: | ||
program: datafusion-cli | ||
args: [] | ||
stdin: "CREATE EXTERNAL TABLE CARS\nSTORED AS CSV\nLOCATION 's3://data/cars.csv'\nOPTIONS(\n 'aws.access_key_id' 'TEST-DataFusionLogin',\n 'aws.secret_access_key' 'TEST-DataFusionPassword',\n 'aws.endpoint' 'http://127.0.0.1:9000',\n 'aws.allow_http' 'true'\n);\n\nSELECT * FROM CARS limit 1;\n" | ||
--- | ||
success: true | ||
exit_code: 0 | ||
----- stdout ----- | ||
[CLI_VERSION] | ||
0 row(s) fetched. | ||
[ELAPSED] | ||
|
||
+-----+-------+---------------------+ | ||
| car | speed | time | | ||
+-----+-------+---------------------+ | ||
| red | 20.0 | 1996-04-12T12:05:03 | | ||
+-----+-------+---------------------+ | ||
1 row(s) fetched. | ||
[ELAPSED] | ||
|
||
\q | ||
|
||
----- stderr ----- |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I tested the instructions locally and they worked great 👍