diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 38089d97fe704..44c7f0463a721 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -121,7 +121,7 @@ /src/sql/src/plan/lowering.rs @MaterializeInc/cluster /src/sql-lexer @MaterializeInc/adapter /src/sql-parser @MaterializeInc/adapter -/src/sqllogictest @MaterializeInc/adapter +/src/sqllogictest @MaterializeInc/testing @ggevay /src/ssh-util @MaterializeInc/cluster /src/storage @MaterializeInc/cluster /src/storage-client @MaterializeInc/cluster diff --git a/Cargo.lock b/Cargo.lock index e1aaff2e105b2..29ffd14486cd2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "RustyXML" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b5ace29ee3216de37c0546865ad08edef58b0f9e76838ed8959a84a990e58c5" + [[package]] name = "addr2line" version = "0.17.0" @@ -25,7 +31,7 @@ checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ "cfg-if", "const-random", - "getrandom", + "getrandom 0.2.10", "once_cell", "version_check", "zerocopy", @@ -402,7 +408,30 @@ version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20cd0e2e25ea8e5f7e9df04578dc6cf5c83577fd09b1a46aaf5c85e1c33f2a7e" dependencies = [ - "event-listener", + "event-listener 5.3.1", + "event-listener-strategy", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-channel" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81953c529336010edd6d8e358f886d9581267795c61b19475b71314bffa46d35" +dependencies = [ + "concurrent-queue", + "event-listener 2.5.3", + "futures-core", +] + +[[package]] +name = "async-channel" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89b47800b0be77592da0afd425cc03468052844aff33b84e33cc696f64e77b6a" +dependencies = [ + "concurrent-queue", "event-listener-strategy", "futures-core", "pin-project-lite", @@ -423,6 +452,73 @@ dependencies = [ "zstd-safe", ] +[[package]] +name = "async-io" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43a2b323ccce0a1d90b449fd71f2a06ca7faa7c54c2751f06c9bd851fc061059" +dependencies = [ + "async-lock", + "cfg-if", + "concurrent-queue", + "futures-io", + "futures-lite 2.6.0", + "parking", + "polling", + "rustix", + "slab", + "tracing", + "windows-sys 0.59.0", +] + +[[package]] +name = "async-lock" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff6e472cdea888a4bd64f342f09b3f50e1886d32afe8df3d663c01140b811b18" +dependencies = [ + "event-listener 5.3.1", + "event-listener-strategy", + "pin-project-lite", +] + +[[package]] +name = "async-process" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63255f1dc2381611000436537bbedfe83183faa303a5a0edaf191edef06526bb" +dependencies = [ + "async-channel 2.3.1", + "async-io", + "async-lock", + "async-signal", + "async-task", + "blocking", + "cfg-if", + "event-listener 5.3.1", + "futures-lite 2.6.0", + "rustix", + "tracing", +] + +[[package]] +name = "async-signal" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "637e00349800c0bdf8bfc21ebbc0b6524abea702b0da4168ac00d070d0c0b9f3" +dependencies = [ + "async-io", + "async-lock", + "atomic-waker", + "cfg-if", + "futures-core", + "futures-io", + 
"rustix", + "signal-hook-registry", + "slab", + "windows-sys 0.59.0", +] + [[package]] name = "async-stream" version = "0.3.5" @@ -445,6 +541,12 @@ dependencies = [ "syn 2.0.63", ] +[[package]] +name = "async-task" +version = "4.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b75356056920673b02621b35afd0f7dda9306d03c79a30f5c56c44cf256e3de" + [[package]] name = "async-trait" version = "0.1.83" @@ -521,7 +623,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand", + "fastrand 2.3.0", "hex", "http 0.2.9", "hyper 0.14.27", @@ -560,7 +662,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand", + "fastrand 2.3.0", "http 0.2.9", "http-body 0.4.5", "percent-encoding", @@ -590,7 +692,7 @@ dependencies = [ "aws-smithy-xml", "aws-types", "bytes", - "fastrand", + "fastrand 2.3.0", "hex", "hmac", "http 0.2.9", @@ -620,7 +722,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand", + "fastrand 2.3.0", "http 0.2.9", "once_cell", "regex-lite", @@ -812,7 +914,7 @@ dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", "bytes", - "fastrand", + "fastrand 2.3.0", "h2 0.3.26", "http 0.2.9", "http-body 0.4.5", @@ -973,15 +1075,122 @@ dependencies = [ "tracing", ] +[[package]] +name = "azure_core" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b552ad43a45a746461ec3d3a51dfb6466b4759209414b439c165eb6a6b7729e" +dependencies = [ + "async-trait", + "base64 0.22.0", + "bytes", + "dyn-clone", + "futures", + "getrandom 0.2.10", + "hmac", + "http-types", + "once_cell", + "paste", + "pin-project", + "quick-xml", + "rand 0.8.5", + "reqwest 0.12.4", + "rustc_version", + "serde", + "serde_json", + "sha2", + "time", + "tracing", + "url", + "uuid", +] + +[[package]] +name = "azure_identity" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88ddd80344317c40c04b603807b63a5cefa532f1b43522e72f480a988141f744" +dependencies = [ + "async-lock", + "async-process", + "async-trait", + "azure_core", + "futures", + "oauth2", + "pin-project", + "serde", + "time", + "tracing", + "tz-rs", + "url", + "uuid", +] + +[[package]] +name = "azure_storage" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59f838159f4d29cb400a14d9d757578ba495ae64feb07a7516bf9e4415127126" +dependencies = [ + "RustyXML", + "async-lock", + "async-trait", + "azure_core", + "bytes", + "serde", + "serde_derive", + "time", + "tracing", + "url", + "uuid", +] + +[[package]] +name = "azure_storage_blobs" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97e83c3636ae86d9a6a7962b2112e3b19eb3903915c50ce06ff54ff0a2e6a7e4" +dependencies = [ + "RustyXML", + "azure_core", + "azure_storage", + "azure_svc_blobstorage", + "bytes", + "futures", + "serde", + "serde_derive", + "serde_json", + "time", + "tracing", + "url", + "uuid", +] + +[[package]] +name = "azure_svc_blobstorage" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e6c6f20c5611b885ba94c7bae5e02849a267381aecb8aee577e8c35ff4064c6" +dependencies = [ + "azure_core", + "bytes", + "futures", + "log", + "once_cell", + "serde", + "serde_json", + "time", +] + [[package]] name = "backoff" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b62ddb9cb1ec0a098ad4bbf9344d0713fa193ae1a80af55febcff2627b6a00c1" dependencies = [ - "getrandom", + 
"getrandom 0.2.10", "instant", - "rand", + "rand 0.8.5", ] [[package]] @@ -1134,6 +1343,19 @@ dependencies = [ "generic-array", ] +[[package]] +name = "blocking" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "703f41c54fc768e63e091340b424302bb1c29ef4aa0c7f10fe849dfb114d29ea" +dependencies = [ + "async-channel 2.3.1", + "async-task", + "futures-io", + "futures-lite 2.6.0", + "piper", +] + [[package]] name = "brotli" version = "7.0.0" @@ -1389,8 +1611,10 @@ checksum = "8eaf5903dcbc0a39312feb77df2ff4c76387d591b9fc7b04a238dcf8bb62639a" dependencies = [ "android-tzdata", "iana-time-zone", + "js-sys", "num-traits", "serde", + "wasm-bindgen", "windows-targets 0.52.6", ] @@ -1673,11 +1897,17 @@ version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" dependencies = [ - "getrandom", + "getrandom 0.2.10", "once_cell", "tiny-keccak", ] +[[package]] +name = "const_fn" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f8a2ca5ac02d09563609681103aada9e1777d54fc57a5acd7a41404f9c93b6e" + [[package]] name = "convert_case" version = "0.6.0" @@ -2199,7 +2429,7 @@ dependencies = [ "futures-util", "libc", "octseq", - "rand", + "rand 0.8.5", "smallvec", "time", "tokio", @@ -2427,6 +2657,12 @@ dependencies = [ "version_check", ] +[[package]] +name = "event-listener" +version = "2.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" + [[package]] name = "event-listener" version = "5.3.1" @@ -2444,7 +2680,7 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0f214dc438f977e6d4e3500aaa277f5ad94ca83fbbd9b1a15713ce2344ccc5a1" dependencies = [ - "event-listener", + "event-listener 5.3.1", "pin-project-lite", ] @@ -2459,7 +2695,7 @@ dependencies = [ "hyper-tls 0.5.0", "log", "pin-project", - "rand", + "rand 0.8.5", "tokio", ] @@ -2480,7 +2716,7 @@ checksum = "fe5e43d0f78a42ad591453aedb1d7ae631ce7ee445c7643691055a9ed8d3b01c" dependencies = [ "log", "once_cell", - "rand", + "rand 0.8.5", ] [[package]] @@ -2499,6 +2735,15 @@ dependencies = [ "regex", ] +[[package]] +name = "fastrand" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" +dependencies = [ + "instant", +] + [[package]] name = "fastrand" version = "2.3.0" @@ -2669,6 +2914,34 @@ version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" +[[package]] +name = "futures-lite" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49a9d51ce47660b1e808d3c990b4709f2f415d928835a17dfd16991515c46bce" +dependencies = [ + "fastrand 1.9.0", + "futures-core", + "futures-io", + "memchr", + "parking", + "pin-project-lite", + "waker-fn", +] + +[[package]] +name = "futures-lite" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5edaec856126859abb19ed65f39e90fea3a9574b9707f13539acf4abf7eb532" +dependencies = [ + "fastrand 2.3.0", + "futures-core", + "futures-io", + "parking", + "pin-project-lite", +] + [[package]] name = "futures-macro" version = "0.3.30" @@ -2748,6 +3021,17 @@ dependencies = [ "unicode-width", ] +[[package]] +name = 
"getrandom" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.9.0+wasi-snapshot-preview1", +] + [[package]] name = "getrandom" version = "0.2.10" @@ -2757,7 +3041,7 @@ dependencies = [ "cfg-if", "js-sys", "libc", - "wasi", + "wasi 0.11.0+wasi-snapshot-preview1", "wasm-bindgen", ] @@ -2800,7 +3084,7 @@ dependencies = [ "nonzero_ext", "parking_lot", "quanta", - "rand", + "rand 0.8.5", "smallvec", ] @@ -3084,6 +3368,26 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "http-types" +version = "2.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e9b187a72d63adbfba487f48095306ac823049cb504ee195541e91c7775f5ad" +dependencies = [ + "anyhow", + "async-channel 1.9.0", + "base64 0.13.1", + "futures-lite 1.13.0", + "infer", + "pin-project-lite", + "rand 0.7.3", + "serde", + "serde_json", + "serde_qs", + "serde_urlencoded", + "url", +] + [[package]] name = "httparse" version = "1.8.0" @@ -3304,7 +3608,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0acd33ff0285af998aaf9b57342af478078f53492322fafc47450e09397e0e9" dependencies = [ "bitmaps", - "rand_core", + "rand_core 0.6.2", "rand_xoshiro", "sized-chunks", "typenum", @@ -3363,6 +3667,12 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "infer" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64e9829a50b42bb782c1df523f78d332fe371b10c661e78b7a3c34b0198e9fac" + [[package]] name = "insta" version = "1.33.0" @@ -3550,7 +3860,7 @@ dependencies = [ "k8s-openapi", "kube", "kube-runtime", - "rand", + "rand 0.8.5", "serde", "tracing", ] @@ -3615,7 +3925,7 @@ dependencies = [ "kube-core", "openssl", "pem", - "rand", + "rand 0.8.5", "secrecy", "serde", "serde_json", @@ -3867,9 +4177,9 @@ dependencies = [ [[package]] name = "libz-sys" -version = "1.1.8" +version = "1.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9702761c3935f8cc2f101793272e202c72b99da8f4224a19ddcf1279a6450bbf" +checksum = "fdc53a7799a7496ebc9fd29f31f7df80e83c9bda5299768af5f9e59eeea74647" dependencies = [ "cc", "libc", @@ -4106,7 +4416,7 @@ checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" dependencies = [ "libc", "log", - "wasi", + "wasi 0.11.0+wasi-snapshot-preview1", "windows-sys 0.48.0", ] @@ -4165,7 +4475,7 @@ dependencies = [ "pem", "percent-encoding", "pin-project", - "rand", + "rand 0.8.5", "serde", "serde_json", "socket2 0.5.7", @@ -4199,7 +4509,7 @@ dependencies = [ "lazy_static", "num-bigint", "num-traits", - "rand", + "rand 0.8.5", "regex", "saturating", "serde", @@ -4232,7 +4542,7 @@ dependencies = [ "mz-ore", "open", "openssl-probe", - "reqwest", + "reqwest 0.12.4", "rpassword", "security-framework", "semver", @@ -4314,8 +4624,8 @@ dependencies = [ "opentelemetry", "prometheus", "qcell", - "rand", - "rand_chacha", + "rand 0.8.5", + "rand_chacha 0.3.0", "semver", "serde", "serde_json", @@ -4409,7 +4719,7 @@ dependencies = [ "flate2", "itertools 0.12.1", "mz-ore", - "rand", + "rand 0.8.5", "regex", "serde", "serde_json", @@ -4462,7 +4772,7 @@ dependencies = [ [[package]] name = "mz-balancerd" -version = "0.130.0-dev.0" +version = "0.130.13" dependencies = [ "anyhow", "async-trait", @@ -4498,7 +4808,7 @@ dependencies = [ "postgres", "prometheus", "proxy-header", - "reqwest", + "reqwest 0.11.24", "semver", "tempfile", 
"tokio", @@ -4580,7 +4890,7 @@ dependencies = [ "proptest", "proptest-derive", "prost", - "rand", + "rand 0.8.5", "semver", "serde", "serde_json", @@ -4597,12 +4907,13 @@ dependencies = [ [[package]] name = "mz-catalog-debug" -version = "0.130.0-dev.0" +version = "0.130.13" dependencies = [ "anyhow", "clap", "futures", "mz-adapter", + "mz-adapter-types", "mz-build-info", "mz-catalog", "mz-cloud-resources", @@ -4662,7 +4973,7 @@ dependencies = [ "proptest", "proptest-derive", "prost-build", - "reqwest", + "reqwest 0.11.24", "serde", "serde_json", "tokio", @@ -4679,7 +4990,7 @@ dependencies = [ "chrono", "mz-frontegg-auth", "mz-frontegg-client", - "reqwest", + "reqwest 0.11.24", "serde", "thiserror", "tokio", @@ -4706,7 +5017,7 @@ dependencies = [ "kube", "mz-ore", "mz-repr", - "rand", + "rand 0.8.5", "schemars", "semver", "serde", @@ -4760,7 +5071,7 @@ dependencies = [ [[package]] name = "mz-clusterd" -version = "0.130.0-dev.0" +version = "0.130.13" dependencies = [ "anyhow", "axum", @@ -4820,6 +5131,7 @@ dependencies = [ "mz-persist-client", "mz-persist-types", "mz-repr", + "mz-service", "mz-storage-operators", "mz-storage-types", "mz-timely-util", @@ -5028,7 +5340,7 @@ dependencies = [ [[package]] name = "mz-environmentd" -version = "0.130.0-dev.0" +version = "0.130.13" dependencies = [ "anyhow", "askama", @@ -5071,6 +5383,7 @@ dependencies = [ "mz-catalog", "mz-cloud-resources", "mz-controller", + "mz-dyncfg", "mz-dyncfgs", "mz-environmentd", "mz-frontegg-auth", @@ -5114,11 +5427,11 @@ dependencies = [ "predicates", "prometheus", "proptest", - "rand", + "rand 0.8.5", "rdkafka", "rdkafka-sys", "regex", - "reqwest", + "reqwest 0.11.24", "rlimit", "semver", "sentry", @@ -5193,7 +5506,7 @@ dependencies = [ "proptest-derive", "prost", "prost-build", - "rand", + "rand 0.8.5", "regex", "regex-syntax 0.8.3", "seahash", @@ -5259,7 +5572,7 @@ dependencies = [ "prost", "prost-build", "prost-types", - "reqwest", + "reqwest 0.11.24", "serde", "serde_json", "sha2", @@ -5292,7 +5605,7 @@ dependencies = [ "mz-ore", "mz-repr", "prometheus", - "reqwest", + "reqwest 0.11.24", "reqwest-middleware", "reqwest-retry", "serde", @@ -5311,7 +5624,7 @@ dependencies = [ "jsonwebtoken", "mz-frontegg-auth", "mz-ore", - "reqwest", + "reqwest 0.11.24", "serde", "serde_json", "thiserror", @@ -5336,7 +5649,7 @@ dependencies = [ "mz-frontegg-auth", "mz-ore", "openssl", - "reqwest", + "reqwest 0.11.24", "serde", "serde_json", "tokio", @@ -5425,7 +5738,7 @@ dependencies = [ "num_cpus", "prost", "prost-build", - "rand", + "rand 0.8.5", "rdkafka", "serde", "serde_json", @@ -5481,7 +5794,7 @@ dependencies = [ [[package]] name = "mz-materialized" -version = "0.130.0-dev.0" +version = "0.130.13" dependencies = [ "mz-clusterd", "mz-environmentd", @@ -5492,7 +5805,7 @@ dependencies = [ name = "mz-metabase" version = "0.0.0" dependencies = [ - "reqwest", + "reqwest 0.11.24", "serde", "workspace-hack", ] @@ -5566,7 +5879,7 @@ dependencies = [ "flate2", "hex", "hex-literal", - "reqwest", + "reqwest 0.11.24", "sha2", "tar", "walkdir", @@ -5671,7 +5984,7 @@ dependencies = [ [[package]] name = "mz-orchestratord" -version = "0.130.0-dev.0" +version = "0.130.13" dependencies = [ "anyhow", "async-trait", @@ -5695,8 +6008,8 @@ dependencies = [ "mz-ore", "mz-prof-http", "prometheus", - "rand", - "reqwest", + "rand 0.8.5", + "reqwest 0.11.24", "serde", "serde_json", "sha2", @@ -5746,7 +6059,7 @@ dependencies = [ "pin-project", "prometheus", "proptest", - "rand", + "rand 0.8.5", "scopeguard", "sentry", "sentry-tracing", @@ -5800,6 +6113,10 @@ 
dependencies = [ "aws-credential-types", "aws-sdk-s3", "aws-types", + "azure_core", + "azure_identity", + "azure_storage", + "azure_storage_blobs", "base64 0.13.1", "bytes", "deadpool-postgres", @@ -5824,7 +6141,8 @@ dependencies = [ "proptest-derive", "prost", "prost-build", - "rand", + "rand 0.8.5", + "reqwest 0.12.4", "serde", "serde_json", "sha2", @@ -5841,7 +6159,7 @@ dependencies = [ [[package]] name = "mz-persist-client" -version = "0.130.0-dev.0" +version = "0.130.13" dependencies = [ "anyhow", "arrayvec 0.7.4", @@ -6226,7 +6544,7 @@ dependencies = [ "proptest-derive", "prost", "prost-build", - "rand", + "rand 0.8.5", "regex", "ryu", "serde", @@ -6472,7 +6790,7 @@ dependencies = [ "protobuf-native", "rdkafka", "regex", - "reqwest", + "reqwest 0.11.24", "serde", "serde_json", "static_assertions", @@ -6551,6 +6869,7 @@ dependencies = [ "itertools 0.12.1", "junit-report", "md-5", + "mz-adapter-types", "mz-build-info", "mz-catalog", "mz-controller", @@ -6572,7 +6891,7 @@ dependencies = [ "mz-tracing", "postgres-protocol", "regex", - "reqwest", + "reqwest 0.11.24", "serde_json", "shell-words", "tempfile", @@ -6598,7 +6917,7 @@ dependencies = [ "openssh", "openssh-mux-client", "openssl", - "rand", + "rand 0.8.5", "scopeguard", "serde", "serde_json", @@ -6669,7 +6988,7 @@ dependencies = [ "postgres-replication", "prometheus", "prost", - "rand", + "rand 0.8.5", "rdkafka", "regex", "rocksdb", @@ -6868,7 +7187,7 @@ dependencies = [ "proptest-derive", "prost", "prost-build", - "rand", + "rand 0.8.5", "rdkafka", "regex", "serde", @@ -6892,7 +7211,7 @@ dependencies = [ "chrono", "mz-kafka-util", "mz-ore", - "rand", + "rand 0.8.5", "rdkafka", "tokio", "tokio-postgres", @@ -6902,7 +7221,7 @@ dependencies = [ [[package]] name = "mz-testdrive" -version = "0.130.0-dev.0" +version = "0.130.13" dependencies = [ "anyhow", "arrow", @@ -6952,10 +7271,10 @@ dependencies = [ "prost-build", "prost-reflect", "prost-types", - "rand", + "rand 0.8.5", "rdkafka", "regex", - "reqwest", + "reqwest 0.11.24", "semver", "serde", "serde_json", @@ -7015,7 +7334,7 @@ dependencies = [ "mz-postgres-client", "mz-repr", "postgres-protocol", - "rand", + "rand 0.8.5", "serde", "tokio", "tracing", @@ -7102,7 +7421,7 @@ dependencies = [ "prometheus", "prost", "prost-build", - "rand", + "rand 0.8.5", "serde", "timely", "tokio", @@ -7318,6 +7637,15 @@ dependencies = [ "syn 1.0.107", ] +[[package]] +name = "num_threads" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9" +dependencies = [ + "libc", +] + [[package]] name = "numa_maps" version = "0.1.0" @@ -7330,6 +7658,25 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" +[[package]] +name = "oauth2" +version = "4.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c38841cdd844847e3e7c8d29cef9dcfed8877f8f56f9071f77843ecf3baf937f" +dependencies = [ + "base64 0.13.1", + "chrono", + "getrandom 0.2.10", + "http 0.2.9", + "rand 0.8.5", + "serde", + "serde_json", + "serde_path_to_error", + "sha2", + "thiserror", + "url", +] + [[package]] name = "object" version = "0.29.0" @@ -7515,7 +7862,7 @@ dependencies = [ "once_cell", "opentelemetry", "percent-encoding", - "rand", + "rand 0.8.5", "serde_json", "thiserror", "tokio", @@ -7544,7 +7891,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"a76df7075c7d4d01fdcb46c912dd17fba5b60c78ea480b475f2b6ab6f666584e" dependencies = [ "num-traits", - "rand", + "rand 0.8.5", "serde", ] @@ -7716,6 +8063,7 @@ dependencies = [ "differential-dataflow", "futures", "humantime", + "mz-dyncfg", "mz-http-util", "mz-orchestrator-tracing", "mz-ore", @@ -7819,7 +8167,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b450720b6f75cfbfabc195814bd3765f337a4f9a83186f8537297cac12f6705" dependencies = [ "phf_shared", - "rand", + "rand 0.8.5", ] [[package]] @@ -7864,6 +8212,17 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "piper" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96c8c490f422ef9a4efd2cb5b42b76c8613d7e7dfc1caf667b8a3350a5acc066" +dependencies = [ + "atomic-waker", + "fastrand 2.3.0", + "futures-io", +] + [[package]] name = "pkg-config" version = "0.3.20" @@ -7898,6 +8257,21 @@ dependencies = [ "plotters-backend", ] +[[package]] +name = "polling" +version = "3.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a604568c3202727d1507653cb121dbd627a58684eb09a820fd746bee38b4442f" +dependencies = [ + "cfg-if", + "concurrent-queue", + "hermit-abi 0.4.0", + "pin-project-lite", + "rustix", + "tracing", + "windows-sys 0.59.0", +] + [[package]] name = "portable-atomic" version = "0.3.20" @@ -7949,7 +8323,7 @@ dependencies = [ "hmac", "md-5", "memchr", - "rand", + "rand 0.8.5", "sha2", "stringprep", ] @@ -8157,8 +8531,8 @@ dependencies = [ "bitflags 2.4.1", "lazy_static", "num-traits", - "rand", - "rand_chacha", + "rand 0.8.5", + "rand_chacha 0.3.0", "rand_xorshift", "regex-syntax 0.8.3", "unarray", @@ -8346,7 +8720,7 @@ dependencies = [ "mach2", "once_cell", "raw-cpuid", - "wasi", + "wasi 0.11.0+wasi-snapshot-preview1", "web-sys", "winapi", ] @@ -8358,6 +8732,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1004a344b30a54e2ee58d66a71b32d2db2feb0a31f9a2d302bf0536f15de2a33" dependencies = [ "memchr", + "serde", ] [[package]] @@ -8366,7 +8741,7 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6" dependencies = [ - "rand", + "rand 0.8.5", ] [[package]] @@ -8384,6 +8759,19 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" +[[package]] +name = "rand" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" +dependencies = [ + "getrandom 0.1.16", + "libc", + "rand_chacha 0.2.2", + "rand_core 0.5.1", + "rand_hc", +] + [[package]] name = "rand" version = "0.8.5" @@ -8391,11 +8779,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", - "rand_chacha", - "rand_core", + "rand_chacha 0.3.0", + "rand_core 0.6.2", "serde", ] +[[package]] +name = "rand_chacha" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" +dependencies = [ + "ppv-lite86", + "rand_core 0.5.1", +] + [[package]] name = "rand_chacha" version = "0.3.0" @@ -8403,7 
+8801,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e12735cf05c9e10bf21534da50a147b924d555dc7a547c42e6bb2d5b6017ae0d" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.6.2", +] + +[[package]] +name = "rand_core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" +dependencies = [ + "getrandom 0.1.16", ] [[package]] @@ -8412,17 +8819,26 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34cf66eb183df1c5876e2dcf6b13d57340741e8dc255b48e40a26de954d06ae7" dependencies = [ - "getrandom", + "getrandom 0.2.10", "serde", ] +[[package]] +name = "rand_hc" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +dependencies = [ + "rand_core 0.5.1", +] + [[package]] name = "rand_xorshift" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f" dependencies = [ - "rand_core", + "rand_core 0.6.2", ] [[package]] @@ -8431,7 +8847,7 @@ version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6f97cdb2a36ed4183de61b2f824cc45c9f1037f28afe0a322e9fff4c108b5aaa" dependencies = [ - "rand_core", + "rand_core 0.6.2", ] [[package]] @@ -8523,7 +8939,7 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "528532f3d801c87aec9def2add9ca802fe569e44a544afe633765267840abe64" dependencies = [ - "getrandom", + "getrandom 0.2.10", "redox_syscall 0.2.10", ] @@ -8603,7 +9019,50 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", - "rustls-pemfile", + "rustls-pemfile 1.0.4", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper 0.1.2", + "system-configuration", + "tokio", + "tokio-native-tls", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "winreg 0.50.0", +] + +[[package]] +name = "reqwest" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "566cafdd92868e0939d3fb961bd0dc25fcfaaed179291093b3d43e6b3150ea10" +dependencies = [ + "base64 0.22.0", + "bytes", + "encoding_rs", + "futures-channel", + "futures-core", + "futures-util", + "h2 0.4.5", + "http 1.1.0", + "http-body 1.0.0", + "http-body-util", + "hyper 1.4.1", + "hyper-tls 0.6.0", + "hyper-util", + "ipnet", + "js-sys", + "log", + "mime", + "native-tls", + "once_cell", + "percent-encoding", + "pin-project-lite", + "rustls-pemfile 2.2.0", "serde", "serde_json", "serde_urlencoded", @@ -8611,12 +9070,14 @@ dependencies = [ "system-configuration", "tokio", "tokio-native-tls", + "tokio-util", "tower-service", "url", "wasm-bindgen", "wasm-bindgen-futures", + "wasm-streams", "web-sys", - "winreg", + "winreg 0.52.0", ] [[package]] @@ -8627,7 +9088,7 @@ dependencies = [ "anyhow", "async-trait", "http 0.2.9", - "reqwest", + "reqwest 0.11.24", "serde", "task-local-extensions", "thiserror", @@ -8644,7 +9105,7 @@ dependencies = [ "futures", "http 0.2.9", "hyper 0.14.27", - "reqwest", + "reqwest 0.11.24", "reqwest-middleware", "retry-policies", "task-local-extensions", @@ -8666,7 +9127,7 @@ checksum = "e09bbcb5003282bcb688f0bae741b278e9c7e8f378f561522c9806c58e075d9b" dependencies = [ "anyhow", "chrono", - "rand", + "rand 0.8.5", ] [[package]] @@ -8676,7 +9137,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "688c63d65483050968b2a8937f7995f443e27041a0f7700aa59b0822aedebb74" dependencies = [ "cc", - "getrandom", + "getrandom 0.2.10", "libc", "spin", "untrusted", @@ -8798,6 +9259,21 @@ dependencies = [ "base64 0.21.5", ] +[[package]] +name = "rustls-pemfile" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" +dependencies = [ + "rustls-pki-types", +] + +[[package]] +name = "rustls-pki-types" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2bf47e6ff922db3825eb750c4e2ff784c6ff8fb9e13046ef6a1d1c5401b0b37" + [[package]] name = "rustversion" version = "1.0.9" @@ -8943,7 +9419,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24fc91c898e0487ff3e471d0849bbaf7d38a00ff5e3531009d386b0bab9b6b12" dependencies = [ "async-trait", - "reqwest", + "reqwest 0.11.24", "serde", "serde_json", "thiserror", @@ -8977,7 +9453,7 @@ checksum = "17ad137b9df78294b98cab1a650bef237cc6c950e82e5ce164655e674d07c5cc" dependencies = [ "httpdate", "native-tls", - "reqwest", + "reqwest 0.11.24", "sentry-backtrace", "sentry-contexts", "sentry-core", @@ -9020,7 +9496,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4df9b9d8de2658a1ecd4e45f7b06c80c5dd97b891bfbc7c501186189b7e9bbdf" dependencies = [ "once_cell", - "rand", + "rand 0.8.5", "sentry-types", "serde", "serde_json", @@ -9065,7 +9541,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccc95faa4078768a6bf8df45e2b894bbf372b3dbbfb364e9429c1c58ab7545c6" dependencies = [ "debugid", - "getrandom", + "getrandom 0.2.10", "hex", "serde", "serde_json", @@ -9172,6 +9648,17 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_qs" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7715380eec75f029a4ef7de39a9200e0a63823176b759d055b613f5a87df6a6" +dependencies = [ + "percent-encoding", + "serde", + "thiserror", +] + [[package]] name = "serde_repr" version = "0.1.13" @@ -9390,9 +9877,12 @@ dependencies = [ [[package]] name = "slab" -version = "0.4.6" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb703cfe953bccee95685111adeedb76fabe4e97549a58d16f03ea7b9367bb32" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +dependencies = [ + "autocfg", +] [[package]] name = "smallvec" @@ -9443,7 +9933,7 @@ checksum = "f02d3730e8785e797a4552137d1acc0d7f7146dad3b5fe65ed83637711dfc6c5" dependencies = [ "base64ct", "pem-rfc7468", - "rand_core", + "rand_core 0.6.2", "sec1", "sha2", "signature", @@ -9719,7 +10209,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c" dependencies = [ "cfg-if", - "fastrand", + "fastrand 2.3.0", "once_cell", "rustix", "windows-sys 0.59.0", @@ -9847,6 +10337,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a561bf4617eebd33bca6434b988f39ed798e527f51a1e797d0ee4f61c0a38376" dependencies = [ "itoa", + "js-sys", + "libc", + "num_threads", "quickcheck", "serde", "time-core", @@ -10075,7 +10568,7 @@ dependencies = [ "pin-project-lite", "postgres-protocol", "postgres-types", - "rand", + "rand 0.8.5", "serde", "socket2 0.5.7", "tokio", @@ -10248,7 +10741,7 @@ dependencies = [ "indexmap 1.9.1", "pin-project", 
"pin-project-lite", - "rand", + "rand 0.8.5", "slab", "tokio", "tokio-util", @@ -10325,11 +10818,10 @@ checksum = "360dfd1d6d30e05fda32ace2c8c70e9c0a9da713275777f5a4dbb8a1893930c6" [[package]] name = "tracing" -version = "0.1.37" +version = "0.1.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" dependencies = [ - "cfg-if", "log", "pin-project-lite", "tracing-attributes", @@ -10338,13 +10830,13 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.23" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4017f8f45139870ca7e672686113917c71c7a6e02d4924eda67186083c03081a" +checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2", "quote", - "syn 1.0.107", + "syn 2.0.63", ] [[package]] @@ -10362,9 +10854,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.30" +version = "0.1.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24eb03ba0eab1fd845050058ce5e616558e8f8d8fca633e6b163fe25c797213a" +checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" dependencies = [ "once_cell", "valuable", @@ -10469,7 +10961,7 @@ dependencies = [ "http 1.1.0", "httparse", "log", - "rand", + "rand 0.8.5", "sha1", "thiserror", "url", @@ -10488,7 +10980,7 @@ dependencies = [ "http 1.1.0", "httparse", "log", - "rand", + "rand 0.8.5", "sha1", "thiserror", "utf-8", @@ -10501,7 +10993,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" dependencies = [ "cfg-if", - "rand", + "rand 0.8.5", "static_assertions", ] @@ -10528,6 +11020,15 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" +[[package]] +name = "tz-rs" +version = "0.6.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33851b15c848fad2cf4b105c6bb66eb9512b6f6c44a4b13f57c53c73c707e2b4" +dependencies = [ + "const_fn", +] + [[package]] name = "ucd-trie" version = "0.1.6" @@ -10674,7 +11175,7 @@ version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f00cc9702ca12d3c81455259621e676d0f7251cec66a21e98fe2e9a37db93b2a" dependencies = [ - "getrandom", + "getrandom 0.2.10", "serde", "sha1_smol", ] @@ -10732,6 +11233,12 @@ dependencies = [ "libc", ] +[[package]] +name = "waker-fn" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "317211a0dc0ceedd78fb2ca9a44aed3d7b9b26f81870d485c07122b4350673b7" + [[package]] name = "walkdir" version = "2.3.2" @@ -10753,6 +11260,12 @@ dependencies = [ "try-lock", ] +[[package]] +name = "wasi" +version = "0.9.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -10792,9 +11305,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.19" +version = "0.4.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fe9756085a84584ee9457a002b7cdfe0bfff169f45d2591d8be1345a6780e35" +checksum = 
"c02dbc21516f9f1f04f187958890d7e6026df8d16540b7ad9492bc34a67cea03" dependencies = [ "cfg-if", "js-sys", @@ -10831,11 +11344,24 @@ version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" +[[package]] +name = "wasm-streams" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e072d4e72f700fb3443d8fe94a39315df013eef1104903cdb0a2abd322bbecd" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "web-sys" -version = "0.3.51" +version = "0.3.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e828417b379f3df7111d3a2a9e5753706cae29c41f7c4029ee9fd77f3e09e582" +checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b" dependencies = [ "js-sys", "wasm-bindgen", @@ -11137,6 +11663,16 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "winreg" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a277a57398d4bfa075df44f501a17cfdf8542d224f0d36095a2adc7aee4ef0a5" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] + [[package]] name = "workspace-hack" version = "0.0.0" @@ -11166,6 +11702,7 @@ dependencies = [ "chrono", "clap", "clap_builder", + "concurrent-queue", "console", "criterion", "crossbeam-deque", @@ -11176,6 +11713,8 @@ dependencies = [ "dec", "digest", "either", + "event-listener 5.3.1", + "event-listener-strategy", "flate2", "form_urlencoded", "futures", @@ -11186,7 +11725,7 @@ dependencies = [ "futures-sink", "futures-task", "futures-util", - "getrandom", + "getrandom 0.2.10", "hashbrown 0.14.5", "hyper 0.14.27", "hyper 1.4.1", @@ -11199,6 +11738,7 @@ dependencies = [ "kube-core", "libc", "libz-sys", + "linux-raw-sys", "log", "lru", "memchr", @@ -11232,13 +11772,14 @@ dependencies = [ "prost-reflect", "prost-types", "quote", - "rand", - "rand_chacha", + "rand 0.8.5", + "rand_chacha 0.3.0", "rdkafka-sys", "regex", "regex-automata 0.4.7", "regex-syntax 0.8.3", - "reqwest", + "reqwest 0.11.24", + "reqwest 0.12.4", "ring", "rustix", "schemars", diff --git a/LICENSE b/LICENSE index ac4782b458d2f..035f33959072c 100644 --- a/LICENSE +++ b/LICENSE @@ -13,19 +13,26 @@ Business Source License 1.1 Licensor: Materialize, Inc. -Licensed Work: Materialize Version 20250117 +Licensed Work: Materialize Version v0.130.13 The Licensed Work is © 2025 Materialize, Inc. -Additional Use Grant: Within a single installation of Materialize, you - may create one compute cluster with one - single-process replica for any purpose and you may - concurrently use multiple such installations, - subject to each of the following conditions: - (a) you may not create installations with multiple - clusters, nor compute clusters with multiple - replicas, nor compute cluster replicas with multiple - processes; and (b) you may not use the Licensed Work - for a Database Service. A “Database Service” is a +Additional Use Grant: Within a single installation of the Licensed Work, + the sum of the memory limits across all clusters + must remain below 24GiB, and the sum of the disk + limits across all clusters must remain below 48GiB. + You may operate multiple installations of the + Licensed Work for distinct applications, provided + each installation remains below the 24GiB memory + limit and 48GiB disk limit. 
You may also run + multiple applications on a single installation, so + long as that installation does not exceed the 24GiB + memory limit and 48GiB disk limit in total. However, + you may not split or “shard” a single application + across multiple installations in order to exceed + these limits. + + Further, you may not use the Licensed Work for a + Database Service. A "Database Service" is a commercial offering that allows third parties (other than your employees and contractors) to access the functionality of the Licensed Work by creating views diff --git a/about.toml b/about.toml index 0e1c16657b9ce..c66766980e63a 100644 --- a/about.toml +++ b/about.toml @@ -13,6 +13,7 @@ accepted = [ "MPL-2.0", "OpenSSL", "Zlib", + "Unicode-3.0", "Unicode-DFS-2016", ] private = { ignore = true } diff --git a/bin/ci-builder b/bin/ci-builder index c5c225ea17836..54881912e2518 100755 --- a/bin/ci-builder +++ b/bin/ci-builder @@ -188,6 +188,10 @@ case "$cmd" in --env COMMON_ANCESTOR_OVERRIDE --env CONFLUENT_CLOUD_DEVEX_KAFKA_PASSWORD --env CONFLUENT_CLOUD_DEVEX_KAFKA_USERNAME + --env AZURE_SERVICE_ACCOUNT_USERNAME + --env AZURE_SERVICE_ACCOUNT_PASSWORD + --env AZURE_SERVICE_ACCOUNT_TENANT + --env GCP_SERVICE_ACCOUNT_JSON --env GITHUB_TOKEN --env GPG_KEY --env LAUNCHDARKLY_API_TOKEN diff --git a/ci/builder/Dockerfile b/ci/builder/Dockerfile index 671d2389c6d03..2fd3299dc527d 100644 --- a/ci/builder/Dockerfile +++ b/ci/builder/Dockerfile @@ -289,7 +289,7 @@ RUN mkdir rust \ && gpg --verify rust.asc rust.tar.gz \ && tar -xzf rust.tar.gz -C /usr/local/lib/rustlib/ --strip-components=4 \ && rm -rf rust.asc rust.tar.gz rust \ - && cargo install --root /usr/local --version "=0.6.1" --locked cargo-about \ + && cargo install --root /usr/local --version "=0.6.6" --locked cargo-about \ && cargo install --root /usr/local --version "=2.0.2" --locked cargo-deb \ && cargo install --root /usr/local --version "=0.14.21" --locked cargo-deny \ && cargo install --root /usr/local --version "=0.1.0" --locked cargo-deplint \ @@ -326,7 +326,17 @@ RUN curl -fsSL https://github.com/deb-s3/deb-s3/releases/download/0.11.3/deb-s3- RUN curl -fsSL "https://awscli.amazonaws.com/awscli-exe-linux-$ARCH_GCC-2.17.2.zip" > awscli.zip \ && unzip awscli.zip \ && ./aws/install \ - && rm -rf aws + && rm -rf aws awscli.zip + +# Install the gcloud CLI. + +RUN arch_gcloud=$(echo "$ARCH_GCC" | sed -e "s/aarch64/arm/" -e "s/amd64/x86_64/") \ + && curl -fsSL "https://storage.googleapis.com/cloud-sdk-release/google-cloud-cli-507.0.0-linux-$arch_gcloud.tar.gz" > gcloud.tar.gz \ + && tar -xzf gcloud.tar.gz -C /opt \ + && rm gcloud.tar.gz \ + && CLOUDSDK_CORE_DISABLE_PROMPTS=1 /opt/google-cloud-sdk/install.sh --path-update false \ + && /opt/google-cloud-sdk/bin/gcloud config set disable_usage_reporting false \ + && /opt/google-cloud-sdk/bin/gcloud components install gke-gcloud-auth-plugin # Install docs site dependencies. These are towards the end for the same reason # as the Python dependencies. These are only supported on x86_64 at the moment. 
@@ -404,7 +414,7 @@ ENV TARGET_CC=$CC ENV TARGET_CXX=$CXX ENV TARGET_CXXSTDLIB=static=stdc++ ENV TARGET_RANLIB=$RANLIB -ENV PATH=/opt/x-tools/$ARCH_GCC-unknown-linux-gnu/bin:$PATH +ENV PATH=/opt/google-cloud-sdk/bin:/opt/x-tools/$ARCH_GCC-unknown-linux-gnu/bin:$PATH ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_LINKER=x86_64-unknown-linux-gnu-cc ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-unknown-linux-gnu-cc ENV CARGO_TARGET_DIR=/mnt/build diff --git a/ci/mkpipeline.py b/ci/mkpipeline.py index 9e9e1aaff152c..5cb4e227ba909 100644 --- a/ci/mkpipeline.py +++ b/ci/mkpipeline.py @@ -366,7 +366,7 @@ def switch_jobs_to_aws(pipeline: Any, priority: int) -> None: if branch == "main" or priority < 0: return - # Consider Hetzner to be overloaded when at least 400 jobs exist with priority >= 0 + # Consider Hetzner to be overloaded when at least 600 jobs exist with priority >= 0 try: builds = generic_api.get_multiple( "builds", @@ -397,7 +397,7 @@ def switch_jobs_to_aws(pipeline: Any, priority: int) -> None: continue num_jobs += 1 print(f"Number of high-priority jobs on Hetzner: {num_jobs}") - if num_jobs < 400: + if num_jobs < 600: return except Exception: print("switch_jobs_to_aws failed, ignoring:") @@ -418,13 +418,13 @@ def visit(config: Any) -> None: "hetzner-x86-64-2cpu-4gb", "hetzner-x86-64-dedi-2cpu-8gb", ): - config["agents"]["queue"] = "linux-x86-64-small" + config["agents"]["queue"] = "linux-x86_64-small" if agent in ("hetzner-x86-64-8cpu-16gb", "hetzner-x86-64-dedi-4cpu-16gb"): - config["agents"]["queue"] = "linux-x86-64" + config["agents"]["queue"] = "linux-x86_64" if agent in ("hetzner-x86-64-16cpu-32gb", "hetzner-x86-64-dedi-8cpu-32gb"): - config["agents"]["queue"] = "linux-x86-64-medium" + config["agents"]["queue"] = "linux-x86_64-medium" if agent == "hetzner-x86-64-dedi-16cpu-64gb": - config["agents"]["queue"] = "linux-x86-64-large" + config["agents"]["queue"] = "linux-x86_64-large" if agent in ( "hetzner-x86-64-dedi-32cpu-128gb", "hetzner-x86-64-dedi-48cpu-192gb", diff --git a/ci/nightly/pipeline.template.yml b/ci/nightly/pipeline.template.yml index 2c11095d75ec7..832caf5ff0589 100644 --- a/ci/nightly/pipeline.template.yml +++ b/ci/nightly/pipeline.template.yml @@ -226,18 +226,6 @@ steps: composition: testdrive args: [--redpanda] - - id: redpanda-testdrive-aarch64 - label: ":panda_face: :racing_car: testdrive aarch64" - depends_on: build-aarch64 - timeout_in_minutes: 180 - agents: - queue: hetzner-aarch64-8cpu-16gb - plugins: - - ./ci/plugins/mzcompose: - composition: testdrive - args: [--redpanda] - skip: "Disabled due to taking too long for the value provided" - # TODO(def-) Remove this when old upsert implementation is removed - id: testdrive-old-upsert label: ":racing_car: testdrive with old Upsert" @@ -309,6 +297,51 @@ steps: --system-param=persist_encoding_enable_dictionary=true, ] + - id: persistence-testdrive + label: ":racing_car: testdrive with --persistent-user-tables" + depends_on: build-aarch64 + timeout_in_minutes: 30 + agents: + queue: hetzner-aarch64-4cpu-8gb + plugins: + - ./ci/plugins/mzcompose: + composition: testdrive + args: [--persistent-user-tables] + skip: "Persistence tests disabled" + + - id: azurite-testdrive + label: "testdrive with :azure: blob store" + depends_on: build-aarch64 + timeout_in_minutes: 180 + agents: + queue: hetzner-aarch64-8cpu-16gb + plugins: + - ./ci/plugins/mzcompose: + composition: testdrive + args: [--azurite] + + - id: azurite-testdrive-replicas-4 + label: ":racing_car: testdrive 4 replicas with :azure: blob store" + 
depends_on: build-aarch64 + timeout_in_minutes: 180 + agents: + queue: hetzner-aarch64-16cpu-32gb + plugins: + - ./ci/plugins/mzcompose: + composition: testdrive + args: [--replicas=4, --azurite] + + - id: azurite-testdrive-size-8 + label: ":racing_car: testdrive with SIZE 8 and :azure: blob store" + depends_on: build-aarch64 + timeout_in_minutes: 180 + agents: + queue: hetzner-aarch64-16cpu-32gb + plugins: + - ./ci/plugins/mzcompose: + composition: testdrive + args: [--default-size=8, --azurite] + - id: testdrive-in-cloudtest label: "Full Testdrive in Cloudtest (K8s)" depends_on: build-aarch64 @@ -326,17 +359,6 @@ steps: args: [-m=long, test/cloudtest/test_full_testdrive.py, --no-test-parallelism] sanitizer: skip - - id: persistence-testdrive - label: ":racing_car: testdrive with --persistent-user-tables" - depends_on: build-aarch64 - timeout_in_minutes: 30 - agents: - queue: hetzner-aarch64-4cpu-8gb - plugins: - - ./ci/plugins/mzcompose: - composition: testdrive - args: [--persistent-user-tables] - skip: "Persistence tests disabled" - group: Limits key: limits-group @@ -456,6 +478,17 @@ steps: composition: zippy args: [--scenario=KafkaSources, --actions=10000, --max-execution-time=30m] + - id: zippy-kafka-sources-azurite + label: "Zippy Kafka Sources with :azure: blob store" + depends_on: build-aarch64 + timeout_in_minutes: 120 + agents: + queue: hetzner-aarch64-8cpu-16gb + plugins: + - ./ci/plugins/mzcompose: + composition: zippy + args: [--scenario=KafkaSources, --actions=10000, --max-execution-time=30m, --azurite] + # TODO(def-) Remove this when old upsert implementation is removed - id: zippy-kafka-sources-old-upsert label: "Zippy Kafka Sources with old Upsert" @@ -468,6 +501,18 @@ steps: composition: zippy args: [--scenario=KafkaSources, --actions=10000, --max-execution-time=30m, --system-param=storage_use_continual_feedback_upsert=false] + # TODO(def-) Remove this when old upsert implementation is removed + - id: zippy-kafka-sources-old-upsert-azurite + label: "Zippy Kafka Sources with old Upsert and :azure: blob store" + depends_on: build-aarch64 + timeout_in_minutes: 120 + agents: + queue: hetzner-aarch64-8cpu-16gb + plugins: + - ./ci/plugins/mzcompose: + composition: zippy + args: [--scenario=KafkaSources, --actions=10000, --max-execution-time=30m, --system-param=storage_use_continual_feedback_upsert=false, --azurite] + - id: zippy-kafka-parallel-insert label: "Zippy Kafka Parallel Insert" depends_on: build-aarch64 @@ -479,6 +524,17 @@ steps: composition: zippy args: [--scenario=KafkaParallelInsert, --transaction-isolation=serializable, --actions=10000, --max-execution-time=30m] + - id: zippy-kafka-parallel-insert-azurite + label: "Zippy Kafka Parallel Insert with :azurite: blob store" + depends_on: build-aarch64 + timeout_in_minutes: 120 + agents: + queue: hetzner-aarch64-8cpu-16gb + plugins: + - ./ci/plugins/mzcompose: + composition: zippy + args: [--scenario=KafkaParallelInsert, --transaction-isolation=serializable, --actions=10000, --max-execution-time=30m, --azurite] + - id: zippy-user-tables label: "Zippy User Tables" depends_on: build-aarch64 @@ -488,7 +544,7 @@ steps: plugins: - ./ci/plugins/mzcompose: composition: zippy - args: [--scenario=UserTables, --actions=10000, --max-execution-time=30m] + args: [--scenario=UserTables, --actions=10000, --max-execution-time=30m, --azurite] - id: zippy-postgres-cdc label: "Zippy Postgres CDC" @@ -499,7 +555,7 @@ steps: plugins: - ./ci/plugins/mzcompose: composition: zippy - args: [--scenario=PostgresCdc, --actions=10000, 
--max-execution-time=30m] + args: [--scenario=PostgresCdc, --actions=10000, --max-execution-time=30m, --azurite] - id: zippy-mysql-cdc label: "Zippy MySQL CDC" @@ -510,7 +566,7 @@ steps: plugins: - ./ci/plugins/mzcompose: composition: zippy - args: [--scenario=MySqlCdc, --actions=10000, --max-execution-time=30m] + args: [--scenario=MySqlCdc, --actions=10000, --max-execution-time=30m, --azurite] - id: zippy-debezium-postgres label: "Zippy Debezium Postgres" @@ -623,12 +679,13 @@ steps: queue: hetzner-aarch64-4cpu-8gb - id: testdrive-old-kafka-src-syntax - label: "Testdrive (before Kafka source versioning)" + label: "Testdrive (before Kafka source versioning) with :azure: blob store" depends_on: build-aarch64 timeout_in_minutes: 180 plugins: - ./ci/plugins/mzcompose: composition: testdrive-old-kafka-src-syntax + args: [--azurite] agents: queue: hetzner-aarch64-8cpu-16gb @@ -675,6 +732,19 @@ steps: - group: "Platform checks" key: platform-checks steps: + - id: checks-no-restart-no-upgrade-azurite + label: "Checks without restart or upgrade with :azure: blob store" + depends_on: build-aarch64 + inputs: [misc/python/materialize/checks] + timeout_in_minutes: 45 + parallelism: 2 + agents: + queue: hetzner-aarch64-16cpu-32gb + plugins: + - ./ci/plugins/mzcompose: + composition: platform-checks + args: [--scenario=NoRestartNoUpgrade, "--seed=$BUILDKITE_JOB_ID", --azurite] + - id: checks-restart-entire-mz label: "Checks + restart of the entire Mz" depends_on: build-aarch64 @@ -687,7 +757,25 @@ steps: plugins: - ./ci/plugins/mzcompose: composition: platform-checks - args: [--scenario=RestartEntireMz, "--seed=$BUILDKITE_JOB_ID"] + args: [--scenario=RestartEntireMz, "--seed=$BUILDKITE_JOB_ID", --azurite] + + - id: checks-restart-environmentd-clusterd-storage-azurite + label: "Checks + restart of environmentd & storage clusterd with :azure: blob store" + depends_on: build-aarch64 + inputs: [misc/python/materialize/checks] + timeout_in_minutes: 45 + parallelism: 2 + agents: + queue: hetzner-aarch64-16cpu-32gb + plugins: + - ./ci/plugins/mzcompose: + composition: platform-checks + args: + [ + --scenario=RestartEnvironmentdClusterdStorage, + "--seed=$BUILDKITE_JOB_ID", + --azurite, + ] # TODO(def-) Remove this when old upsert implementation is removed - id: checks-restart-entire-mz-old-upsert @@ -752,6 +840,18 @@ steps: composition: platform-checks args: [--scenario=RestartEntireMz, --execution-mode=parallel, "--seed=$BUILDKITE_JOB_ID"] + - id: checks-parallel-restart-entire-mz-azurite + label: "Checks parallel + restart of the entire Mz with :azure: blob store" + depends_on: build-aarch64 + timeout_in_minutes: 180 + parallelism: 2 + agents: + queue: hetzner-aarch64-16cpu-32gb + plugins: + - ./ci/plugins/mzcompose: + composition: platform-checks + args: [--scenario=RestartEntireMz, --execution-mode=parallel, "--seed=$BUILDKITE_JOB_ID", --azurite] + - id: checks-parallel-restart-environmentd-clusterd-storage label: "Checks parallel + restart of environmentd & storage clusterd" depends_on: build-aarch64 @@ -788,6 +888,18 @@ steps: composition: platform-checks args: [--scenario=UpgradeEntireMz, "--seed=$BUILDKITE_JOB_ID"] + - id: checks-lts-upgrade + label: "Checks LTS upgrade, whole-Mz restart" + depends_on: build-aarch64 + timeout_in_minutes: 180 + parallelism: 2 + agents: + queue: hetzner-aarch64-16cpu-32gb + plugins: + - ./ci/plugins/mzcompose: + composition: platform-checks + args: [--scenario=UpgradeEntireMzFromLatestLTS, "--seed=$BUILDKITE_JOB_ID"] + - id: checks-preflight-check-rollback label: "Checks 
preflight-check and roll back upgrade" depends_on: build-aarch64 @@ -824,8 +936,8 @@ steps: composition: platform-checks args: [--scenario=UpgradeEntireMzFourVersions, "--seed=$BUILDKITE_JOB_ID"] - - id: checks-0dt-restart-entire-mz-forced-migrations - label: "Checks 0dt restart of the entire Mz with forced migrations" + - id: checks-0dt-restart-entire-mz-forced-migrations-azurite + label: "Checks 0dt restart of the entire Mz with forced migrations with :azure: blob store" depends_on: build-aarch64 timeout_in_minutes: 60 parallelism: 2 @@ -834,7 +946,7 @@ steps: plugins: - ./ci/plugins/mzcompose: composition: platform-checks - args: [--scenario=ZeroDowntimeRestartEntireMzForcedMigrations, "--seed=$BUILDKITE_JOB_ID"] + args: [--scenario=ZeroDowntimeRestartEntireMzForcedMigrations, "--seed=$BUILDKITE_JOB_ID", --azurite] - id: checks-0dt-upgrade-entire-mz label: "Checks 0dt upgrade, whole-Mz restart" @@ -1034,7 +1146,7 @@ steps: args: [--node-count=1, --consensus=mem, --blob=mem, --time-limit=600, --concurrency=4, --rate=500, --max-txn-length=16, --unreliability=0.1] - id: persist-maelstrom-multi-node - label: Long multi-node Maelstrom coverage of persist with postgres consensus + label: Long multi-node Maelstrom coverage of persist with CockroachDB consensus depends_on: build-aarch64 timeout_in_minutes: 20 agents: @@ -1045,8 +1157,20 @@ steps: composition: persist args: [--node-count=4, --consensus=cockroach, --blob=maelstrom, --time-limit=300, --concurrency=4, --rate=500, --max-txn-length=16, --unreliability=0.1] + - id: persist-maelstrom-multi-node-postgres + label: "Long multi-node Maelstrom coverage of persist with :postgres: Consensus" + depends_on: build-aarch64 + timeout_in_minutes: 40 + agents: + queue: hetzner-aarch64-4cpu-8gb + artifact_paths: [test/persist/maelstrom/**/*.log] + plugins: + - ./ci/plugins/mzcompose: + composition: persist + args: [--node-count=4, --consensus=postgres, --blob=maelstrom, --time-limit=300, --concurrency=4, --rate=500, --max-txn-length=16, --unreliability=0.1] + - id: txn-wal-maelstrom - label: Maelstrom coverage of txn-wal + label: Maelstrom coverage of txn-wal with CockroachDB Consensus depends_on: build-aarch64 timeout_in_minutes: 30 agents: @@ -1057,6 +1181,18 @@ steps: composition: persist args: [--node-count=4, --consensus=cockroach, --blob=maelstrom, --time-limit=300, --rate=500, --txn-wal] + - id: txn-wal-maelstrom-postgres + label: "Maelstrom coverage of txn-wal with :postgres: Consensus" + depends_on: build-aarch64 + timeout_in_minutes: 40 + agents: + queue: hetzner-aarch64-8cpu-16gb + artifact_paths: [test/persist/maelstrom/**/*.log] + plugins: + - ./ci/plugins/mzcompose: + composition: persist + args: [--node-count=4, --consensus=postgres, --blob=maelstrom, --time-limit=300, --rate=500, --txn-wal] + - id: persistence-failpoints label: Persistence failpoints depends_on: build-aarch64 @@ -1124,18 +1260,49 @@ steps: - id: terraform-aws label: "Terraform + Helm Chart E2E on AWS" - artifact_paths: [test/terraform/aws/terraform.tfstate] + artifact_paths: [test/terraform/aws-temporary/terraform.tfstate, "mz-debug/**/*"] depends_on: build-aarch64 - timeout_in_minutes: 1200 + timeout_in_minutes: 120 concurrency: 1 - concurrency_group: 'terraform' + concurrency_group: 'terraform-aws' agents: queue: linux-aarch64-small plugins: - ./ci/plugins/scratch-aws-access: ~ - ./ci/plugins/mzcompose: composition: terraform - branches: "main v*.*" + run: aws-temporary + branches: "main v*.* lts-v* *aws* *tf* *terraform* *helm* *self-managed*" + + - id: 
terraform-gcp + label: "Terraform + Helm Chart E2E on GCP" + artifact_paths: [test/terraform/gcp-temporary/terraform.tfstate, "mz-debug/**/*"] + depends_on: build-aarch64 + timeout_in_minutes: 120 + concurrency: 1 + concurrency_group: 'terraform-gcp' + agents: + queue: linux-aarch64-small + plugins: + - ./ci/plugins/mzcompose: + composition: terraform + run: gcp-temporary + branches: "main v*.* lts-v* *gcp* *tf* *terraform* *helm* *self-managed*" + + - id: terraform-azure + label: "Terraform + Helm Chart E2E on Azure" + artifact_paths: [test/terraform/azure-temporary/terraform.tfstate, "mz-debug/**/*"] + depends_on: build-aarch64 + timeout_in_minutes: 120 + concurrency: 1 + concurrency_group: 'terraform-azure' + agents: + queue: linux-aarch64-small + plugins: + - ./ci/plugins/mzcompose: + composition: terraform + run: azure-temporary + branches: "main v*.* lts-v* *azure* *tf* *terraform* *helm* *self-managed*" - group: "Output consistency" key: output-consistency @@ -1431,7 +1598,7 @@ steps: args: [--runtime=1500, --complexity=dml, --threads=16] - id: parallel-workload-ddl - label: "Parallel Workload (DDL)" + label: "Parallel Workload (DDL) with :azure: blob store" depends_on: build-aarch64 artifact_paths: [parallel-workload-queries.log.zst] timeout_in_minutes: 90 @@ -1440,7 +1607,7 @@ steps: plugins: - ./ci/plugins/mzcompose: composition: parallel-workload - args: [--runtime=1500, --threads=8] + args: [--runtime=1500, --threads=8, --azurite] - id: parallel-workload-ddl-only label: "Parallel Workload (DDL Only)" @@ -1481,7 +1648,7 @@ steps: args: [--runtime=1500, --scenario=rename, --naughty-identifiers, --threads=16] - id: parallel-workload-rename - label: "Parallel Workload (rename)" + label: "Parallel Workload (rename) with :azure: blob store" depends_on: build-aarch64 artifact_paths: [parallel-workload-queries.log.zst] timeout_in_minutes: 90 @@ -1490,7 +1657,7 @@ steps: plugins: - ./ci/plugins/mzcompose: composition: parallel-workload - args: [--runtime=1500, --scenario=rename, --threads=16] + args: [--runtime=1500, --scenario=rename, --threads=16, --azurite] - id: parallel-workload-cancel label: "Parallel Workload (cancel)" @@ -1564,16 +1731,31 @@ steps: - ./ci/plugins/mzcompose: composition: balancerd - - id: legacy-upgrade - label: Legacy upgrade tests (last version from git) - depends_on: build-aarch64 - timeout_in_minutes: 60 - plugins: - - ./ci/plugins/mzcompose: - composition: legacy-upgrade - args: ["--versions-source=git"] - agents: - queue: hetzner-aarch64-4cpu-8gb + - group: Legacy upgrade tests + key: legacy-upgrade + steps: + - id: legacy-upgrade-git + label: Legacy upgrade tests (last version from git) + depends_on: build-aarch64 + timeout_in_minutes: 60 + plugins: + - ./ci/plugins/mzcompose: + composition: legacy-upgrade + args: ["--versions-source=git"] + agents: + queue: hetzner-aarch64-4cpu-8gb + + - id: legacy-upgrade-docs + label: "Legacy upgrade tests (last version from docs)" + parallelism: 2 + depends_on: build-aarch64 + timeout_in_minutes: 60 + plugins: + - ./ci/plugins/mzcompose: + composition: legacy-upgrade + args: ["--versions-source=docs", "--lts-upgrade"] + agents: + queue: hetzner-aarch64-4cpu-8gb - group: Cloud tests key: cloudtests @@ -1621,13 +1803,14 @@ steps: sanitizer: skip - id: txn-wal-fencing - label: Txn-wal fencing + label: "Txn-wal fencing with :azure: blob store" depends_on: build-aarch64 timeout_in_minutes: 120 parallelism: 2 plugins: - ./ci/plugins/mzcompose: composition: txn-wal-fencing + args: [--azurite] agents: queue: 
hetzner-aarch64-8cpu-16gb @@ -1720,6 +1903,18 @@ steps: composition: sqllogictest run: slow-tests + - id: cluster-tests-azurite + label: "Cluster tests with :azure: blob store" + depends_on: build-aarch64 + timeout_in_minutes: 120 + inputs: [test/cluster] + plugins: + - ./ci/plugins/mzcompose: + composition: cluster + args: ["--azurite"] + agents: + queue: hetzner-aarch64-8cpu-16gb + - group: "Language tests" key: language-tests steps: diff --git a/ci/plugins/mzcompose/hooks/pre-exit b/ci/plugins/mzcompose/hooks/pre-exit index 5153e85197d18..508ad26c9ae74 100755 --- a/ci/plugins/mzcompose/hooks/pre-exit +++ b/ci/plugins/mzcompose/hooks/pre-exit @@ -85,7 +85,7 @@ timeout 300 buildkite-agent artifact upload "$artifacts_str" || true bin/ci-builder run stable bin/ci-annotate-errors --test-cmd="$(cat test_cmd)" --test-desc="$(cat test_desc)" "${artifacts[@]}" > ci-annotate-errors.log || CI_ANNOTATE_ERRORS_RESULT=$? buildkite-agent artifact upload "ci-annotate-errors.log" -if [ ! -s services.log ] && [ "$BUILDKITE_LABEL" != "Maelstrom coverage of persist" ] && [ "$BUILDKITE_LABEL" != "Long single-node Maelstrom coverage of persist" ] && [ "$BUILDKITE_LABEL" != "Maelstrom coverage of txn-wal" ] && [ "$BUILDKITE_LABEL" != "Mz E2E Test" ] && [ "$BUILDKITE_LABEL" != "Output consistency (version for DFR)" ] && [ "$BUILDKITE_LABEL" != "Output consistency (version for CTF)" ] && [ "$BUILDKITE_LABEL" != "QA Canary Environment Base Load" ] && [ "$BUILDKITE_LABEL" != "Parallel Benchmark against QA Canary Environment" ] && [ "$BUILDKITE_LABEL" != "Parallel Benchmark against QA Benchmarking Staging Environment" ] && [ "$BUILDKITE_LABEL" != "Terraform + Helm Chart E2E on AWS" ]; then +if [ ! -s services.log ] && [ "$BUILDKITE_LABEL" != "Maelstrom coverage of persist" ] && [ "$BUILDKITE_LABEL" != "Long single-node Maelstrom coverage of persist" ] && [ "$BUILDKITE_LABEL" != "Maelstrom coverage of txn-wal" ] && [ "$BUILDKITE_LABEL" != "Mz E2E Test" ] && [ "$BUILDKITE_LABEL" != "Output consistency (version for DFR)" ] && [ "$BUILDKITE_LABEL" != "Output consistency (version for CTF)" ] && [ "$BUILDKITE_LABEL" != "QA Canary Environment Base Load" ] && [ "$BUILDKITE_LABEL" != "Parallel Benchmark against QA Canary Environment" ] && [ "$BUILDKITE_LABEL" != "Parallel Benchmark against QA Benchmarking Staging Environment" ] && [ "$BUILDKITE_LABEL" != "Terraform + Helm Chart E2E on AWS" ] && [ "$BUILDKITE_LABEL" != "Terraform + Helm Chart E2E on GCP" ] && [ "$BUILDKITE_LABEL" != "Terraform AWS Persistent Test" ] && [ "$BUILDKITE_LABEL" != "Terraform + Helm Chart E2E on Azure" ]; then echo "+++ services.log is empty, failing" exit 1 fi @@ -145,3 +145,10 @@ ci_unimportant_heading ":docker: Cleaning up after mzcompose" run kill || true run rm --force -v run down --volumes + +if [[ "$BUILDKITE_LABEL" =~ Terraform\ .* ]]; then + ci_unimportant_heading "terraform: Destroying leftover state in case job was cancelled or timed out..." 
+ bin/ci-builder run stable terraform -chdir=test/terraform/aws-temporary destroy || true + bin/ci-builder run stable terraform -chdir=test/terraform/gcp-temporary destroy || true + bin/ci-builder run stable terraform -chdir=test/terraform/azure-temporary destroy || true +fi diff --git a/ci/test/cargo-test/mzcompose.py b/ci/test/cargo-test/mzcompose.py index 85f17fe737175..4a58dec1878b2 100644 --- a/ci/test/cargo-test/mzcompose.py +++ b/ci/test/cargo-test/mzcompose.py @@ -19,6 +19,7 @@ from materialize import MZ_ROOT, buildkite, rustc_flags, spawn, ui from materialize.cli.run import SANITIZER_TARGET from materialize.mzcompose.composition import Composition, WorkflowArgumentParser +from materialize.mzcompose.services.azure import Azurite from materialize.mzcompose.services.kafka import Kafka from materialize.mzcompose.services.minio import Minio from materialize.mzcompose.services.postgres import ( @@ -52,6 +53,10 @@ allow_host_ports=True, additional_directories=["copytos3"], ), + Azurite( + ports=["40111:10000"], + allow_host_ports=True, + ), ] @@ -65,7 +70,13 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: parser.add_argument("args", nargs="*") args = parser.parse_args() c.up( - "zookeeper", "kafka", "schema-registry", "postgres", c.metadata_store(), "minio" + "zookeeper", + "kafka", + "schema-registry", + "postgres", + c.metadata_store(), + "minio", + "azurite", ) # Heads up: this intentionally runs on the host rather than in a Docker # image. See database-issues#3739. @@ -84,6 +95,7 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: MZ_SOFT_ASSERTIONS="1", MZ_PERSIST_EXTERNAL_STORAGE_TEST_S3_BUCKET="mz-test-persist-1d-lifecycle-delete", MZ_S3_UPLOADER_TEST_S3_BUCKET="mz-test-1d-lifecycle-delete", + MZ_PERSIST_EXTERNAL_STORAGE_TEST_AZURE_CONTAINER="mz-test-azure", MZ_PERSIST_EXTERNAL_STORAGE_TEST_POSTGRES_URL=cockroach_url, ) diff --git a/ci/test/lint-main/checks/check-mzcompose-files.sh b/ci/test/lint-main/checks/check-mzcompose-files.sh index 0031f92c3dc3d..ed16e1ac22f96 100755 --- a/ci/test/lint-main/checks/check-mzcompose-files.sh +++ b/ci/test/lint-main/checks/check-mzcompose-files.sh @@ -45,6 +45,7 @@ check_default_workflow_references_others() { -not -wholename "./test/canary-environment/mzcompose.py" `# Only run manually` \ -not -wholename "./test/ssh-connection/mzcompose.py" `# Handled differently` \ -not -wholename "./test/scalability/mzcompose.py" `# Other workflows are for manual usage` \ + -not -wholename "./test/terraform/mzcompose.py" `# Handled differently` \ ) for file in "${MZCOMPOSE_TEST_FILES[@]}"; do diff --git a/ci/test/pipeline.template.yml b/ci/test/pipeline.template.yml index 48460f6a878a9..ad327e7fe1a58 100644 --- a/ci/test/pipeline.template.yml +++ b/ci/test/pipeline.template.yml @@ -208,6 +208,7 @@ steps: queue: mac coverage: skip sanitizer: skip + branches: "main v*.* self-managed/v*" - id: lint-deps label: Lint dependencies @@ -742,6 +743,7 @@ steps: composition: skip-version-upgrade agents: queue: hetzner-aarch64-4cpu-8gb + skip: "Version upgrade skips are allowed for LTS releases now" - id: deploy-website label: Deploy website diff --git a/deny.toml b/deny.toml index 71c35233c19b0..60956cac1f630 100644 --- a/deny.toml +++ b/deny.toml @@ -48,10 +48,8 @@ skip = [ { name = "windows-sys", version = "0.52.0" }, # Newer versions of crates like `tempfile` are held back by crates like `atty`. # This is very Unfortunate as we don't actually use these platforms. 
- { name = "hermit-abi", version = "0.1.6" }, { name = "hermit-abi", version = "0.2.6" }, { name = "redox_syscall", version = "0.2.10" }, - { name = "linux-raw-sys", version = "0.3.4" }, { name = "rustix", version = "0.38.21" }, # Will require updating many crates @@ -118,6 +116,19 @@ skip = [ { name = "sync_wrapper", version = "0.1.2" }, { name = "memmap2", version = "0.5.4" }, + + { name = "wasi", version = "0.9.0+wasi-snapshot-preview1" }, + { name = "async-channel", version = "1.9.0" }, + { name = "event-listener", version = "2.5.3" }, + { name = "fastrand", version = "1.9.0" }, + { name = "futures-lite", version = "1.13.0" }, + { name = "getrandom", version = "0.1.16" }, + { name = "rand", version = "0.7.3" }, + { name = "rand_chacha", version = "0.2.2" }, + { name = "rand_core", version = "0.5.1" }, + { name = "reqwest", version = "0.11.24" }, + { name = "rustls-pemfile", version = "1.0.4" }, + { name = "winreg", version = "0.50.0" }, ] # Use `tracing` instead. @@ -148,6 +159,7 @@ name = "strum-macros" [[bans.deny]] name = "log" wrappers = [ + "azure_svc_blobstorage", "deadpool-postgres", "eventsource-client", "fail", @@ -187,7 +199,6 @@ wrappers = [ "bindgen", "bstr", "console", - "criterion", "dynfmt", "findshlibs", "insta", @@ -227,14 +238,10 @@ version = 2 ignore = [ # Consider switching `yaml-rust` to the actively maintained `yaml-rust2` fork of the original project "RUSTSEC-2024-0320", - # Consider `std::io::IsTerminal` or `is-terminal` instead of `atty` (unmaintained) - "RUSTSEC-2021-0145", # Consider `encoding_rs` instead of `encoding` (unmaintained) "RUSTSEC-2021-0153", # proc-macro-error is unmaintained, possible alternative: proc-macro-error2 "RUSTSEC-2024-0370", - # Use standard library's IsTerminal trait instead of `atty` (unmaintained) - "RUSTSEC-2024-0375", # `derivative` is unmaintained; consider using an alternative (unmaintained) "RUSTSEC-2024-0388", # `instant` is unmaintained, and the author recommends using the maintained [`web-time`] crate instead. diff --git a/doc/user/content/sql/show-create-connection.md b/doc/user/content/sql/show-create-connection.md index 92c914ac0f9b9..a079c25044f9b 100644 --- a/doc/user/content/sql/show-create-connection.md +++ b/doc/user/content/sql/show-create-connection.md @@ -11,9 +11,11 @@ menu: ## Syntax ```sql -SHOW CREATE CONNECTION +SHOW [REDACTED] CREATE CONNECTION ``` +{{< yaml-table data="show_create_redacted_option" >}} + For available connection names, see [`SHOW CONNECTIONS`](/sql/show-connections). ## Examples diff --git a/doc/user/content/sql/show-create-index.md b/doc/user/content/sql/show-create-index.md index fb78d636617f6..898e745c7069d 100644 --- a/doc/user/content/sql/show-create-index.md +++ b/doc/user/content/sql/show-create-index.md @@ -11,9 +11,11 @@ menu: ## Syntax ```sql -SHOW CREATE INDEX +SHOW [REDACTED] CREATE INDEX ``` +{{< yaml-table data="show_create_redacted_option" >}} + For available index names, see [`SHOW INDEXES`](/sql/show-indexes). ## Examples diff --git a/doc/user/content/sql/show-create-materialized-view.md b/doc/user/content/sql/show-create-materialized-view.md index 625011b9b275b..41319518a30eb 100644 --- a/doc/user/content/sql/show-create-materialized-view.md +++ b/doc/user/content/sql/show-create-materialized-view.md @@ -11,9 +11,11 @@ menu: ## Syntax ```sql -SHOW CREATE MATERIALIZED VIEW +SHOW [REDACTED] CREATE MATERIALIZED VIEW ``` +{{< yaml-table data="show_create_redacted_option" >}} + For available materialized view names, see [`SHOW MATERIALIZED VIEWS`](/sql/show-materialized-views). 
## Examples diff --git a/doc/user/content/sql/show-create-sink.md b/doc/user/content/sql/show-create-sink.md index 410abe852fba6..eec3548b94d50 100644 --- a/doc/user/content/sql/show-create-sink.md +++ b/doc/user/content/sql/show-create-sink.md @@ -11,9 +11,11 @@ menu: ## Syntax ```sql -SHOW CREATE SINK +SHOW [REDACTED] CREATE SINK ``` +{{< yaml-table data="show_create_redacted_option" >}} + For available sink names, see [`SHOW SINKS`](/sql/show-sinks). ## Examples diff --git a/doc/user/content/sql/show-create-source.md b/doc/user/content/sql/show-create-source.md index 85cfe0f17e636..30414e0cf4774 100644 --- a/doc/user/content/sql/show-create-source.md +++ b/doc/user/content/sql/show-create-source.md @@ -11,9 +11,11 @@ menu: ## Syntax ```sql -SHOW CREATE SOURCE +SHOW [REDACTED] CREATE SOURCE ``` +{{< yaml-table data="show_create_redacted_option" >}} + For available source names, see [`SHOW SOURCES`](/sql/show-sources). ## Examples diff --git a/doc/user/content/sql/show-create-table.md b/doc/user/content/sql/show-create-table.md index 901abb802e45c..6da19dfb96f7e 100644 --- a/doc/user/content/sql/show-create-table.md +++ b/doc/user/content/sql/show-create-table.md @@ -11,9 +11,11 @@ menu: ## Syntax ```sql -SHOW CREATE TABLE +SHOW [REDACTED] CREATE TABLE ``` +{{< yaml-table data="show_create_redacted_option" >}} + For available table names, see [`SHOW TABLES`](/sql/show-tables). ## Examples diff --git a/doc/user/content/sql/show-create-view.md b/doc/user/content/sql/show-create-view.md index b51ecc743837c..799f7b4ab77f5 100644 --- a/doc/user/content/sql/show-create-view.md +++ b/doc/user/content/sql/show-create-view.md @@ -11,9 +11,11 @@ menu: ## Syntax ```sql -SHOW CREATE VIEW +SHOW [REDACTED] CREATE VIEW ``` +{{< yaml-table data="show_create_redacted_option" >}} + For available view names, see [`SHOW VIEWS`](/sql/show-views). ## Examples diff --git a/doc/user/data/show_create_redacted_option.yml b/doc/user/data/show_create_redacted_option.yml new file mode 100644 index 0000000000000..60da4b42b8cbf --- /dev/null +++ b/doc/user/data/show_create_redacted_option.yml @@ -0,0 +1,7 @@ +columns: + - column: Option + - column: Description + +rows: + - Option: "**REDACTED**" + Description: "If specified, literals will be redacted." 
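For illustration, the optional keyword documented in the `show-create-*.md` pages above could be exercised as follows. The object name is hypothetical, and per the option table the literals in the returned statement are redacted:

```sql
SHOW REDACTED CREATE MATERIALIZED VIEW winning_bids;
```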
diff --git a/misc/helm-charts/operator/Chart.yaml b/misc/helm-charts/operator/Chart.yaml index 7632a85dfdf83..97a089a2a5312 100644 --- a/misc/helm-charts/operator/Chart.yaml +++ b/misc/helm-charts/operator/Chart.yaml @@ -12,6 +12,6 @@ name: materialize-operator description: Materialize Kubernetes Operator Helm Chart type: application version: v25.1.0-beta.1 -appVersion: v0.130.0-dev.0 +appVersion: v0.130.13 icon: https://materialize.com/favicon.ico home: https://materialize.com diff --git a/misc/helm-charts/operator/README.md b/misc/helm-charts/operator/README.md index b6a877a144797..998117e93e63f 100644 --- a/misc/helm-charts/operator/README.md +++ b/misc/helm-charts/operator/README.md @@ -1,6 +1,6 @@ # Materialize Kubernetes Operator Helm Chart -![Version: v25.1.0-beta.1](https://img.shields.io/badge/Version-v25.1.0--beta.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: v0.130.0-dev.0](https://img.shields.io/badge/AppVersion-v0.130.0--dev.0-informational?style=flat-square) +![Version: v25.1.0-beta.1](https://img.shields.io/badge/Version-v25.1.0--beta.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: v0.130.13](https://img.shields.io/badge/AppVersion-v0.130.13-informational?style=flat-square) Materialize Kubernetes Operator Helm Chart @@ -107,7 +107,12 @@ The following table lists the configurable parameters of the Materialize operato | Parameter | Description | Default | |-----------|-------------|---------| +| `balancerd.enabled` | Flag to indicate whether to create balancerd pods for the environments | ``true`` | +| `balancerd.nodeSelector` | Node selector to use for balancerd pods spawned by the operator | ``{}`` | | `clusterd.nodeSelector` | Node selector to use for clusterd pods spawned by the operator | ``{}`` | +| `console.enabled` | Flag to indicate whether to create console pods for the environments | ``true`` | +| `console.imageTagMapOverride` | Override the mapping of environmentd versions to console versions | ``{}`` | +| `console.nodeSelector` | Node selector to use for console pods spawned by the operator | ``{}`` | | `environmentd.nodeSelector` | Node selector to use for environmentd pods spawned by the operator | ``{}`` | | `networkPolicies.egress` | egress from Materialize pods to sources and sinks | ``{"cidrs":["0.0.0.0/0"],"enabled":false}`` | | `networkPolicies.enabled` | Whether to enable network policies for securing communication between pods | ``false`` | @@ -125,19 +130,20 @@ The following table lists the configurable parameters of the Materialize operato | `operator.cloudProvider.providers.gcp` | GCP Configuration (placeholder for future use) | ``{"enabled":false}`` | | `operator.cloudProvider.region` | Common cloud provider settings | ``"kind"`` | | `operator.cloudProvider.type` | Specifies cloud provider. 
Valid values are 'aws', 'gcp', 'azure' , 'generic', or 'local' | ``"local"`` | +| `operator.clusters.defaultReplicationFactor.analytics` | | ``0`` | +| `operator.clusters.defaultReplicationFactor.probe` | | ``0`` | +| `operator.clusters.defaultReplicationFactor.support` | | ``0`` | +| `operator.clusters.defaultReplicationFactor.system` | | ``0`` | | `operator.clusters.defaultSizes.analytics` | | ``"25cc"`` | -| `operator.clusters.defaultSizes.catalogServer` | | ``"50cc"`` | +| `operator.clusters.defaultSizes.catalogServer` | | ``"25cc"`` | | `operator.clusters.defaultSizes.default` | | ``"25cc"`` | | `operator.clusters.defaultSizes.probe` | | ``"mz_probe"`` | | `operator.clusters.defaultSizes.support` | | ``"25cc"`` | | `operator.clusters.defaultSizes.system` | | ``"25cc"`` | | `operator.features.authentication` | Whether to enable environmentd rbac checks TODO: this is not yet supported in the helm chart | ``false`` | -| `operator.features.consoleImageTagMapOverride` | Override the mapping of environmentd versions to console versions | ``{}`` | -| `operator.features.createBalancers` | Flag to indicate whether to create balancerd pods for the environments | ``true`` | -| `operator.features.createConsole` | Flag to indicate whether to create console pods for the environments | ``true`` | | `operator.image.pullPolicy` | Policy for pulling the image: "IfNotPresent" avoids unnecessary re-pulling of images | ``"IfNotPresent"`` | | `operator.image.repository` | The Docker repository for the operator image | ``"materialize/orchestratord"`` | -| `operator.image.tag` | The tag/version of the operator image to be used | ``"v0.130.0-dev.0"`` | +| `operator.image.tag` | The tag/version of the operator image to be used | ``"v0.130.13"`` | | `operator.nodeSelector` | | ``{}`` | | `operator.resources.limits` | Resource limits for the operator's CPU and memory | ``{"memory":"512Mi"}`` | | `operator.resources.requests` | Resources requested by the operator for CPU and memory | ``{"cpu":"100m","memory":"512Mi"}`` | @@ -162,7 +168,7 @@ Specify each parameter using the `--set key=value[,key=value]` argument to `helm ```shell helm install my-materialize-operator \ - --set operator.image.tag=v0.130.0-dev.0 \ + --set operator.image.tag=v0.130.13 \ materialize/materialize-operator ``` @@ -197,7 +203,7 @@ metadata: name: 12345678-1234-1234-1234-123456789012 namespace: materialize-environment spec: - environmentdImageRef: materialize/environmentd:v0.130.0-dev.0 + environmentdImageRef: materialize/environmentd:v0.130.13 backendSecretName: materialize-backend environmentdResourceRequirements: limits: diff --git a/misc/helm-charts/operator/templates/deployment.yaml b/misc/helm-charts/operator/templates/deployment.yaml index 1cf4050946fca..8a3dbc4104884 100644 --- a/misc/helm-charts/operator/templates/deployment.yaml +++ b/misc/helm-charts/operator/templates/deployment.yaml @@ -40,6 +40,9 @@ spec: - "--cloud-provider={{ .Values.operator.cloudProvider.type }}" - "--region={{ .Values.operator.cloudProvider.region }}" - "--secrets-controller={{ .Values.operator.secretsController }}" + # (SangJunBak) For self-managed, we disable statement logging due to performance issues + # https://github.com/MaterializeInc/cloud/issues/10755 + - "--disable-statement-logging" {{- range $key, $value := include "materialize-operator.selectorLabels" . 
| fromYaml }} - "--orchestratord-pod-selector-labels={{ $key }}={{ $value }}" {{- end }} @@ -63,14 +66,14 @@ spec: {{- end }} {{/* Feature Flags */}} - {{- if .Values.operator.features.createBalancers }} + {{- if .Values.balancerd.enabled }} - "--create-balancers" {{- end }} - {{- if .Values.operator.features.createConsole }} + {{- if .Values.console.enabled }} - "--create-console" {{- end }} - - "--console-image-tag-default=25.1.0-beta.3" - {{- range $key, $value := .Values.operator.features.consoleImageTagMapOverride }} + - "--console-image-tag-default=25.2.0" + {{- range $key, $value := .Values.console.imageTagMapOverride }} - "--console-image-tag-map={{ $key }}={{ $value }}" {{- end }} {{- if not .Values.operator.features.authentication }} @@ -100,6 +103,18 @@ spec: {{ if .Values.operator.clusters.defaultSizes.analytics }} - "--bootstrap-builtin-analytics-cluster-replica-size={{ .Values.operator.clusters.defaultSizes.analytics }}" {{- end }} + {{ if ne .Values.operator.clusters.defaultReplicationFactor.system nil }} + - "--bootstrap-builtin-system-cluster-replication-factor={{ .Values.operator.clusters.defaultReplicationFactor.system }}" + {{- end }} + {{ if ne .Values.operator.clusters.defaultReplicationFactor.probe nil }} + - "--bootstrap-builtin-probe-cluster-replication-factor={{ .Values.operator.clusters.defaultReplicationFactor.probe }}" + {{- end }} + {{ if ne .Values.operator.clusters.defaultReplicationFactor.support nil }} + - "--bootstrap-builtin-support-cluster-replication-factor={{ .Values.operator.clusters.defaultReplicationFactor.support }}" + {{- end }} + {{ if ne .Values.operator.clusters.defaultReplicationFactor.analytics nil }} + - "--bootstrap-builtin-analytics-cluster-replication-factor={{ .Values.operator.clusters.defaultReplicationFactor.analytics }}" + {{- end }} {{- end }} - "--image-pull-policy={{ kebabcase .Values.operator.image.pullPolicy }}" {{- range $key, $value := .Values.environmentd.nodeSelector }} @@ -108,6 +123,12 @@ spec: {{- range $key, $value := .Values.clusterd.nodeSelector }} - "--clusterd-node-selector={{ $key }}={{ $value }}" {{- end }} + {{- range $key, $value := .Values.balancerd.nodeSelector }} + - "--balancerd-node-selector={{ $key }}={{ $value }}" + {{- end }} + {{- range $key, $value := .Values.console.nodeSelector }} + - "--console-node-selector={{ $key }}={{ $value }}" + {{- end }} {{- if .Values.storage.storageClass.name }} - "--ephemeral-volume-class={{ .Values.storage.storageClass.name }}" {{- end }} diff --git a/misc/helm-charts/operator/tests/deployment_test.yaml b/misc/helm-charts/operator/tests/deployment_test.yaml index 5ded5fa96bb2f..37b2041be5fc7 100644 --- a/misc/helm-charts/operator/tests/deployment_test.yaml +++ b/misc/helm-charts/operator/tests/deployment_test.yaml @@ -17,7 +17,7 @@ tests: of: Deployment - equal: path: spec.template.spec.containers[0].image - value: materialize/orchestratord:v0.130.0-dev.0 + value: materialize/orchestratord:v0.130.13 - equal: path: spec.template.spec.containers[0].imagePullPolicy value: IfNotPresent @@ -64,10 +64,10 @@ tests: storage.storageClass.name: "" asserts: - matchRegex: - path: spec.template.spec.containers[0].args[12] # Index of the environmentd-cluster-replica-sizes argument + path: spec.template.spec.containers[0].args[13] # Index of the environmentd-cluster-replica-sizes argument pattern: disk_limit":"0" - matchRegex: - path: spec.template.spec.containers[0].args[12] + path: spec.template.spec.containers[0].args[13] pattern: is_cc":true - it: should have a cluster with disk 
limit to 1552MiB when storage class is configured @@ -75,10 +75,10 @@ tests: storage.storageClass.name: "my-storage-class" asserts: - matchRegex: - path: spec.template.spec.containers[0].args[12] + path: spec.template.spec.containers[0].args[13] pattern: disk_limit":"1552MiB" - matchRegex: - path: spec.template.spec.containers[0].args[12] + path: spec.template.spec.containers[0].args[13] pattern: is_cc":true - it: should configure for AWS provider correctly @@ -127,7 +127,7 @@ tests: # Feature Flag Tests - it: should enable balancer creation when configured set: - operator.features.createBalancers: true + balancerd.enabled: true asserts: - contains: path: spec.template.spec.containers[0].args @@ -135,7 +135,7 @@ tests: - it: should not enable balancer creation when disabled set: - operator.features.createBalancers: false + balancerd.enabled: false asserts: - notContains: path: spec.template.spec.containers[0].args @@ -143,7 +143,7 @@ tests: - it: should enable console creation when configured set: - operator.features.createConsole: true + console.enabled: true asserts: - contains: path: spec.template.spec.containers[0].args @@ -151,7 +151,7 @@ tests: - it: should configure console image tag map override correctly set: - operator.features.consoleImageTagMapOverride: + console.imageTagMapOverride: "v0.125.0": "25.1.0" "v0.126.0": "25.2.0" asserts: diff --git a/misc/helm-charts/operator/values.yaml b/misc/helm-charts/operator/values.yaml index a35fd4aaf1e87..d0b5ffc460126 100644 --- a/misc/helm-charts/operator/values.yaml +++ b/misc/helm-charts/operator/values.yaml @@ -13,7 +13,7 @@ operator: # -- The Docker repository for the operator image repository: materialize/orchestratord # -- The tag/version of the operator image to be used - tag: v0.130.0-dev.0 + tag: v0.130.13 # -- Policy for pulling the image: "IfNotPresent" avoids unnecessary re-pulling of images pullPolicy: IfNotPresent @@ -24,12 +24,6 @@ operator: enableInternalStatementLogging: true features: - # -- Flag to indicate whether to create balancerd pods for the environments - createBalancers: true - # -- Flag to indicate whether to create console pods for the environments - createConsole: true - # -- Override the mapping of environmentd versions to console versions - consoleImageTagMapOverride: {} # -- Whether to enable environmentd rbac checks # TODO: this is not yet supported in the helm chart authentication: false @@ -168,37 +162,18 @@ operator: credits_per_hour: "64" disk_limit: "962560MiB" memory_limit: "481280MiB" - 128C: - workers: 62 - scale: 4 - cpu_exclusive: true - cpu_limit: 62 - credits_per_hour: "128" - disk_limit: "962560MiB" - memory_limit: "481280MiB" - 256C: - workers: 62 - scale: 8 - cpu_exclusive: true - cpu_limit: 62 - credits_per_hour: "256" - disk_limit: "962560MiB" - memory_limit: "481280MiB" - 512C: - workers: 62 - scale: 16 - cpu_exclusive: true - cpu_limit: 62 - credits_per_hour: "512" - disk_limit: "962560MiB" - memory_limit: "481280MiB" defaultSizes: default: 25cc system: 25cc probe: mz_probe support: 25cc - catalogServer: 50cc + catalogServer: 25cc analytics: 25cc + defaultReplicationFactor: + system: 0 + probe: 0 + support: 0 + analytics: 0 # Node selector to use for the operator pod nodeSelector: {} @@ -225,13 +200,27 @@ clusterd: # -- Node selector to use for clusterd pods spawned by the operator nodeSelector: {} +balancerd: + # -- Flag to indicate whether to create balancerd pods for the environments + enabled: true + # -- Node selector to use for balancerd pods spawned by the operator + 
nodeSelector: {} + +console: + # -- Flag to indicate whether to create console pods for the environments + enabled: true + # -- Override the mapping of environmentd versions to console versions + imageTagMapOverride: {} + # -- Node selector to use for console pods spawned by the operator + nodeSelector: {} + # RBAC (Role-Based Access Control) settings rbac: # -- Whether to create necessary RBAC roles and bindings create: true # -- Optionally use a non-default kubernetes scheduler. -schedulerName: null +schedulerName: # Service account settings serviceAccount: diff --git a/misc/helm-charts/testing/materialize.yaml b/misc/helm-charts/testing/materialize.yaml index d2b8c178af337..c9bdb2a5afa21 100644 --- a/misc/helm-charts/testing/materialize.yaml +++ b/misc/helm-charts/testing/materialize.yaml @@ -28,7 +28,7 @@ metadata: name: 12345678-1234-1234-1234-123456789012 namespace: materialize-environment spec: - environmentdImageRef: materialize/environmentd:v0.130.0-dev.0 + environmentdImageRef: materialize/environmentd:v0.130.13 backendSecretName: materialize-backend #balancerdExternalCertificateSpec: # dnsNames: diff --git a/misc/images/materialized-base/Dockerfile b/misc/images/materialized-base/Dockerfile index 7d5acd724849a..57f02dc491ca3 100644 --- a/misc/images/materialized-base/Dockerfile +++ b/misc/images/materialized-base/Dockerfile @@ -15,7 +15,7 @@ # deployed to production, but the version needs to be bumped whenever features # that the console depends upon are removed (to a version of the console that # doesn't depend on those features). -FROM materialize/console:25.1.0-beta.3 AS console +FROM materialize/console:25.2.0 AS console MZFROM ubuntu-base @@ -46,7 +46,7 @@ RUN apt-get update \ COPY postgresql.conf pg_hba.conf /etc/postgresql/16/main/ COPY --from=console /usr/share/nginx/html /usr/share/nginx/html -COPY --from=console /etc/nginx/templates/default.conf.template /etc/nginx/templates/default.conf.template +COPY console_nginx.template /etc/nginx/templates/default.conf.template # Configure the console to listen on port 6874 and proxy API requests through to # the Materialize instance that will be started and listening for requests on diff --git a/misc/images/materialized-base/console_nginx.template b/misc/images/materialized-base/console_nginx.template new file mode 100644 index 0000000000000..fcab991fb62dd --- /dev/null +++ b/misc/images/materialized-base/console_nginx.template @@ -0,0 +1,73 @@ +# Copyright Materialize, Inc. and contributors. All rights reserved. +# +# Use of this software is governed by the Business Source License +# included in the LICENSE file at the root of this repository. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0. + +# This is a separate mzimage so that we don't have to re-install the apt things +# every time we get a CI builder with a cold cache. + +# Package a pinned version of the console into the image, for ease of getting +# started with Materialize. It's okay if this console lags a bit behind what's +# deployed to production, but the version needs to be bumped whenever features +# that the console depends upon are removed (to a version of the console that +# doesn't depend on those features). 
+ +server { +${MZ_NGINX_LISTENER_CONFIG} + server_name _; + + location / { + root /usr/share/nginx/html; + index index.html; + try_files $uri $uri/ /index.html; + } + + location /api { + client_max_body_size 100M; + proxy_connect_timeout 600s; + proxy_send_timeout 600s; + proxy_read_timeout 600s; + + # WebSocket support + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + proxy_http_version 1.1; + + proxy_request_buffering on; + proxy_buffers 16 32k; + proxy_buffer_size 64k; + proxy_busy_buffers_size 128k; + proxy_temp_file_write_size 128k; + + proxy_pass ${MZ_ENDPOINT}; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header X-Request-ID $request_id; + proxy_set_header X-Webhook-Event $http_x_webhook_event; + } + + # Cache policy for static assets + location ~* \.(js|css|png|jpg|jpeg|gif|svg|woff|woff2)$ { + root /usr/share/nginx/html; + try_files $uri =404; + expires 1y; + add_header Cache-Control "public, max-age=31536000, immutable"; + } + + # Gzip compression + gzip on; + gzip_types text/plain text/css application/json application/javascript application/x-javascript text/xml application/xml application/xml+rss text/javascript image/svg+xml; + gzip_min_length 256; + gzip_comp_level 6; + gzip_vary on; + gzip_proxied any; + gzip_buffers 16 8k; + + error_page 404 /404.html; +} diff --git a/misc/python/materialize/checks/all_checks/mysql_cdc.py b/misc/python/materialize/checks/all_checks/mysql_cdc.py index 5e70894c7fe1e..69c6bdf91cc84 100644 --- a/misc/python/materialize/checks/all_checks/mysql_cdc.py +++ b/misc/python/materialize/checks/all_checks/mysql_cdc.py @@ -14,6 +14,7 @@ from materialize.checks.actions import Testdrive from materialize.checks.checks import Check, externally_idempotent +from materialize.checks.executors import Executor from materialize.mz_version import MzVersion from materialize.mzcompose.services.mysql import MySql @@ -364,5 +365,120 @@ def validate(self) -> Testdrive: ) +@externally_idempotent(False) +class MySqlBitType(Check): + def _can_run(self, e: Executor) -> bool: + return self.base_version > MzVersion.parse_mz("v0.131.0-dev") + + def initialize(self) -> Testdrive: + return Testdrive( + dedent( + f""" + $ mysql-connect name=mysql url=mysql://root@mysql password={MySql.DEFAULT_ROOT_PASSWORD} + + $ mysql-execute name=mysql + # create the database if it does not exist yet but do not drop it + CREATE DATABASE IF NOT EXISTS public; + USE public; + + CREATE USER mysql3 IDENTIFIED BY 'mysql'; + GRANT REPLICATION SLAVE ON *.* TO mysql3; + GRANT ALL ON public.* TO mysql3; + + DROP TABLE IF EXISTS mysql_bit_table; + + CREATE TABLE mysql_bit_table (f1 BIT(11), f2 BIT(1)); + + INSERT INTO mysql_bit_table VALUES (8, 0); + INSERT INTO mysql_bit_table VALUES (13, 1) + INSERT INTO mysql_bit_table VALUES (b'11100000100', b'1'); + INSERT INTO mysql_bit_table VALUES (b'0000', b'0'); + INSERT INTO mysql_bit_table VALUES (b'11111111111', b'0'); + + > CREATE SECRET mysql_bit_pass AS 'mysql'; + > CREATE CONNECTION mysql_bit_conn TO MYSQL ( + HOST 'mysql', + USER mysql3, + PASSWORD SECRET mysql_bit_pass + ) + + > CREATE SOURCE mysql_bit_source + FROM MYSQL CONNECTION mysql_bit_conn; + > CREATE TABLE mysql_bit_table FROM SOURCE mysql_bit_source (REFERENCE public.mysql_bit_table); + + # Return all rows + > CREATE MATERIALIZED VIEW mysql_bit_view AS + SELECT * FROM mysql_bit_table + """ + ) + ) + + def 
manipulate(self) -> list[Testdrive]: + return [ + Testdrive(dedent(s)) + for s in [ + f""" + $ mysql-connect name=mysql url=mysql://root@mysql password={MySql.DEFAULT_ROOT_PASSWORD} + + $ mysql-execute name=mysql + USE public; + INSERT INTO mysql_bit_table VALUES (20, 1); + """, + f""" + $ mysql-connect name=mysql url=mysql://root@mysql password={MySql.DEFAULT_ROOT_PASSWORD} + + $ mysql-execute name=mysql + USE public; + INSERT INTO mysql_bit_table VALUES (30, 1); + """, + ] + ] + + def validate(self) -> Testdrive: + return Testdrive( + dedent( + f""" + > SELECT * FROM mysql_bit_table; + 0 0 + 8 0 + 13 1 + 20 1 + 30 1 + 1796 1 + 2047 0 + + $ mysql-connect name=mysql url=mysql://root@mysql password={MySql.DEFAULT_ROOT_PASSWORD} + + $ mysql-execute name=mysql + USE public; + INSERT INTO mysql_bit_table VALUES (40, 1); + + > SELECT * FROM mysql_bit_table; + 0 0 + 8 0 + 13 1 + 20 1 + 30 1 + 40 1 + 1796 1 + 2047 0 + + # Rollback the last INSERTs so that validate() can be called multiple times + $ mysql-execute name=mysql + DELETE FROM mysql_bit_table WHERE f1 = 40; + + > SELECT * FROM mysql_bit_table; + 0 0 + 8 0 + 13 1 + 20 1 + 30 1 + 1796 1 + 2047 0 + """ + ) + ) + + def remove_target_cluster_from_explain(sql: str) -> str: return re.sub(r"\n\s*Target cluster: \w+\n", "", sql) diff --git a/misc/python/materialize/checks/mzcompose_actions.py b/misc/python/materialize/checks/mzcompose_actions.py index 65a20b49509ef..4128694399f73 100644 --- a/misc/python/materialize/checks/mzcompose_actions.py +++ b/misc/python/materialize/checks/mzcompose_actions.py @@ -61,6 +61,7 @@ def __init__( self.restart = restart self.force_migrations = force_migrations self.publish = publish + self.scenario = scenario def execute(self, e: Executor) -> None: c = e.mzcompose_composition() @@ -72,7 +73,8 @@ def execute(self, e: Executor) -> None: name=self.mz_service, image=image, external_metadata_store=True, - external_minio=True, + external_blob_store=True, + blob_store_is_azure=self.scenario.azurite, environment_extra=self.environment_extra, system_parameter_defaults=self.system_parameter_defaults, additional_system_parameter_defaults=self.additional_system_parameter_defaults, diff --git a/misc/python/materialize/checks/scenarios.py b/misc/python/materialize/checks/scenarios.py index 5cadc80146cb5..a6fc5a940ca43 100644 --- a/misc/python/materialize/checks/scenarios.py +++ b/misc/python/materialize/checks/scenarios.py @@ -45,10 +45,15 @@ class Scenario: def __init__( - self, checks: list[type[Check]], executor: Executor, seed: str | None = None + self, + checks: list[type[Check]], + executor: Executor, + azurite: bool, + seed: str | None = None, ) -> None: self._checks = checks self.executor = executor + self.azurite = azurite self.rng = None if seed is None else Random(seed) self._base_version = MzVersion.parse_cargo() @@ -269,10 +274,11 @@ def __init__( self, checks: list[type[Check]], executor: Executor, + azurite: bool, seed: str | None, change_entries: list[SystemVarChangeEntry], ): - super().__init__(checks, executor, seed) + super().__init__(checks, executor, azurite, seed) self.change_entries = change_entries def actions(self) -> list[Action]: diff --git a/misc/python/materialize/checks/scenarios_upgrade.py b/misc/python/materialize/checks/scenarios_upgrade.py index 7411b7b7484f8..2c0b0aab80a0c 100644 --- a/misc/python/materialize/checks/scenarios_upgrade.py +++ b/misc/python/materialize/checks/scenarios_upgrade.py @@ -23,7 +23,7 @@ from materialize.checks.scenarios import Scenario from materialize.mz_version 
import MzVersion from materialize.mzcompose.services.materialized import LEADER_STATUS_HEALTHCHECK -from materialize.version_list import get_published_minor_mz_versions +from materialize.version_list import LTS_VERSIONS, get_published_minor_mz_versions # late initialization _minor_versions: list[MzVersion] | None = None @@ -83,6 +83,40 @@ def start_mz_read_only( ) +class UpgradeEntireMzFromLatestLTS(Scenario): + """Upgrade the entire Mz instance from the last LTS version without any intermediate steps. This makes sure our LTS releases for self-managed Materialize stay upgradable.""" + + def base_version(self) -> MzVersion: + return LTS_VERSIONS[-1] + + def actions(self) -> list[Action]: + print(f"Upgrading from tag {self.base_version()}") + return [ + StartMz( + self, + tag=self.base_version(), + ), + Initialize(self), + Manipulate(self, phase=1), + KillMz( + capture_logs=True + ), # We always use True here otherwise docker-compose will lose the pre-upgrade logs + StartMz( + self, + tag=None, + ), + Manipulate(self, phase=2), + Validate(self), + # A second restart while already on the new version + KillMz(capture_logs=True), + StartMz( + self, + tag=None, + ), + Validate(self), + ] + + class UpgradeEntireMz(Scenario): """Upgrade the entire Mz instance from the last released version.""" @@ -162,10 +196,14 @@ class UpgradeEntireMzFourVersions(Scenario): """Test upgrade X-4 -> X-3 -> X-2 -> X-1 -> X""" def __init__( - self, checks: list[type[Check]], executor: Executor, seed: str | None = None + self, + checks: list[type[Check]], + executor: Executor, + azurite: bool, + seed: str | None = None, ): self.minor_versions = get_minor_versions() - super().__init__(checks, executor, seed) + super().__init__(checks, executor, azurite, seed) def base_version(self) -> MzVersion: return self.minor_versions[3] diff --git a/misc/python/materialize/checks/scenarios_zero_downtime.py b/misc/python/materialize/checks/scenarios_zero_downtime.py index 3f1723137cf45..29ad53565900b 100644 --- a/misc/python/materialize/checks/scenarios_zero_downtime.py +++ b/misc/python/materialize/checks/scenarios_zero_downtime.py @@ -257,10 +257,14 @@ class ZeroDowntimeUpgradeEntireMzFourVersions(Scenario): """Test 0dt upgrade from X-4 -> X-3 -> X-2 -> X-1 -> X""" def __init__( - self, checks: list[type[Check]], executor: Executor, seed: str | None = None + self, + checks: list[type[Check]], + executor: Executor, + azurite: bool, + seed: str | None = None, ): self.minor_versions = get_minor_versions() - super().__init__(checks, executor, seed) + super().__init__(checks, executor, azurite, seed) def base_version(self) -> MzVersion: return self.minor_versions[3] diff --git a/misc/python/materialize/cli/run.py b/misc/python/materialize/cli/run.py index bc2ee69cb5759..92925b9b6f57d 100644 --- a/misc/python/materialize/cli/run.py +++ b/misc/python/materialize/cli/run.py @@ -50,6 +50,8 @@ else f"{Arch.host()}-apple-darwin" ) DEFAULT_POSTGRES = "postgres://root@localhost:26257/materialize" +MZDATA = MZ_ROOT / "mzdata" +DEFAULT_BLOB = f"file://{MZDATA}/persist/blob" # sets entitlements on the built binary, e.g. 
environmentd, so you can inspect it with Instruments MACOS_ENTITLEMENTS_DATA = """ @@ -90,6 +92,11 @@ def main() -> int: help="Postgres/CockroachDB connection string", default=os.getenv("MZDEV_POSTGRES", DEFAULT_POSTGRES), ) + parser.add_argument( + "--blob", + help="Blob storage connection string", + default=os.getenv("MZDEV_BLOB", DEFAULT_BLOB), + ) parser.add_argument( "--release", help="Build artifacts in release mode, with optimizations", @@ -228,7 +235,6 @@ def main() -> int: command += ["--tokio-console-listen-addr=127.0.0.1:6669"] if args.program == "environmentd": _handle_lingering_services(kill=args.reset) - mzdata = MZ_ROOT / "mzdata" scratch = MZ_ROOT / "scratch" urlparse(args.postgres).path.removeprefix("/") dbconn = _connect_sql(args.postgres) @@ -243,10 +249,10 @@ def main() -> int: if args.reset: # Remove everything in the `mzdata`` directory *except* for # the `prometheus` directory and all contents of `tempo`. - paths = list(mzdata.glob("prometheus/*")) + paths = list(MZDATA.glob("prometheus/*")) paths.extend( p - for p in mzdata.glob("*") + for p in MZDATA.glob("*") if p.name != "prometheus" and p.name != "tempo" ) paths.extend(p for p in scratch.glob("*")) @@ -257,28 +263,29 @@ def main() -> int: else: path.unlink() - mzdata.mkdir(exist_ok=True) + MZDATA.mkdir(exist_ok=True) scratch.mkdir(exist_ok=True) - environment_file = mzdata / "environment-id" + environment_file = MZDATA / "environment-id" try: environment_id = environment_file.read_text().rstrip() except FileNotFoundError: environment_id = f"local-az1-{uuid.uuid4()}-0" environment_file.write_text(environment_id) + print(f"persist-blob-url: {args.blob}") command += [ # Setting the listen addresses below to 0.0.0.0 is required # to allow Prometheus running in Docker (misc/prometheus) # access these services to scrape metrics. 
"--internal-http-listen-addr=0.0.0.0:6878", "--orchestrator=process", - f"--orchestrator-process-secrets-directory={mzdata}/secrets", + f"--orchestrator-process-secrets-directory={MZDATA}/secrets", "--orchestrator-process-tcp-proxy-listen-addr=0.0.0.0", - f"--orchestrator-process-prometheus-service-discovery-directory={mzdata}/prometheus", + f"--orchestrator-process-prometheus-service-discovery-directory={MZDATA}/prometheus", f"--orchestrator-process-scratch-directory={scratch}", "--secrets-controller=local-file", f"--persist-consensus-url={args.postgres}?options=--search_path=consensus", - f"--persist-blob-url=file://{mzdata}/persist/blob", + f"--persist-blob-url={args.blob}", f"--timestamp-oracle-url={args.postgres}?options=--search_path=tsoracle", f"--environment-id={environment_id}", "--bootstrap-role=materialize", diff --git a/misc/python/materialize/data_ingest/transaction_def.py b/misc/python/materialize/data_ingest/transaction_def.py index d370c03912a9a..a06baada4c9ff 100644 --- a/misc/python/materialize/data_ingest/transaction_def.py +++ b/misc/python/materialize/data_ingest/transaction_def.py @@ -64,11 +64,16 @@ class RestartMz(TransactionDef): workload: "Workload" def __init__( - self, composition: Composition, probability: float, workload: "Workload" + self, + composition: Composition, + probability: float, + workload: "Workload", + azurite: bool, ): self.composition = composition self.probability = probability self.workload = workload + self.azurite = azurite def generate(self, fields: list[Field]) -> Iterator[Transaction | None]: if random.random() < self.probability: @@ -82,7 +87,8 @@ def generate(self, fields: list[Field]) -> Iterator[Transaction | None]: Materialized( name=self.workload.mz_service, ports=ports, - external_minio=True, + external_blob_store=True, + blob_store_is_azure=self.azurite, external_metadata_store=True, system_parameter_defaults=get_default_system_parameters( zero_downtime=True @@ -106,11 +112,16 @@ class ZeroDowntimeDeploy(TransactionDef): workload: "Workload" def __init__( - self, composition: Composition, probability: float, workload: "Workload" + self, + composition: Composition, + probability: float, + workload: "Workload", + azurite: bool, ): self.composition = composition self.probability = probability self.workload = workload + self.azurite = azurite def generate(self, fields: list[Field]) -> Iterator[Transaction | None]: if random.random() < self.probability: @@ -131,7 +142,8 @@ def generate(self, fields: list[Field]) -> Iterator[Transaction | None]: Materialized( name=self.workload.mz_service, ports=ports, - external_minio=True, + external_blob_store=True, + blob_store_is_azure=self.azurite, external_metadata_store=True, system_parameter_defaults=get_default_system_parameters( zero_downtime=True diff --git a/misc/python/materialize/data_ingest/workload.py b/misc/python/materialize/data_ingest/workload.py index e6087527728c9..cdb0af1c3dc83 100644 --- a/misc/python/materialize/data_ingest/workload.py +++ b/misc/python/materialize/data_ingest/workload.py @@ -46,8 +46,12 @@ class Workload: deploy_generation: int def __init__( - self, mz_service: str = "materailized", deploy_generation: int = 0 + self, + azurite: bool, + mz_service: str = "materialized", + deploy_generation: int = 0, ) -> None: + self.azurite = azurite self.mz_service = mz_service self.deploy_generation = deploy_generation @@ -62,11 +66,12 @@ def generate(self, fields: list[Field]) -> Iterator[Transaction]: class SingleSensorUpdating(Workload): def __init__( self, + azurite: 
bool, composition: Composition | None = None, mz_service: str = "materialized", deploy_generation: int = 0, ) -> None: - super().__init__(mz_service, deploy_generation) + super().__init__(azurite, mz_service, deploy_generation) self.cycle = [ TransactionDef( [ @@ -83,11 +88,12 @@ def __init__( class SingleSensorUpdatingDisruptions(Workload): def __init__( self, + azurite: bool, composition: Composition | None = None, mz_service: str = "materialized", deploy_generation: int = 0, ) -> None: - super().__init__(mz_service, deploy_generation) + super().__init__(azurite, mz_service, deploy_generation) self.cycle = [ TransactionDef( [ @@ -100,17 +106,22 @@ def __init__( ), ] if composition: - self.cycle.append(RestartMz(composition, probability=0.1, workload=self)) + self.cycle.append( + RestartMz( + composition, probability=0.1, workload=self, azurite=self.azurite + ) + ) class SingleSensorUpdating0dtDeploy(Workload): def __init__( self, + azurite: bool, composition: Composition | None = None, mz_service: str = "materialized", deploy_generation: int = 0, ) -> None: - super().__init__(mz_service, deploy_generation) + super().__init__(azurite, mz_service, deploy_generation) self.cycle = [ TransactionDef( [ @@ -124,18 +135,21 @@ def __init__( ] if composition: self.cycle.append( - ZeroDowntimeDeploy(composition, probability=0.1, workload=self) + ZeroDowntimeDeploy( + composition, probability=0.1, workload=self, azurite=self.azurite + ) ) class DeleteDataAtEndOfDay(Workload): def __init__( self, + azurite: bool, composition: Composition | None = None, mz_service: str = "materialized", deploy_generation: int = 0, ) -> None: - super().__init__(mz_service, deploy_generation) + super().__init__(azurite, mz_service, deploy_generation) insert = Insert( count=Records.SOME, record_size=RecordSize.SMALL, @@ -163,11 +177,12 @@ def __init__( class DeleteDataAtEndOfDayDisruptions(Workload): def __init__( self, + azurite: bool, composition: Composition | None = None, mz_service: str = "materialized", deploy_generation: int = 0, ) -> None: - super().__init__(mz_service, deploy_generation) + super().__init__(azurite, mz_service, deploy_generation) insert = Insert( count=Records.SOME, record_size=RecordSize.SMALL, @@ -192,17 +207,22 @@ def __init__( ] if composition: - self.cycle.append(RestartMz(composition, probability=0.1, workload=self)) + self.cycle.append( + RestartMz( + composition, probability=0.1, workload=self, azurite=self.azurite + ) + ) class DeleteDataAtEndOfDay0dtDeploys(Workload): def __init__( self, + azurite: bool, composition: Composition | None = None, mz_service: str = "materialized", deploy_generation: int = 0, ) -> None: - super().__init__(mz_service, deploy_generation) + super().__init__(azurite, mz_service, deploy_generation) insert = Insert( count=Records.SOME, record_size=RecordSize.SMALL, @@ -228,16 +248,18 @@ def __init__( if composition: self.cycle.append( - ZeroDowntimeDeploy(composition, probability=0.1, workload=self) + ZeroDowntimeDeploy( + composition, probability=0.1, workload=self, azurite=self.azurite + ) ) # TODO: Implement # class ProgressivelyEnrichRecords(Workload): # def __init__( -# self, composition: Composition | None = None, mz_service: str = "materialized", deploy_generation: int = 0 +# self, azurite: bool, composition: Composition | None = None, mz_service: str = "materialized", deploy_generation: int = 0 # ) -> None: -# super().__init__(mz_service, deploy_generation) +# super().__init__(azurite, mz_service, deploy_generation) # self.cycle: list[Definition] = [ # ] 
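As a minimal sketch of how the blob-store plumbing added in this change fits together: the workflow below is hypothetical and not part of the diff, but the service (`Azurite`), the `Materialized` parameters (`external_blob_store`, `blob_store_is_azure`), and `Composition.blob_store()` are the ones introduced here.

```python
# Hypothetical mzcompose workflow sketch (not part of this change); it only
# illustrates how the new Azurite service and blob-store flags combine.
from materialize.mzcompose.composition import Composition
from materialize.mzcompose.services.azure import Azurite
from materialize.mzcompose.services.materialized import Materialized

SERVICES = [
    # Azure-compatible blob store emulator; registers under the name "azurite".
    Azurite(),
    Materialized(
        external_blob_store=True,   # point persist at an external blob store
        blob_store_is_azure=True,   # ...and make that blob store Azurite
    ),
]


def workflow_default(c: Composition) -> None:
    # Composition.blob_store() (added in this change) resolves to "azurite"
    # when the Azurite service is defined, and to "minio" otherwise.
    c.up(c.blob_store(), "materialized")
```

Compositions in this diff opt into the same behavior at runtime via the `--azurite` flag that is threaded through the pipeline steps and scenario constructors above.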
diff --git a/misc/python/materialize/mzcompose/__init__.py b/misc/python/materialize/mzcompose/__init__.py index 12b7c59df746c..c1ddef3c5e93a 100644 --- a/misc/python/materialize/mzcompose/__init__.py +++ b/misc/python/materialize/mzcompose/__init__.py @@ -76,7 +76,6 @@ def get_default_system_parameters( # Persist internals changes: advance coverage "persist_enable_arrow_lgalloc_noncc_sizes": "true", "persist_enable_s3_lgalloc_noncc_sizes": "true", - "persist_enable_one_alloc_per_request": "true", # ----- # Others (ordered by name) "allow_real_time_recency": "true", @@ -153,8 +152,7 @@ def get_default_system_parameters( "storage_statistics_interval": "2000", "storage_use_continual_feedback_upsert": "true", "storage_use_reclock_v2": "true", - "storage_reclock_to_latest": "true", - "with_0dt_deployment_max_wait": "900s", + "with_0dt_deployment_max_wait": "1800s", # End of list (ordered by name) } @@ -261,7 +259,7 @@ def replica_size( workers: int, scale: int, disabled: bool = False, - is_cc: bool = False, + is_cc: bool = True, memory_limit: str | None = None, ) -> dict[str, Any]: return { @@ -281,8 +279,10 @@ def replica_size( bootstrap_cluster_replica_size(): replica_size(1, 1), "2-4": replica_size(4, 2), "free": replica_size(0, 0, disabled=True), - "1cc": replica_size(1, 1, is_cc=True), - "1C": replica_size(1, 1, is_cc=True), + "1cc": replica_size(1, 1), + "1C": replica_size(1, 1), + "1-no-disk": replica_size(1, 1, is_cc=False), + "2-no-disk": replica_size(2, 1, is_cc=False), } for i in range(0, 6): diff --git a/misc/python/materialize/mzcompose/composition.py b/misc/python/materialize/mzcompose/composition.py index e1e12789ba178..fd3f3f1a65096 100644 --- a/misc/python/materialize/mzcompose/composition.py +++ b/misc/python/materialize/mzcompose/composition.py @@ -1077,6 +1077,13 @@ def metadata_store(self) -> str: f"No external metadata store found: {self.compose['services']}" ) + def blob_store(self) -> str: + for name in ["azurite", "minio"]: + if name in self.compose["services"]: + print(f"BLOB STORE IS: {name}") + return name + raise RuntimeError(f"No external blob store found: {self.compose['services']}") + def capture_logs(self, *services: str) -> None: # Capture logs into services.log since they will be lost otherwise # after dowing a composition. diff --git a/misc/python/materialize/mzcompose/services/azure.py b/misc/python/materialize/mzcompose/services/azure.py new file mode 100644 index 0000000000000..a4d2d0524324e --- /dev/null +++ b/misc/python/materialize/mzcompose/services/azure.py @@ -0,0 +1,71 @@ +# Copyright Materialize, Inc. and contributors. All rights reserved. +# +# Use of this software is governed by the Business Source License +# included in the LICENSE file at the root of this repository. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0. 
+ + +from materialize.mzcompose.service import Service, ServiceHealthcheck + + +def azure_blob_uri(address: str = "azurite") -> str: + return f"http://devstoreaccount1.{address}:10000/container" + + +class Azurite(Service): + DEFAULT_AZURITE_TAG = "3.33.0" + + def __init__( + self, + name: str = "azurite", + aliases: list[str] = ["azurite", "devstoreaccount1.azurite"], + image: str | None = None, + command: list[str] | None = None, + in_memory: bool = False, + healthcheck: ServiceHealthcheck | None = None, + stop_grace_period: str = "120s", + ports: list[int | str] = [10000], + allow_host_ports: bool = False, + ): + if image is None: + image = ( + f"mcr.microsoft.com/azure-storage/azurite:{self.DEFAULT_AZURITE_TAG}" + ) + + if command is None: + command = [ + "azurite-blob", + "--blobHost", + "0.0.0.0", + "--blobPort", + "10000", + "--disableProductStyleUrl", + "--loose", + ] + + if in_memory: + command.append("--inMemoryPersistence") + + if healthcheck is None: + healthcheck = { + "test": "nc 127.0.0.1 10000 -z", + "interval": "1s", + "start_period": "30s", + } + + super().__init__( + name=name, + config={ + "image": image, + "networks": {"default": {"aliases": aliases}}, + "ports": ports, + "allow_host_ports": allow_host_ports, + "command": command, + "init": True, + "healthcheck": healthcheck, + "stop_grace_period": stop_grace_period, + }, + ) diff --git a/misc/python/materialize/mzcompose/services/materialized.py b/misc/python/materialize/mzcompose/services/materialized.py index e734797187ca1..0105cb21b052d 100644 --- a/misc/python/materialize/mzcompose/services/materialized.py +++ b/misc/python/materialize/mzcompose/services/materialized.py @@ -27,6 +27,7 @@ ServiceConfig, ServiceDependency, ) +from materialize.mzcompose.services.azure import azure_blob_uri from materialize.mzcompose.services.minio import minio_blob_uri from materialize.mzcompose.services.postgres import METADATA_STORE @@ -69,7 +70,8 @@ def __init__( environment_id: str | None = None, propagate_crashes: bool = True, external_metadata_store: str | bool = False, - external_minio: str | bool = False, + external_blob_store: str | bool = False, + blob_store_is_azure: bool = False, unsafe_mode: bool = True, restart: str | None = None, use_default_volumes: bool = True, @@ -169,10 +171,15 @@ def __init__( environment_id = DEFAULT_MZ_ENVIRONMENT_ID command += [f"--environment-id={environment_id}"] - if external_minio: - depends_graph["minio"] = {"condition": "service_healthy"} - address = "minio" if external_minio == True else external_minio - persist_blob_url = minio_blob_uri(address) + if external_blob_store: + blob_store = "azurite" if blob_store_is_azure else "minio" + depends_graph[blob_store] = {"condition": "service_healthy"} + address = blob_store if external_blob_store == True else external_blob_store + persist_blob_url = ( + azure_blob_uri(address) + if blob_store_is_azure + else minio_blob_uri(address) + ) if persist_blob_url: command.append(f"--persist-blob-url={persist_blob_url}") diff --git a/misc/python/materialize/mzcompose/services/testdrive.py b/misc/python/materialize/mzcompose/services/testdrive.py index 1781ae8ae2fb6..67ff6c8c06359 100644 --- a/misc/python/materialize/mzcompose/services/testdrive.py +++ b/misc/python/materialize/mzcompose/services/testdrive.py @@ -18,6 +18,8 @@ ServiceConfig, ServiceDependency, ) +from materialize.mzcompose.services.azure import azure_blob_uri +from materialize.mzcompose.services.minio import minio_blob_uri from materialize.mzcompose.services.postgres import METADATA_STORE 
@@ -52,7 +54,8 @@ def __init__( aws_secret_access_key: str | None = "minioadmin", no_consistency_checks: bool = False, external_metadata_store: bool = False, - external_minio: bool = False, + external_blob_store: bool = False, + blob_store_is_azure: bool = False, fivetran_destination: bool = False, fivetran_destination_url: str = "http://fivetran-destination:6874", fivetran_destination_files_path: str = "/share/tmp", @@ -157,9 +160,14 @@ def __init__( f"--fivetran-destination-files-path={fivetran_destination_files_path}" ) - if external_minio: - depends_graph["minio"] = {"condition": "service_healthy"} - persist_blob_url = "s3://minioadmin:minioadmin@persist/persist?endpoint=http://minio:9000/®ion=minio" + if external_blob_store: + blob_store = "azurite" if blob_store_is_azure else "minio" + address = blob_store if external_blob_store == True else external_blob_store + persist_blob_url = ( + azure_blob_uri(address) + if blob_store_is_azure + else minio_blob_uri(address) + ) entrypoint.append(f"--persist-blob-url={persist_blob_url}") else: entrypoint.append("--persist-blob-url=file:///mzdata/persist/blob") diff --git a/misc/python/materialize/parallel_workload/action.py b/misc/python/materialize/parallel_workload/action.py index 6c53def51b743..13093aff3b389 100644 --- a/misc/python/materialize/parallel_workload/action.py +++ b/misc/python/materialize/parallel_workload/action.py @@ -1011,13 +1011,15 @@ def __init__( self.flags_with_values["persist_optimize_ignored_data_decode"] = ( BOOLEAN_FLAG_VALUES ) - self.flags_with_values["persist_write_diffs_sum"] = BOOLEAN_FLAG_VALUES self.flags_with_values["enable_variadic_left_join_lowering"] = ( BOOLEAN_FLAG_VALUES ) self.flags_with_values["enable_eager_delta_joins"] = BOOLEAN_FLAG_VALUES - self.flags_with_values["persist_batch_columnar_format"] = ["row", "both_v2"] - self.flags_with_values["persist_record_schema_id"] = BOOLEAN_FLAG_VALUES + self.flags_with_values["persist_batch_columnar_format"] = [ + "row", + "both_v2", + "structured", + ] self.flags_with_values["persist_batch_structured_order"] = BOOLEAN_FLAG_VALUES self.flags_with_values["persist_batch_builder_structured"] = BOOLEAN_FLAG_VALUES self.flags_with_values["persist_batch_structured_key_lower_len"] = [ @@ -1594,12 +1596,14 @@ def __init__( self, rng: random.Random, composition: Composition | None, + azurite: bool, sanity_restart: bool, system_param_fn: Callable[[dict[str, str]], dict[str, str]] = lambda x: x, ): super().__init__(rng, composition) self.system_param_fn = system_param_fn self.system_parameters = {} + self.azurite = azurite self.sanity_restart = sanity_restart def run(self, exe: Executor) -> bool: @@ -1609,7 +1613,9 @@ def run(self, exe: Executor) -> bool: with self.composition.override( Materialized( restart="on-failure", - external_minio="toxiproxy", + # TODO: Retry with toxiproxy on azurite + external_blob_store=True, + blob_store_is_azure=self.azurite, external_metadata_store="toxiproxy", ports=["6975:6875", "6976:6876", "6977:6877"], sanity_restart=self.sanity_restart, @@ -1627,9 +1633,11 @@ def __init__( self, rng: random.Random, composition: Composition | None, + azurite: bool, sanity_restart: bool, ): super().__init__(rng, composition) + self.azurite = azurite self.sanity_restart = sanity_restart self.deploy_generation = 0 @@ -1650,7 +1658,9 @@ def run(self, exe: Executor) -> bool: with self.composition.override( Materialized( name=mz_service, - external_minio="toxiproxy", + # TODO: Retry with toxiproxy on azurite + external_blob_store=True, + 
blob_store_is_azure=self.azurite, external_metadata_store="toxiproxy", ports=ports, sanity_restart=self.sanity_restart, diff --git a/misc/python/materialize/parallel_workload/database.py b/misc/python/materialize/parallel_workload/database.py index 97504f939bc70..0a47ba77f99af 100644 --- a/misc/python/materialize/parallel_workload/database.py +++ b/misc/python/materialize/parallel_workload/database.py @@ -517,7 +517,7 @@ def __init__( schema.name(), cluster.name(), ) - workload = rng.choice(list(WORKLOADS))() + workload = rng.choice(list(WORKLOADS))(azurite=False) for transaction_def in workload.cycle: for definition in transaction_def.operations: if type(definition) == Insert and definition.count > MAX_ROWS: @@ -677,7 +677,7 @@ def __init__( schema.name(), cluster.name(), ) - self.generator = rng.choice(list(WORKLOADS))().generate(fields) + self.generator = rng.choice(list(WORKLOADS))(azurite=False).generate(fields) self.lock = threading.Lock() def name(self) -> str: @@ -745,7 +745,7 @@ def __init__( schema.name(), cluster.name(), ) - self.generator = rng.choice(list(WORKLOADS))().generate(fields) + self.generator = rng.choice(list(WORKLOADS))(azurite=False).generate(fields) self.lock = threading.Lock() def name(self) -> str: diff --git a/misc/python/materialize/parallel_workload/parallel_workload.py b/misc/python/materialize/parallel_workload/parallel_workload.py index 83266e861607a..3722782de7316 100644 --- a/misc/python/materialize/parallel_workload/parallel_workload.py +++ b/misc/python/materialize/parallel_workload/parallel_workload.py @@ -68,6 +68,7 @@ def run( num_threads: int | None, naughty_identifiers: bool, composition: Composition | None, + azurite: bool, sanity_restart: bool, ) -> None: num_threads = num_threads or os.cpu_count() or 10 @@ -222,7 +223,7 @@ def run( assert composition, "Kill scenario only works in mzcompose" worker = Worker( worker_rng, - [KillAction(worker_rng, composition, sanity_restart)], + [KillAction(worker_rng, composition, azurite, sanity_restart)], [1], end_time, autocommit=False, @@ -246,6 +247,7 @@ def run( ZeroDowntimeDeployAction( worker_rng, composition, + azurite, sanity_restart, ) ], @@ -474,6 +476,9 @@ def parse_common_args(parser: argparse.ArgumentParser) -> None: action="store_true", help="Whether to initialize expensive parts like SQLsmith, sources, sinks (for fast local testing, reduces coverage)", ) + parser.add_argument( + "--azurite", action="store_true", help="Use Azurite as blob store instead of S3" + ) def main() -> int: @@ -525,6 +530,7 @@ def main() -> int: args.threads, args.naughty_identifiers, composition=None, # only works in mzcompose + azurite=args.azurite, sanity_restart=False, # only works in mzcompose ) return 0 diff --git a/misc/python/materialize/version_list.py b/misc/python/materialize/version_list.py index a830c82bee282..041a2043d3821 100644 --- a/misc/python/materialize/version_list.py +++ b/misc/python/materialize/version_list.py @@ -27,6 +27,11 @@ MZ_ROOT = Path(os.environ["MZ_ROOT"]) +LTS_VERSIONS = [ + MzVersion.parse_mz("v0.130.1"), # v25.1.0 + # Put new versions at the bottom +] + # not released on Docker INVALID_VERSIONS = { MzVersion.parse_mz("v0.52.1"), diff --git a/misc/python/materialize/zippy/backup_and_restore_actions.py b/misc/python/materialize/zippy/backup_and_restore_actions.py index 9f52e6acfa996..18185700fe7e4 100644 --- a/misc/python/materialize/zippy/backup_and_restore_actions.py +++ b/misc/python/materialize/zippy/backup_and_restore_actions.py @@ -21,6 +21,10 @@ def requires(cls) -> 
set[type[Capability]]: return {MzIsRunning, CockroachIsRunning} def run(self, c: Composition, state: State) -> None: + # TODO: Support and test azurite backups + if c.blob_store() == "azurite": + return + # Required because of database-issues#6880 c.kill("storaged") @@ -28,7 +32,8 @@ def run(self, c: Composition, state: State) -> None: with c.override( Materialized( name=state.mz_service, - external_minio=True, + external_blob_store=True, + blob_store_is_azure=c.blob_store() == "azurite", external_metadata_store=True, deploy_generation=state.deploy_generation, system_parameter_defaults=state.system_parameter_defaults, diff --git a/misc/python/materialize/zippy/minio_actions.py b/misc/python/materialize/zippy/blob_store_actions.py similarity index 65% rename from misc/python/materialize/zippy/minio_actions.py rename to misc/python/materialize/zippy/blob_store_actions.py index f83f87926a4d1..64b82eed09fad 100644 --- a/misc/python/materialize/zippy/minio_actions.py +++ b/misc/python/materialize/zippy/blob_store_actions.py @@ -10,28 +10,29 @@ import time from materialize.mzcompose.composition import Composition +from materialize.zippy.blob_store_capabilities import BlobStoreIsRunning from materialize.zippy.framework import Action, Capability, State -from materialize.zippy.minio_capabilities import MinioIsRunning -class MinioStart(Action): - """Starts a Minio instance.""" +class BlobStoreStart(Action): + """Starts a BlobStore instance.""" def run(self, c: Composition, state: State) -> None: - c.up("minio") + c.up(c.blob_store()) def provides(self) -> list[Capability]: - return [MinioIsRunning()] + return [BlobStoreIsRunning()] -class MinioRestart(Action): - """Restart the Minio instance.""" +class BlobStoreRestart(Action): + """Restart the BlobStore instance.""" @classmethod def requires(cls) -> set[type[Capability]]: - return {MinioIsRunning} + return {BlobStoreIsRunning} def run(self, c: Composition, state: State) -> None: - c.kill("minio") + blob_store = c.blob_store() + c.kill(blob_store) time.sleep(1) - c.up("minio") + c.up(blob_store) diff --git a/misc/python/materialize/zippy/minio_capabilities.py b/misc/python/materialize/zippy/blob_store_capabilities.py similarity index 92% rename from misc/python/materialize/zippy/minio_capabilities.py rename to misc/python/materialize/zippy/blob_store_capabilities.py index 1d5b4a34bd559..a44cd11441eaa 100644 --- a/misc/python/materialize/zippy/minio_capabilities.py +++ b/misc/python/materialize/zippy/blob_store_capabilities.py @@ -10,5 +10,5 @@ from materialize.zippy.framework import Capability -class MinioIsRunning(Capability): +class BlobStoreIsRunning(Capability): pass diff --git a/misc/python/materialize/zippy/mz_actions.py b/misc/python/materialize/zippy/mz_actions.py index 840435b4d62dc..d41adbc7954f6 100644 --- a/misc/python/materialize/zippy/mz_actions.py +++ b/misc/python/materialize/zippy/mz_actions.py @@ -15,6 +15,7 @@ Materialized, ) from materialize.zippy.balancerd_capabilities import BalancerdIsRunning +from materialize.zippy.blob_store_capabilities import BlobStoreIsRunning from materialize.zippy.crdb_capabilities import CockroachIsRunning from materialize.zippy.framework import ( Action, @@ -24,7 +25,6 @@ Mz0dtDeployBaseAction, State, ) -from materialize.zippy.minio_capabilities import MinioIsRunning from materialize.zippy.mz_capabilities import MzIsRunning from materialize.zippy.view_capabilities import ViewExists @@ -34,7 +34,7 @@ class MzStartParameterized(ActionFactory): @classmethod def requires(cls) -> set[type[Capability]]: - 
return {CockroachIsRunning, MinioIsRunning} + return {CockroachIsRunning, BlobStoreIsRunning} @classmethod def incompatible_with(cls) -> set[type[Capability]]: @@ -59,7 +59,7 @@ class MzStart(Action): @classmethod def requires(cls) -> set[type[Capability]]: - return {CockroachIsRunning, MinioIsRunning} + return {CockroachIsRunning, BlobStoreIsRunning} @classmethod def incompatible_with(cls) -> set[type[Capability]]: @@ -81,7 +81,8 @@ def run(self, c: Composition, state: State) -> None: with c.override( Materialized( name=state.mz_service, - external_minio=True, + external_blob_store=True, + blob_store_is_azure=c.blob_store() == "azurite", external_metadata_store=True, deploy_generation=state.deploy_generation, system_parameter_defaults=state.system_parameter_defaults, @@ -158,7 +159,8 @@ def run(self, c: Composition, state: State) -> None: with c.override( Materialized( name=state.mz_service, - external_minio=True, + external_blob_store=True, + blob_store_is_azure=c.blob_store() == "azurite", external_metadata_store=True, deploy_generation=state.deploy_generation, system_parameter_defaults=state.system_parameter_defaults, @@ -190,7 +192,8 @@ def run(self, c: Composition, state: State) -> None: with c.override( Materialized( name=state.mz_service, - external_minio=True, + external_blob_store=True, + blob_store_is_azure=c.blob_store() == "azurite", external_metadata_store=True, deploy_generation=state.deploy_generation, system_parameter_defaults=state.system_parameter_defaults, diff --git a/misc/python/materialize/zippy/scenarios.py b/misc/python/materialize/zippy/scenarios.py index d147f9a46c1dd..ba3c1c564979d 100644 --- a/misc/python/materialize/zippy/scenarios.py +++ b/misc/python/materialize/zippy/scenarios.py @@ -15,6 +15,7 @@ BalancerdStart, BalancerdStop, ) +from materialize.zippy.blob_store_actions import BlobStoreRestart, BlobStoreStart from materialize.zippy.crdb_actions import CockroachRestart, CockroachStart from materialize.zippy.debezium_actions import CreateDebeziumSource, DebeziumStart from materialize.zippy.framework import ActionOrFactory @@ -25,7 +26,6 @@ KafkaStart, ) from materialize.zippy.kafka_capabilities import Envelope -from materialize.zippy.minio_actions import MinioRestart, MinioStart from materialize.zippy.mysql_actions import ( CreateMySqlTable, MySqlDML, @@ -72,7 +72,7 @@ def bootstrap(self) -> list[ActionOrFactory]: return [ KafkaStart, CockroachStart, - MinioStart, + BlobStoreStart, MzStart, StoragedStart, BalancerdStart, @@ -277,8 +277,8 @@ def actions_with_weight(self) -> dict[ActionOrFactory, float]: } -class CrdbMinioRestart(Scenario): - """A Zippy test that restarts CRDB and Minio.""" +class CrdbBlobStoreRestart(Scenario): + """A Zippy test that restarts CRDB and BlobStore.""" def actions_with_weight(self) -> dict[ActionOrFactory, float]: return { @@ -296,7 +296,7 @@ def actions_with_weight(self) -> dict[ActionOrFactory, float]: # Disabled because a separate clusterd is not supported by Mz0dtDeploy yet # StoragedRestart: 10, CockroachRestart: 15, - MinioRestart: 15, + BlobStoreRestart: 15, } diff --git a/misc/python/materialize/zippy/storaged_actions.py b/misc/python/materialize/zippy/storaged_actions.py index 86244c14e054f..e9a79e157c0f7 100644 --- a/misc/python/materialize/zippy/storaged_actions.py +++ b/misc/python/materialize/zippy/storaged_actions.py @@ -9,9 +9,9 @@ from materialize.mzcompose.composition import Composition +from materialize.zippy.blob_store_capabilities import BlobStoreIsRunning from materialize.zippy.crdb_capabilities import 
CockroachIsRunning from materialize.zippy.framework import Action, Capability, State -from materialize.zippy.minio_capabilities import MinioIsRunning from materialize.zippy.mz_capabilities import MzIsRunning from materialize.zippy.storaged_capabilities import StoragedRunning @@ -21,7 +21,7 @@ class StoragedStart(Action): @classmethod def requires(cls) -> set[type[Capability]]: - return {CockroachIsRunning, MinioIsRunning} + return {CockroachIsRunning, BlobStoreIsRunning} @classmethod def incompatible_with(cls) -> set[type[Capability]]: diff --git a/pyproject.toml b/pyproject.toml index dbb16a72e407c..d0d13e3cd762f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ [tool.black] target_version = ["py310"] # DEFAULT_EXCLUDES from https://github.com/psf/black/blob/main/src/black/const.py but without "build" directory since we use it in our source code. Instead exclude target and target-xcompile -exclude = "(\\.direnv|\\.eggs|\\.git|\\.hg|\\.ipynb_checkpoints|\\.mypy_cache|\\.nox|\\.pytest_cache|\\.ruff_cache|\\.tox|\\.svn|\\.venv|\\.vscode|__pypackages__|_build|buck-out|dist|venv|target|target-xcompile)" +exclude = "(\\.direnv|\\.eggs|\\.git|\\.hg|\\.ipynb_checkpoints|\\.mypy_cache|\\.nox|\\.pytest_cache|\\.ruff_cache|\\.tox|\\.svn|\\.venv|\\.vscode|__pypackages__|_build|buck-out|dist|venv|target|target-xcompile|\\.terraform)" [tool.ruff] target-version = "py310" @@ -35,7 +35,7 @@ known-first-party = ["materialize"] extraPaths = ["misc/python"] venvPath = "./misc/python" venv = "venv" -exclude = ["./misc/python/venv"] +exclude = ["**/venv", "**/.terraform"] stubPath = "./misc/python/stubs" reportMissingImports = "warning" diff --git a/src/adapter-types/src/bootstrap_builtin_cluster_config.rs b/src/adapter-types/src/bootstrap_builtin_cluster_config.rs new file mode 100644 index 0000000000000..09e92913fb0d5 --- /dev/null +++ b/src/adapter-types/src/bootstrap_builtin_cluster_config.rs @@ -0,0 +1,24 @@ +// Copyright Materialize, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +//! Types for bootstrap builtin cluster configuration. + +#[derive(Debug, Clone)] +pub struct BootstrapBuiltinClusterConfig { + pub size: String, + pub replication_factor: u32, +} + +pub const SYSTEM_CLUSTER_DEFAULT_REPLICATION_FACTOR: u32 = 1; +pub const CATALOG_SERVER_CLUSTER_DEFAULT_REPLICATION_FACTOR: u32 = 1; +pub const PROBE_CLUSTER_DEFAULT_REPLICATION_FACTOR: u32 = 1; +// Support and analytics clusters are ephemeral - they are only spun up temporarily when needed. +// Since they are short-lived, they don't need replication by default. +pub const SUPPORT_CLUSTER_DEFAULT_REPLICATION_FACTOR: u32 = 0; +pub const ANALYTICS_CLUSTER_DEFAULT_REPLICATION_FACTOR: u32 = 0; diff --git a/src/adapter-types/src/lib.rs b/src/adapter-types/src/lib.rs index b2cc1e3f9b2cd..48cd0c52f480e 100644 --- a/src/adapter-types/src/lib.rs +++ b/src/adapter-types/src/lib.rs @@ -9,6 +9,7 @@ //! Types for the adapter. 
+pub mod bootstrap_builtin_cluster_config; pub mod compaction; pub mod connection; pub mod dyncfgs; diff --git a/src/adapter/src/catalog.rs b/src/adapter/src/catalog.rs index db4a00712e1ca..e5a19f706d7a1 100644 --- a/src/adapter/src/catalog.rs +++ b/src/adapter/src/catalog.rs @@ -19,6 +19,11 @@ use std::sync::Arc; use futures::future::BoxFuture; use futures::{Future, FutureExt}; use itertools::Itertools; +use mz_adapter_types::bootstrap_builtin_cluster_config::{ + BootstrapBuiltinClusterConfig, ANALYTICS_CLUSTER_DEFAULT_REPLICATION_FACTOR, + CATALOG_SERVER_CLUSTER_DEFAULT_REPLICATION_FACTOR, PROBE_CLUSTER_DEFAULT_REPLICATION_FACTOR, + SUPPORT_CLUSTER_DEFAULT_REPLICATION_FACTOR, SYSTEM_CLUSTER_DEFAULT_REPLICATION_FACTOR, +}; use mz_adapter_types::connection::ConnectionId; use mz_audit_log::{EventType, FullNameV1, ObjectType, VersionedStorageUsage}; use mz_build_info::DUMMY_BUILD_INFO; @@ -691,11 +696,26 @@ impl Catalog { boot_ts: previous_ts, skip_migrations: true, cluster_replica_sizes: bootstrap_args.cluster_replica_size_map.clone(), - builtin_system_cluster_replica_size: replica_size.clone(), - builtin_catalog_server_cluster_replica_size: replica_size.clone(), - builtin_probe_cluster_replica_size: replica_size.clone(), - builtin_support_cluster_replica_size: replica_size.clone(), - builtin_analytics_cluster_replica_size: replica_size.clone(), + builtin_system_cluster_config: BootstrapBuiltinClusterConfig { + size: replica_size.clone(), + replication_factor: SYSTEM_CLUSTER_DEFAULT_REPLICATION_FACTOR, + }, + builtin_catalog_server_cluster_config: BootstrapBuiltinClusterConfig { + size: replica_size.clone(), + replication_factor: CATALOG_SERVER_CLUSTER_DEFAULT_REPLICATION_FACTOR, + }, + builtin_probe_cluster_config: BootstrapBuiltinClusterConfig { + size: replica_size.clone(), + replication_factor: PROBE_CLUSTER_DEFAULT_REPLICATION_FACTOR, + }, + builtin_support_cluster_config: BootstrapBuiltinClusterConfig { + size: replica_size.clone(), + replication_factor: SUPPORT_CLUSTER_DEFAULT_REPLICATION_FACTOR, + }, + builtin_analytics_cluster_config: BootstrapBuiltinClusterConfig { + size: replica_size.clone(), + replication_factor: ANALYTICS_CLUSTER_DEFAULT_REPLICATION_FACTOR, + }, system_parameter_defaults, remote_system_parameters: None, availability_zones: vec![], diff --git a/src/adapter/src/catalog/open.rs b/src/adapter/src/catalog/open.rs index 2a1cb4c74411d..a4505e717155e 100644 --- a/src/adapter/src/catalog/open.rs +++ b/src/adapter/src/catalog/open.rs @@ -17,6 +17,7 @@ use std::time::{Duration, Instant}; use futures::future::{BoxFuture, FutureExt}; use itertools::{Either, Itertools}; +use mz_adapter_types::bootstrap_builtin_cluster_config::BootstrapBuiltinClusterConfig; use mz_adapter_types::compaction::CompactionWindow; use mz_adapter_types::dyncfgs::{ENABLE_CONTINUAL_TASK_BUILTINS, ENABLE_EXPRESSION_CACHE}; use mz_catalog::builtin::{ @@ -317,22 +318,22 @@ impl Catalog { // Add any new builtin objects and remove old ones. 
let (migrated_builtins, new_builtin_collections) = add_new_remove_old_builtin_items_migration(&state.config().builtins_cfg, &mut txn)?; - let cluster_sizes = BuiltinBootstrapClusterSizes { - system_cluster: config.builtin_system_cluster_replica_size, - catalog_server_cluster: config.builtin_catalog_server_cluster_replica_size, - probe_cluster: config.builtin_probe_cluster_replica_size, - support_cluster: config.builtin_support_cluster_replica_size, - analytics_cluster: config.builtin_analytics_cluster_replica_size, + let builtin_bootstrap_cluster_config_map = BuiltinBootstrapClusterConfigMap { + system_cluster: config.builtin_system_cluster_config, + catalog_server_cluster: config.builtin_catalog_server_cluster_config, + probe_cluster: config.builtin_probe_cluster_config, + support_cluster: config.builtin_support_cluster_config, + analytics_cluster: config.builtin_analytics_cluster_config, }; add_new_remove_old_builtin_clusters_migration( &mut txn, - &cluster_sizes, + &builtin_bootstrap_cluster_config_map, &state.cluster_replica_sizes, )?; add_new_remove_old_builtin_introspection_source_migration(&mut txn)?; add_new_remove_old_builtin_cluster_replicas_migration( &mut txn, - &cluster_sizes, + &builtin_bootstrap_cluster_config_map, &state.cluster_replica_sizes, )?; add_new_remove_old_builtin_roles_migration(&mut txn)?; @@ -1222,7 +1223,7 @@ fn add_new_remove_old_builtin_items_migration( fn add_new_remove_old_builtin_clusters_migration( txn: &mut mz_catalog::durable::Transaction<'_>, - builtin_cluster_sizes: &BuiltinBootstrapClusterSizes, + builtin_cluster_config_map: &BuiltinBootstrapClusterConfigMap, cluster_sizes: &ClusterReplicaSizeMap, ) -> Result<(), mz_catalog::durable::CatalogError> { let mut durable_clusters: BTreeMap<_, _> = txn @@ -1234,8 +1235,9 @@ fn add_new_remove_old_builtin_clusters_migration( // Add new clusters. 
for builtin_cluster in BUILTIN_CLUSTERS { if durable_clusters.remove(builtin_cluster.name).is_none() { - let cluster_size = builtin_cluster_sizes.get_size(builtin_cluster.name)?; - let cluster_allocation = cluster_sizes.get_allocation_by_name(&cluster_size)?; + let cluster_config = builtin_cluster_config_map.get_config(builtin_cluster.name)?; + let cluster_allocation = cluster_sizes.get_allocation_by_name(&cluster_config.size)?; + txn.insert_system_cluster( builtin_cluster.name, vec![], @@ -1243,9 +1245,9 @@ fn add_new_remove_old_builtin_clusters_migration( builtin_cluster.owner_id.to_owned(), mz_catalog::durable::ClusterConfig { variant: mz_catalog::durable::ClusterVariant::Managed(ClusterVariantManaged { - size: cluster_size, + size: cluster_config.size, availability_zones: vec![], - replication_factor: builtin_cluster.replication_factor, + replication_factor: cluster_config.replication_factor, disk: cluster_allocation.is_cc, logging: default_logging_config(), optimizer_feature_overrides: Default::default(), @@ -1335,7 +1337,7 @@ fn add_new_remove_old_builtin_roles_migration( fn add_new_remove_old_builtin_cluster_replicas_migration( txn: &mut Transaction<'_>, - builtin_cluster_sizes: &BuiltinBootstrapClusterSizes, + builtin_cluster_config_map: &BuiltinBootstrapClusterConfigMap, cluster_sizes: &ClusterReplicaSizeMap, ) -> Result<(), AdapterError> { let cluster_lookup: BTreeMap<_, _> = txn @@ -1363,12 +1365,18 @@ fn add_new_remove_old_builtin_cluster_replicas_migration( let replica_names = durable_replicas .get_mut(&cluster.id) .unwrap_or(&mut empty_map); - if replica_names.remove(builtin_replica.name).is_none() { + + let builtin_cluster_boostrap_config = + builtin_cluster_config_map.get_config(builtin_replica.cluster_name)?; + if replica_names.remove(builtin_replica.name).is_none() + // NOTE(SangJunBak): We need to explicitly check the replication factor because + // BUILT_IN_CLUSTER_REPLICAS is constant throughout all deployments but the replication + // factor is configurable on bootstrap. + && builtin_cluster_boostrap_config.replication_factor > 0 + { let replica_size = match cluster.config.variant { ClusterVariant::Managed(ClusterVariantManaged { ref size, .. }) => size.clone(), - ClusterVariant::Unmanaged => { - builtin_cluster_sizes.get_size(builtin_replica.cluster_name)? 
- } + ClusterVariant::Unmanaged => builtin_cluster_boostrap_config.size, }; let replica_allocation = cluster_sizes.get_allocation_by_name(&replica_size)?; @@ -1482,37 +1490,43 @@ fn default_logging_config() -> ReplicaLogging { interval: Some(Duration::from_secs(1)), } } -pub struct BuiltinBootstrapClusterSizes { - /// Size to default system_cluster on bootstrap - pub system_cluster: String, - /// Size to default catalog_server_cluster on bootstrap - pub catalog_server_cluster: String, - /// Size to default probe_cluster on bootstrap - pub probe_cluster: String, - /// Size to default support_cluster on bootstrap - pub support_cluster: String, + +#[derive(Debug)] +pub struct BuiltinBootstrapClusterConfigMap { + /// Size and replication factor to default system_cluster on bootstrap + pub system_cluster: BootstrapBuiltinClusterConfig, + /// Size and replication factor to default catalog_server_cluster on bootstrap + pub catalog_server_cluster: BootstrapBuiltinClusterConfig, + /// Size and replication factor to default probe_cluster on bootstrap + pub probe_cluster: BootstrapBuiltinClusterConfig, + /// Size and replication factor to default support_cluster on bootstrap + pub support_cluster: BootstrapBuiltinClusterConfig, /// Size to default analytics_cluster on bootstrap - pub analytics_cluster: String, + pub analytics_cluster: BootstrapBuiltinClusterConfig, } -impl BuiltinBootstrapClusterSizes { +impl BuiltinBootstrapClusterConfigMap { /// Gets the size of the builtin cluster based on the provided name - fn get_size(&self, cluster_name: &str) -> Result { - if cluster_name == mz_catalog::builtin::MZ_SYSTEM_CLUSTER.name { - Ok(self.system_cluster.clone()) + fn get_config( + &self, + cluster_name: &str, + ) -> Result { + let cluster_config = if cluster_name == mz_catalog::builtin::MZ_SYSTEM_CLUSTER.name { + &self.system_cluster } else if cluster_name == mz_catalog::builtin::MZ_CATALOG_SERVER_CLUSTER.name { - Ok(self.catalog_server_cluster.clone()) + &self.catalog_server_cluster } else if cluster_name == mz_catalog::builtin::MZ_PROBE_CLUSTER.name { - Ok(self.probe_cluster.clone()) + &self.probe_cluster } else if cluster_name == mz_catalog::builtin::MZ_SUPPORT_CLUSTER.name { - Ok(self.support_cluster.clone()) + &self.support_cluster } else if cluster_name == mz_catalog::builtin::MZ_ANALYTICS_CLUSTER.name { - Ok(self.analytics_cluster.clone()) + &self.analytics_cluster } else { - Err(mz_catalog::durable::CatalogError::Catalog( + return Err(mz_catalog::durable::CatalogError::Catalog( SqlCatalogError::UnexpectedBuiltinCluster(cluster_name.to_owned()), - )) - } + )); + }; + Ok(cluster_config.clone()) } } diff --git a/src/adapter/src/coord.rs b/src/adapter/src/coord.rs index 6ae88749b5e43..b97a786796d23 100644 --- a/src/adapter/src/coord.rs +++ b/src/adapter/src/coord.rs @@ -88,6 +88,7 @@ use futures::StreamExt; use http::Uri; use ipnet::IpNet; use itertools::{Either, Itertools}; +use mz_adapter_types::bootstrap_builtin_cluster_config::BootstrapBuiltinClusterConfig; use mz_adapter_types::compaction::CompactionWindow; use mz_adapter_types::connection::ConnectionId; use mz_adapter_types::dyncfgs::WITH_0DT_DEPLOYMENT_CAUGHT_UP_CHECK_INTERVAL; @@ -991,11 +992,11 @@ pub struct Config { pub cloud_resource_controller: Option>, pub availability_zones: Vec, pub cluster_replica_sizes: ClusterReplicaSizeMap, - pub builtin_system_cluster_replica_size: String, - pub builtin_catalog_server_cluster_replica_size: String, - pub builtin_probe_cluster_replica_size: String, - pub builtin_support_cluster_replica_size: 
String, - pub builtin_analytics_cluster_replica_size: String, + pub builtin_system_cluster_config: BootstrapBuiltinClusterConfig, + pub builtin_catalog_server_cluster_config: BootstrapBuiltinClusterConfig, + pub builtin_probe_cluster_config: BootstrapBuiltinClusterConfig, + pub builtin_support_cluster_config: BootstrapBuiltinClusterConfig, + pub builtin_analytics_cluster_config: BootstrapBuiltinClusterConfig, pub system_parameter_defaults: BTreeMap, pub storage_usage_client: StorageUsageClient, pub storage_usage_collection_interval: Duration, @@ -3816,11 +3817,11 @@ pub fn serve( secrets_controller, cloud_resource_controller, cluster_replica_sizes, - builtin_system_cluster_replica_size, - builtin_catalog_server_cluster_replica_size, - builtin_probe_cluster_replica_size, - builtin_support_cluster_replica_size, - builtin_analytics_cluster_replica_size, + builtin_system_cluster_config, + builtin_catalog_server_cluster_config, + builtin_probe_cluster_config, + builtin_support_cluster_config, + builtin_analytics_cluster_config, system_parameter_defaults, availability_zones, storage_usage_client, @@ -3974,11 +3975,11 @@ pub fn serve( boot_ts: boot_ts.clone(), skip_migrations: false, cluster_replica_sizes, - builtin_system_cluster_replica_size, - builtin_catalog_server_cluster_replica_size, - builtin_probe_cluster_replica_size, - builtin_support_cluster_replica_size, - builtin_analytics_cluster_replica_size, + builtin_system_cluster_config, + builtin_catalog_server_cluster_config, + builtin_probe_cluster_config, + builtin_support_cluster_config, + builtin_analytics_cluster_config, system_parameter_defaults, remote_system_parameters, availability_zones, diff --git a/src/adapter/src/coord/sequencer/inner/create_materialized_view.rs b/src/adapter/src/coord/sequencer/inner/create_materialized_view.rs index b298f4b6c95d1..d227fb43029a4 100644 --- a/src/adapter/src/coord/sequencer/inner/create_materialized_view.rs +++ b/src/adapter/src/coord/sequencer/inner/create_materialized_view.rs @@ -611,7 +611,12 @@ impl Coordinator { // `bootstrap_storage_collections`. if let Some(storage_as_of_ts) = storage_as_of.as_option() { let stmt = mz_sql::parse::parse(&create_sql) - .expect("create_sql is valid") + .map_err(|_| { + AdapterError::internal( + "create materialized view", + "original SQL should roundtrip", + ) + })? 
.into_element() .ast; let ast::Statement::CreateMaterializedView(mut stmt) = stmt else { diff --git a/src/adapter/src/explain/insights.rs b/src/adapter/src/explain/insights.rs index be2afe525ecd3..c63340123d611 100644 --- a/src/adapter/src/explain/insights.rs +++ b/src/adapter/src/explain/insights.rs @@ -14,10 +14,7 @@ use std::fmt::Debug; use std::sync::Arc; use mz_compute_types::dataflows::{BuildDesc, DataflowDescription}; -use mz_expr::{ - AccessStrategy, AggregateExpr, AggregateFunc, Id, MirRelationExpr, OptimizedMirRelationExpr, - RowSetFinishing, -}; +use mz_expr::{AccessStrategy, Id, MirRelationExpr, OptimizedMirRelationExpr, RowSetFinishing}; use mz_ore::num::NonNeg; use mz_repr::explain::ExprHumanizer; use mz_repr::{GlobalId, Timestamp}; @@ -266,15 +263,7 @@ fn global_insights( let [aggregate] = aggregates.as_slice() else { return; }; - let AggregateExpr { - func: AggregateFunc::Count, - distinct: false, - expr, - } = aggregate - else { - return; - }; - if !expr.is_literal_true() { + if !aggregate.is_count_asterisk() { return; } let name = structured_name(humanizer, *id); diff --git a/src/adapter/src/optimize/view.rs b/src/adapter/src/optimize/view.rs index 4b4f92f6126f8..c4b60a1fbb58a 100644 --- a/src/adapter/src/optimize/view.rs +++ b/src/adapter/src/optimize/view.rs @@ -78,6 +78,8 @@ impl Optimize for Optimizer { // MIR ⇒ MIR optimization (local) let expr = if expr.as_const().is_some() { // No need to optimize further, because we already have a constant. + // But trace this at "local", so that `EXPLAIN LOCALLY OPTIMIZED PLAN` can pick it up. + trace_plan!(at: "local", &expr); OptimizedMirRelationExpr(expr) } else { // Call the real optimization. diff --git a/src/balancerd/BUILD.bazel b/src/balancerd/BUILD.bazel index f3cd7662e310d..b827843e9b7fc 100644 --- a/src/balancerd/BUILD.bazel +++ b/src/balancerd/BUILD.bazel @@ -30,7 +30,7 @@ rust_library( proc_macro_deps = [] + all_crate_deps(proc_macro = True), rustc_env = {}, rustc_flags = [], - version = "0.130.0-dev.0", + version = "0.130.13", deps = [ "//src/alloc:mz_alloc", "//src/alloc-default:mz_alloc_default", @@ -70,7 +70,7 @@ rust_test( ), rustc_env = {}, rustc_flags = [], - version = "0.130.0-dev.0", + version = "0.130.13", deps = [ "//src/alloc:mz_alloc", "//src/alloc-default:mz_alloc_default", @@ -137,7 +137,7 @@ rust_test( ), rustc_env = {}, rustc_flags = [], - version = "0.130.0-dev.0", + version = "0.130.13", deps = [ ":mz_balancerd", "//src/alloc:mz_alloc", @@ -175,7 +175,7 @@ rust_binary( proc_macro_deps = [] + all_crate_deps(proc_macro = True), rustc_env = {}, rustc_flags = [], - version = "0.130.0-dev.0", + version = "0.130.13", deps = [ ":mz_balancerd", "//src/alloc:mz_alloc", diff --git a/src/balancerd/Cargo.toml b/src/balancerd/Cargo.toml index 3852704159e3c..3bbe925305227 100644 --- a/src/balancerd/Cargo.toml +++ b/src/balancerd/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "mz-balancerd" description = "Balancer service." 
-version = "0.130.0-dev.0" +version = "0.130.13" edition.workspace = true rust-version.workspace = true publish = false diff --git a/src/catalog-debug/BUILD.bazel b/src/catalog-debug/BUILD.bazel index c081b6e880a05..4ef526ecd9e8b 100644 --- a/src/catalog-debug/BUILD.bazel +++ b/src/catalog-debug/BUILD.bazel @@ -29,9 +29,10 @@ rust_binary( proc_macro_deps = [] + all_crate_deps(proc_macro = True), rustc_env = {}, rustc_flags = [], - version = "0.130.0-dev.0", + version = "0.130.13", deps = [ "//src/adapter:mz_adapter", + "//src/adapter-types:mz_adapter_types", "//src/build-info:mz_build_info", "//src/catalog:mz_catalog", "//src/cloud-resources:mz_cloud_resources", diff --git a/src/catalog-debug/Cargo.toml b/src/catalog-debug/Cargo.toml index 7cd05bd22da80..7aafef572ca54 100644 --- a/src/catalog-debug/Cargo.toml +++ b/src/catalog-debug/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "mz-catalog-debug" description = "Durable metadata storage debug tool." -version = "0.130.0-dev.0" +version = "0.130.13" edition.workspace = true rust-version.workspace = true publish = false @@ -14,6 +14,7 @@ anyhow = "1.0.66" clap = { version = "4.5.23", features = ["derive", "env"] } futures = "0.3.25" mz-adapter = { path = "../adapter" } +mz-adapter-types = { path = "../adapter-types" } mz-build-info = { path = "../build-info" } mz-catalog = { path = "../catalog" } mz-cloud-resources = { path = "../cloud-resources" } diff --git a/src/catalog-debug/src/main.rs b/src/catalog-debug/src/main.rs index 16cc5271c3207..369a5aac1aecd 100644 --- a/src/catalog-debug/src/main.rs +++ b/src/catalog-debug/src/main.rs @@ -23,6 +23,11 @@ use anyhow::Context; use clap::Parser; use futures::future::FutureExt; use mz_adapter::catalog::{Catalog, InitializeStateResult}; +use mz_adapter_types::bootstrap_builtin_cluster_config::{ + BootstrapBuiltinClusterConfig, ANALYTICS_CLUSTER_DEFAULT_REPLICATION_FACTOR, + CATALOG_SERVER_CLUSTER_DEFAULT_REPLICATION_FACTOR, PROBE_CLUSTER_DEFAULT_REPLICATION_FACTOR, + SUPPORT_CLUSTER_DEFAULT_REPLICATION_FACTOR, SYSTEM_CLUSTER_DEFAULT_REPLICATION_FACTOR, +}; use mz_build_info::{build_info, BuildInfo}; use mz_catalog::config::{BuiltinItemMigrationConfig, ClusterReplicaSizeMap, StateConfig}; use mz_catalog::durable::debug::{ @@ -590,11 +595,26 @@ async fn upgrade_check( boot_ts, skip_migrations: false, cluster_replica_sizes, - builtin_system_cluster_replica_size: builtin_clusters_replica_size.clone(), - builtin_catalog_server_cluster_replica_size: builtin_clusters_replica_size.clone(), - builtin_probe_cluster_replica_size: builtin_clusters_replica_size.clone(), - builtin_support_cluster_replica_size: builtin_clusters_replica_size.clone(), - builtin_analytics_cluster_replica_size: builtin_clusters_replica_size, + builtin_system_cluster_config: BootstrapBuiltinClusterConfig { + size: builtin_clusters_replica_size.clone(), + replication_factor: SYSTEM_CLUSTER_DEFAULT_REPLICATION_FACTOR, + }, + builtin_catalog_server_cluster_config: BootstrapBuiltinClusterConfig { + size: builtin_clusters_replica_size.clone(), + replication_factor: CATALOG_SERVER_CLUSTER_DEFAULT_REPLICATION_FACTOR, + }, + builtin_probe_cluster_config: BootstrapBuiltinClusterConfig { + size: builtin_clusters_replica_size.clone(), + replication_factor: PROBE_CLUSTER_DEFAULT_REPLICATION_FACTOR, + }, + builtin_support_cluster_config: BootstrapBuiltinClusterConfig { + size: builtin_clusters_replica_size.clone(), + replication_factor: SUPPORT_CLUSTER_DEFAULT_REPLICATION_FACTOR, + }, + builtin_analytics_cluster_config: BootstrapBuiltinClusterConfig { 
+ size: builtin_clusters_replica_size.clone(), + replication_factor: ANALYTICS_CLUSTER_DEFAULT_REPLICATION_FACTOR, + }, system_parameter_defaults: Default::default(), remote_system_parameters: None, availability_zones: vec![], diff --git a/src/catalog/src/builtin.rs b/src/catalog/src/builtin.rs index cdf91465563f0..0dff83c3679ed 100644 --- a/src/catalog/src/builtin.rs +++ b/src/catalog/src/builtin.rs @@ -299,7 +299,6 @@ pub struct BuiltinCluster { pub name: &'static str, pub privileges: &'static [MzAclItem], pub owner_id: &'static RoleId, - pub replication_factor: u32, } #[derive(Clone, Debug, PartialEq, Eq)] @@ -9059,7 +9058,6 @@ pub const MZ_SYSTEM_CLUSTER: BuiltinCluster = BuiltinCluster { }, rbac::owner_privilege(ObjectType::Cluster, MZ_SYSTEM_ROLE_ID), ], - replication_factor: 1, }; pub const MZ_SYSTEM_CLUSTER_REPLICA: BuiltinClusterReplica = BuiltinClusterReplica { @@ -9083,7 +9081,6 @@ pub const MZ_CATALOG_SERVER_CLUSTER: BuiltinCluster = BuiltinCluster { }, rbac::owner_privilege(ObjectType::Cluster, MZ_SYSTEM_ROLE_ID), ], - replication_factor: 1, }; pub const MZ_CATALOG_SERVER_CLUSTER_REPLICA: BuiltinClusterReplica = BuiltinClusterReplica { @@ -9107,7 +9104,6 @@ pub const MZ_PROBE_CLUSTER: BuiltinCluster = BuiltinCluster { }, rbac::owner_privilege(ObjectType::Cluster, MZ_SYSTEM_ROLE_ID), ], - replication_factor: 1, }; pub const MZ_PROBE_CLUSTER_REPLICA: BuiltinClusterReplica = BuiltinClusterReplica { name: BUILTIN_CLUSTER_REPLICA_NAME, @@ -9125,7 +9121,6 @@ pub const MZ_SUPPORT_CLUSTER: BuiltinCluster = BuiltinCluster { }, rbac::owner_privilege(ObjectType::Cluster, MZ_SUPPORT_ROLE_ID), ], - replication_factor: 0, }; pub const MZ_ANALYTICS_CLUSTER: BuiltinCluster = BuiltinCluster { @@ -9139,7 +9134,6 @@ pub const MZ_ANALYTICS_CLUSTER: BuiltinCluster = BuiltinCluster { }, rbac::owner_privilege(ObjectType::Cluster, MZ_ANALYTICS_ROLE_ID), ], - replication_factor: 0, }; /// List of all builtin objects sorted topologically by dependency. diff --git a/src/catalog/src/config.rs b/src/catalog/src/config.rs index b088ad52060c9..e779f2c2eaad7 100644 --- a/src/catalog/src/config.rs +++ b/src/catalog/src/config.rs @@ -11,6 +11,7 @@ use std::collections::{BTreeMap, BTreeSet}; use bytesize::ByteSize; use ipnet::IpNet; +use mz_adapter_types::bootstrap_builtin_cluster_config::BootstrapBuiltinClusterConfig; use mz_build_info::BuildInfo; use mz_cloud_resources::AwsExternalIdPrefix; use mz_controller::clusters::ReplicaAllocation; @@ -55,16 +56,16 @@ pub struct StateConfig { pub skip_migrations: bool, /// Map of strings to corresponding compute replica sizes. pub cluster_replica_sizes: ClusterReplicaSizeMap, - /// Builtin system cluster replica size. - pub builtin_system_cluster_replica_size: String, - /// Builtin catalog server cluster replica size. - pub builtin_catalog_server_cluster_replica_size: String, - /// Builtin probe cluster replica size. - pub builtin_probe_cluster_replica_size: String, - /// Builtin support cluster replica size. - pub builtin_support_cluster_replica_size: String, - /// Builtin analytics cluster replica size. - pub builtin_analytics_cluster_replica_size: String, + /// Builtin system cluster config. + pub builtin_system_cluster_config: BootstrapBuiltinClusterConfig, + /// Builtin catalog server cluster config. + pub builtin_catalog_server_cluster_config: BootstrapBuiltinClusterConfig, + /// Builtin probe cluster config. + pub builtin_probe_cluster_config: BootstrapBuiltinClusterConfig, + /// Builtin support cluster config. 
+ pub builtin_support_cluster_config: BootstrapBuiltinClusterConfig, + /// Builtin analytics cluster config. + pub builtin_analytics_cluster_config: BootstrapBuiltinClusterConfig, /// Dynamic defaults for system parameters. pub system_parameter_defaults: BTreeMap, /// An optional map of system parameters pulled from a remote frontend. diff --git a/src/catalog/src/memory/objects.rs b/src/catalog/src/memory/objects.rs index f3dd7fe72493d..c929b447c8cfd 100644 --- a/src/catalog/src/memory/objects.rs +++ b/src/catalog/src/memory/objects.rs @@ -1274,7 +1274,7 @@ pub struct View { pub global_id: GlobalId, /// Unoptimized high-level expression from parsing the `create_sql`. pub raw_expr: Arc, - /// Optimized mid-level expression from optimizing the `raw_expr`. + /// Optimized mid-level expression from (locally) optimizing the `raw_expr`. pub optimized_expr: Arc, /// Columns of this view. pub desc: RelationDesc, diff --git a/src/cluster/src/server.rs b/src/cluster/src/server.rs index ffca4329ce86e..cf92bead5659e 100644 --- a/src/cluster/src/server.rs +++ b/src/cluster/src/server.rs @@ -23,7 +23,7 @@ use mz_ore::metrics::MetricsRegistry; use mz_ore::tracing::TracingHandle; use mz_persist_client::cache::PersistClientCache; use mz_service::client::{GenericClient, Partitionable, Partitioned}; -use mz_service::local::LocalClient; +use mz_service::local::{LocalActivator, LocalClient}; use mz_txn_wal::operator::TxnsContext; use timely::communication::initialize::WorkerGuards; use timely::execute::execute_from; @@ -34,7 +34,7 @@ use tracing::{info, warn}; use crate::communication::initialize_networking; -type PartitionedClient = Partitioned, C, R>; +type PartitionedClient = Partitioned, C, R>; /// Configures a cluster server. #[derive(Debug)] @@ -57,7 +57,7 @@ where /// The actual client to talk to the cluster inner: Option, /// The running timely instance - timely_container: TimelyContainerRef, + timely_container: TimelyContainerRef, /// Handle to the persist infrastructure. persist_clients: Arc, /// Context necessary for rendering txn-wal operators. @@ -70,7 +70,7 @@ where } /// Metadata about timely workers in this process. -pub struct TimelyContainer { +pub struct TimelyContainer { /// The current timely config in use config: TimelyConfig, /// Channels over which to send endpoints for wiring up a new Client @@ -78,21 +78,21 @@ pub struct TimelyContainer { crossbeam_channel::Sender<( crossbeam_channel::Receiver, mpsc::UnboundedSender, - mpsc::UnboundedSender, + mpsc::UnboundedSender, )>, >, /// Thread guards that keep worker threads alive _worker_guards: WorkerGuards<()>, } -impl Drop for TimelyContainer { +impl Drop for TimelyContainer { fn drop(&mut self) { panic!("Timely container must never drop"); } } /// Threadsafe reference to an optional TimelyContainer -pub type TimelyContainerRef = Arc>>>; +pub type TimelyContainerRef = Arc>>>; /// Initiates a timely dataflow computation, processing cluster commands. 
pub fn serve( @@ -100,8 +100,8 @@ pub fn serve( worker_config: Worker, ) -> Result< ( - TimelyContainerRef, - impl Fn() -> Box, Worker, C, R>>, + TimelyContainerRef, + impl Fn() -> Box, Worker, C, R>>, ), Error, > @@ -134,7 +134,7 @@ where Ok((timely_container, client_builder)) } -impl ClusterClient, Worker, C, R> +impl ClusterClient, Worker, C, R> where C: Send + 'static, R: Send + 'static, @@ -142,7 +142,7 @@ where Worker: crate::types::AsRunnableWorker + Clone + Send + Sync + 'static, { fn new( - timely_container: TimelyContainerRef, + timely_container: TimelyContainerRef, persist_clients: Arc, txns_ctx: TxnsContext, tokio_handle: tokio::runtime::Handle, @@ -168,7 +168,7 @@ where txns_ctx: TxnsContext, tracing_handle: Arc, tokio_executor: Handle, - ) -> Result, Error> { + ) -> Result, Error> { info!("Building timely container with config {config:?}"); let (client_txs, client_rxs): (Vec<_>, Vec<_>) = (0..config.workers) .map(|_| crossbeam_channel::unbounded()) @@ -309,21 +309,31 @@ where let timely = timely_container.as_ref().expect("set above"); + // Order is important here: If our future is canceled, we need to drop the `command_txs` + // before the `activators` so when the workers are unparked by the dropping of the + // activators they can observed that the senders have disconnected. + let mut activators = Vec::with_capacity(workers); let mut command_txs = Vec::with_capacity(workers); let mut response_rxs = Vec::with_capacity(workers); - let mut activators = Vec::with_capacity(workers); + let mut activator_rxs = Vec::with_capacity(workers); for client_tx in &timely.client_txs { let (cmd_tx, cmd_rx) = crossbeam_channel::unbounded(); let (resp_tx, resp_rx) = mpsc::unbounded_channel(); - let (activator_tx, mut activator_rx) = mpsc::unbounded_channel(); + let (activator_tx, activator_rx) = mpsc::unbounded_channel(); client_tx .send((cmd_rx, resp_tx, activator_tx)) .expect("worker not dropped"); - let activator = activator_rx.recv().await.expect("worker not dropped"); command_txs.push(cmd_tx); response_rxs.push(resp_rx); + activator_rxs.push(activator_rx); + } + + // It's important that we wait for activators only after we have sent the channels to all + // workers. Otherwise we could end up in a stalled state. See database-issues#8957. + for mut activator_rx in activator_rxs { + let activator = activator_rx.recv().await.expect("worker not dropped"); activators.push(activator); } @@ -348,14 +358,12 @@ impl, C, R> Debug } #[async_trait] -impl GenericClient - for ClusterClient, Worker, C, R> +impl GenericClient for ClusterClient, Worker, C, R> where C: Send + Debug + mz_cluster_client::client::TryIntoTimelyConfig + 'static, R: Send + Debug + 'static, (C, R): Partitionable, Worker: crate::types::AsRunnableWorker + Send + Sync + Clone + 'static, - Worker::Activatable: Send + Sync + 'static + Debug, { async fn send(&mut self, cmd: C) -> Result<(), Error> { // Changing this debug statement requires changing the replica-isolation test diff --git a/src/cluster/src/types.rs b/src/cluster/src/types.rs index bcf014f612cc3..b6d01a63fe808 100644 --- a/src/cluster/src/types.rs +++ b/src/cluster/src/types.rs @@ -13,21 +13,15 @@ use std::sync::Arc; use mz_ore::tracing::TracingHandle; use mz_persist_client::cache::PersistClientCache; +use mz_service::local::LocalActivator; use mz_txn_wal::operator::TxnsContext; use timely::worker::Worker as TimelyWorker; +use tokio::sync::mpsc; /// A trait for letting specific server implementations hook /// into handling of `CreateTimely` commands. 
Usually implemented by /// the config object that are specific to the implementation. pub trait AsRunnableWorker { - /// The `Activatable` type this server needs to be activated - /// when being send new commands. - // TODO(guswynn): cluster-unification: currently compute - // and storage have different ways of interacting with the timely - // threads from the grpc server. When the disparate internal - // command flow techniques are merged, this type should go away. - type Activatable: mz_service::local::Activatable + Send; - /// Build and continuously run a worker. Called on each timely /// thread. fn build_and_run( @@ -35,8 +29,8 @@ pub trait AsRunnableWorker { timely_worker: &mut TimelyWorker, client_rx: crossbeam_channel::Receiver<( crossbeam_channel::Receiver, - tokio::sync::mpsc::UnboundedSender, - tokio::sync::mpsc::UnboundedSender, + mpsc::UnboundedSender, + mpsc::UnboundedSender, )>, persist_clients: Arc, txns_ctx: TxnsContext, diff --git a/src/clusterd/BUILD.bazel b/src/clusterd/BUILD.bazel index 3c9b1981bda30..2bdb83f0f2a9c 100644 --- a/src/clusterd/BUILD.bazel +++ b/src/clusterd/BUILD.bazel @@ -31,7 +31,7 @@ rust_library( proc_macro_deps = [] + all_crate_deps(proc_macro = True), rustc_env = {}, rustc_flags = [], - version = "0.130.0-dev.0", + version = "0.130.13", deps = [ "//src/alloc:mz_alloc", "//src/alloc-default:mz_alloc_default", @@ -80,7 +80,7 @@ rust_test( ), rustc_env = {}, rustc_flags = [], - version = "0.130.0-dev.0", + version = "0.130.13", deps = [ "//src/alloc:mz_alloc", "//src/alloc-default:mz_alloc_default", @@ -153,7 +153,7 @@ rust_binary( proc_macro_deps = [] + all_crate_deps(proc_macro = True), rustc_env = {}, rustc_flags = [], - version = "0.130.0-dev.0", + version = "0.130.13", deps = [ ":mz_clusterd", "//src/alloc:mz_alloc", diff --git a/src/clusterd/Cargo.toml b/src/clusterd/Cargo.toml index 3ba6c78fbc867..84b5133fe8120 100644 --- a/src/clusterd/Cargo.toml +++ b/src/clusterd/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "mz-clusterd" description = "Materialize's cluster server." -version = "0.130.0-dev.0" +version = "0.130.13" edition.workspace = true rust-version.workspace = true publish = false diff --git a/src/compute-types/src/dyncfgs.rs b/src/compute-types/src/dyncfgs.rs index d862efadcae86..5a95a2faf279c 100644 --- a/src/compute-types/src/dyncfgs.rs +++ b/src/compute-types/src/dyncfgs.rs @@ -38,10 +38,13 @@ pub const LINEAR_JOIN_YIELDING: Config<&str> = Config::new( work, respectively, rather than falling back to some default.", ); +/// Enable lgalloc. +pub const ENABLE_LGALLOC: Config = Config::new("enable_lgalloc", true, "Enable lgalloc."); + /// Enable lgalloc for columnation. 
pub const ENABLE_COLUMNATION_LGALLOC: Config = Config::new( "enable_columnation_lgalloc", - false, + true, "Enable allocating regions from lgalloc.", ); @@ -164,6 +167,7 @@ pub fn all_dyncfgs(configs: ConfigSet) -> ConfigSet { .add(&ENABLE_MZ_JOIN_CORE) .add(&ENABLE_MATERIALIZED_VIEW_SINK_V2) .add(&LINEAR_JOIN_YIELDING) + .add(&ENABLE_LGALLOC) .add(&ENABLE_COLUMNATION_LGALLOC) .add(&ENABLE_LGALLOC_EAGER_RECLAMATION) .add(&ENABLE_CHUNKED_STACK) diff --git a/src/compute/BUILD.bazel b/src/compute/BUILD.bazel index 8ba03578957f7..0955ce5359be8 100644 --- a/src/compute/BUILD.bazel +++ b/src/compute/BUILD.bazel @@ -39,6 +39,7 @@ rust_library( "//src/persist-client:mz_persist_client", "//src/persist-types:mz_persist_types", "//src/repr:mz_repr", + "//src/service:mz_service", "//src/storage-operators:mz_storage_operators", "//src/storage-types:mz_storage_types", "//src/timely-util:mz_timely_util", @@ -83,6 +84,7 @@ rust_test( "//src/persist-client:mz_persist_client", "//src/persist-types:mz_persist_types", "//src/repr:mz_repr", + "//src/service:mz_service", "//src/storage-operators:mz_storage_operators", "//src/storage-types:mz_storage_types", "//src/timely-util:mz_timely_util", @@ -107,6 +109,7 @@ rust_doc_test( "//src/persist-client:mz_persist_client", "//src/persist-types:mz_persist_types", "//src/repr:mz_repr", + "//src/service:mz_service", "//src/storage-operators:mz_storage_operators", "//src/storage-types:mz_storage_types", "//src/timely-util:mz_timely_util", diff --git a/src/compute/Cargo.toml b/src/compute/Cargo.toml index 5f459f9edeb53..ce9910d203469 100644 --- a/src/compute/Cargo.toml +++ b/src/compute/Cargo.toml @@ -30,6 +30,7 @@ mz-ore = { path = "../ore", features = ["async", "flatcontainer", "process", "tr mz-persist-client = { path = "../persist-client" } mz-persist-types = { path = "../persist-types" } mz-repr = { path = "../repr" } +mz-service = { path = "../service" } mz-storage-operators = { path = "../storage-operators" } mz-storage-types = { path = "../storage-types" } mz-timely-util = { path = "../timely-util" } diff --git a/src/compute/src/command_channel.rs b/src/compute/src/command_channel.rs new file mode 100644 index 0000000000000..2bf082baeca41 --- /dev/null +++ b/src/compute/src/command_channel.rs @@ -0,0 +1,200 @@ +// Copyright Materialize, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +//! A channel for broadcasting compute commands from worker 0 to other workers. +//! +//! Compute uses a dataflow to distribute commands between workers. This is to ensure workers +//! retain a consistent dataflow state across reconnects. If each worker would receive its commands +//! directly from the controller, there wouldn't be any guarantee that after a reconnect all +//! workers have seen the same sequence of commands. This is particularly problematic for +//! `CreateDataflow` commands, since Timely requires that all workers render the same dataflows in +//! the same order. So the controller instead sends commands only to worker 0, which then +//! broadcasts them to other workers through the Timely fabric, taking care of the correct +//! sequencing. +//! +//! Commands in the command channel are tagged with an epoch identifying the incarnation of the +//! 
compute protocol the command belongs to, allowing workers to recognize client reconnects that +//! require a reconciliation. + +use std::sync::{Arc, Mutex}; + +use crossbeam_channel::TryRecvError; +use mz_compute_client::protocol::command::ComputeCommand; +use mz_compute_types::dataflows::{BuildDesc, DataflowDescription}; +use mz_ore::cast::CastFrom; +use timely::communication::Allocate; +use timely::dataflow::channels::pact::Exchange; +use timely::dataflow::operators::generic::source; +use timely::dataflow::operators::Operator; +use timely::scheduling::{Scheduler, SyncActivator}; +use timely::worker::Worker as TimelyWorker; + +/// A sender pushing commands onto the command channel. +pub struct Sender { + tx: crossbeam_channel::Sender<(ComputeCommand, u64)>, + activator: Arc>>, +} + +impl Sender { + /// Broadcasts the given command to all workers. + pub fn send(&self, message: (ComputeCommand, u64)) { + if self.tx.send(message).is_err() { + unreachable!("command channel never shuts down"); + } + + self.activator + .lock() + .expect("poisoned") + .as_ref() + .map(|a| a.activate()); + } +} + +/// A receiver reading commands from the command channel. +pub struct Receiver { + rx: crossbeam_channel::Receiver<(ComputeCommand, u64)>, +} + +impl Receiver { + /// Returns the next available command, if any. + /// + /// This returns `None` when there are currently no commands but there might be commands again + /// in the future. + pub fn try_recv(&self) -> Option<(ComputeCommand, u64)> { + match self.rx.try_recv() { + Ok(msg) => Some(msg), + Err(TryRecvError::Empty) => None, + Err(TryRecvError::Disconnected) => { + unreachable!("command channel never shuts down"); + } + } + } +} + +/// Render the command channel dataflow. +pub fn render(timely_worker: &mut TimelyWorker) -> (Sender, Receiver) { + let (input_tx, input_rx) = crossbeam_channel::unbounded(); + let (output_tx, output_rx) = crossbeam_channel::unbounded(); + let activator = Arc::new(Mutex::new(None)); + + // TODO(teskje): This implementation relies on Timely channels preserving the order of their + // inputs, which is not something they guarantee. We can avoid that by using explicit indexing, + // like storage's command sequencer does. + timely_worker.dataflow_named::("command_channel", { + let activator = Arc::clone(&activator); + move |scope| { + source(scope, "command_channel::source", |cap, info| { + let sync_activator = scope.sync_activator_for(info.address.to_vec()); + *activator.lock().expect("poisoned") = Some(sync_activator); + + let worker_id = scope.index(); + let peers = scope.peers(); + + // Only worker 0 broadcasts commands, other workers must drop their capability to + // avoid holding up dataflow progress. + let mut capability = (worker_id == 0).then_some(cap); + + move |output| { + let Some(cap) = &mut capability else { + // Non-leader workers will still receive `UpdateConfiguration` commands and + // we must drain those to not leak memory. 
+ while let Ok((cmd, _epoch)) = input_rx.try_recv() { + assert_ne!(worker_id, 0); + assert!(matches!(cmd, ComputeCommand::UpdateConfiguration(_))); + } + return; + }; + + assert_eq!(worker_id, 0); + + let input: Vec<_> = input_rx.try_iter().collect(); + for (cmd, epoch) in input { + let worker_cmds = + split_command(cmd, peers).map(|(idx, cmd)| (idx, cmd, epoch)); + output.session(&cap).give_iterator(worker_cmds); + + cap.downgrade(&(cap.time() + 1)); + } + } + }) + .sink( + Exchange::new(|(idx, _, _)| u64::cast_from(*idx)), + "command_channel::sink", + move |input| { + while let Some((_cap, data)) = input.next() { + for (_idx, cmd, epoch) in data.drain(..) { + let _ = output_tx.send((cmd, epoch)); + } + } + }, + ); + } + }); + + let tx = Sender { + tx: input_tx, + activator, + }; + let rx = Receiver { rx: output_rx }; + + (tx, rx) +} + +/// Split the given command into the given number of parts. +/// +/// Returns an iterator that produces each command part, along with its part index. +fn split_command( + command: ComputeCommand, + parts: usize, +) -> impl Iterator { + use itertools::Either; + + let commands = match command { + ComputeCommand::CreateDataflow(dataflow) => { + // A list of descriptions of objects for each part to build. + let mut builds_parts = vec![Vec::new(); parts]; + // Partition each build description among `parts`. + for build_desc in dataflow.objects_to_build { + let build_part = build_desc.plan.partition_among(parts); + for (plan, objects_to_build) in build_part.into_iter().zip(builds_parts.iter_mut()) + { + objects_to_build.push(BuildDesc { + id: build_desc.id, + plan, + }); + } + } + + // Each list of build descriptions results in a dataflow description. + let commands = builds_parts + .into_iter() + .map(move |objects_to_build| DataflowDescription { + source_imports: dataflow.source_imports.clone(), + index_imports: dataflow.index_imports.clone(), + objects_to_build, + index_exports: dataflow.index_exports.clone(), + sink_exports: dataflow.sink_exports.clone(), + as_of: dataflow.as_of.clone(), + until: dataflow.until.clone(), + debug_name: dataflow.debug_name.clone(), + initial_storage_as_of: dataflow.initial_storage_as_of.clone(), + refresh_schedule: dataflow.refresh_schedule.clone(), + time_dependence: dataflow.time_dependence.clone(), + }) + .map(ComputeCommand::CreateDataflow); + Either::Left(commands) + } + command => { + let commands = std::iter::repeat_n(command, parts); + Either::Right(commands) + } + }; + + commands.into_iter().enumerate() +} diff --git a/src/compute/src/compute_state.rs b/src/compute/src/compute_state.rs index 70aa3b9f419dc..348bf0d4d660e 100644 --- a/src/compute/src/compute_state.rs +++ b/src/compute/src/compute_state.rs @@ -227,18 +227,6 @@ impl ComputeState { } } - /// Return whether a collection with the given ID exists. - pub fn collection_exists(&self, id: GlobalId) -> bool { - self.collections.contains_key(&id) - } - - /// Return a reference to the identified collection. - /// - /// Panics if the collection doesn't exist. - pub fn expect_collection(&self, id: GlobalId) -> &CollectionState { - self.collections.get(&id).expect("collection must exist") - } - /// Return a mutable reference to the identified collection. /// /// Panics if the collection doesn't exist. 
@@ -275,7 +263,7 @@ impl ComputeState { self.linear_join_spec = LinearJoinSpec::from_config(config); - if ENABLE_COLUMNATION_LGALLOC.get(config) { + if ENABLE_LGALLOC.get(config) { if let Some(path) = &self.context.scratch_directory { let eager_return = ENABLE_LGALLOC_EAGER_RECLAMATION.get(config); let interval = LGALLOC_BACKGROUND_INTERVAL.get(config); @@ -307,6 +295,11 @@ impl ComputeState { info!("using chunked stack: {chunked_stack}"); mz_timely_util::containers::stack::use_chunked_stack(chunked_stack); + mz_ore::region::ENABLE_LGALLOC_REGION.store( + ENABLE_COLUMNATION_LGALLOC.get(config), + std::sync::atomic::Ordering::Relaxed, + ); + // Remember the maintenance interval locally to avoid reading it from the config set on // every server iteration. self.server_maintenance_interval = COMPUTE_SERVER_MAINTENANCE_INTERVAL.get(config); diff --git a/src/compute/src/lib.rs b/src/compute/src/lib.rs index f62b5b84d587d..55b8eeb448e99 100644 --- a/src/compute/src/lib.rs +++ b/src/compute/src/lib.rs @@ -11,13 +11,15 @@ //! Materialize's compute layer. -pub(crate) mod arrangement; -pub mod compute_state; -pub(crate) mod extensions; -pub(crate) mod logging; -pub(crate) mod metrics; -pub(crate) mod render; -pub(crate) mod row_spine; pub mod server; -pub(crate) mod sink; + +mod arrangement; +mod command_channel; +mod compute_state; +mod extensions; +mod logging; +mod metrics; +mod render; +mod row_spine; +mod sink; mod typedefs; diff --git a/src/compute/src/server.rs b/src/compute/src/server.rs index a4440192356ca..60101425f94f7 100644 --- a/src/compute/src/server.rs +++ b/src/compute/src/server.rs @@ -10,38 +10,36 @@ //! An interactive dataflow server. use std::cell::RefCell; -use std::collections::{BTreeMap, BTreeSet, VecDeque}; +use std::cmp::Ordering; +use std::collections::{BTreeMap, BTreeSet}; +use std::convert::Infallible; use std::fmt::Debug; use std::path::PathBuf; use std::rc::Rc; use std::sync::Arc; +use std::thread; use std::time::{Duration, Instant}; use anyhow::Error; -use crossbeam_channel::{RecvError, TryRecvError}; +use crossbeam_channel::SendError; use mz_cluster::server::TimelyContainerRef; use mz_compute_client::protocol::command::ComputeCommand; use mz_compute_client::protocol::history::ComputeCommandHistory; use mz_compute_client::protocol::response::ComputeResponse; use mz_compute_client::service::ComputeClient; -use mz_compute_types::dataflows::{BuildDesc, DataflowDescription}; -use mz_ore::cast::CastFrom; use mz_ore::halt; use mz_ore::tracing::TracingHandle; use mz_persist_client::cache::PersistClientCache; +use mz_service::local::LocalActivator; use mz_storage_types::connections::ConnectionContext; use mz_txn_wal::operator::TxnsContext; use timely::communication::Allocate; -use timely::dataflow::channels::pact::Exchange; -use timely::dataflow::operators::generic::source; -use timely::dataflow::operators::Operator; -use timely::progress::{Antichain, Timestamp}; -use timely::scheduling::{Scheduler, SyncActivator}; +use timely::progress::Antichain; use timely::worker::Worker as TimelyWorker; use tokio::sync::mpsc; -use tokio::sync::mpsc::error::SendError; use tracing::{info, trace, warn}; +use crate::command_channel; use crate::compute_state::{ActiveComputeState, ComputeState, ReportedFrontier}; use crate::metrics::ComputeMetrics; @@ -74,7 +72,7 @@ pub fn serve( context: ComputeInstanceContext, ) -> Result< ( - TimelyContainerRef, + TimelyContainerRef, impl Fn() -> Box, ), Error, @@ -97,78 +95,102 @@ pub fn serve( Ok((timely_container, client_builder)) } -type ActivatorSender 
= mpsc::UnboundedSender; +/// Error type returned on connection epoch changes. +/// +/// An epoch change informs workers that subsequent commands come from a new client connection +/// and therefore require reconciliation. +struct EpochChange(u64); /// Endpoint used by workers to receive compute commands. +/// +/// Observes epoch changes in the command stream and converts them into receive errors. struct CommandReceiver { - inner: crossbeam_channel::Receiver, + /// The channel supplying commands. + inner: command_channel::Receiver, + /// The ID of the Timely worker. worker_id: usize, + /// The epoch identifying the current cluster protocol incarnation. + epoch: Option, + /// A stash to enable peeking the next command, used in `try_recv`. + stashed_command: Option, } impl CommandReceiver { - fn new(inner: crossbeam_channel::Receiver, worker_id: usize) -> Self { - Self { inner, worker_id } + fn new(inner: command_channel::Receiver, worker_id: usize) -> Self { + Self { + inner, + worker_id, + epoch: None, + stashed_command: None, + } } - fn try_recv(&self) -> Result { - self.inner.try_recv().map(|cmd| { - trace!(worker = ?self.worker_id, command = ?cmd, "received command"); - cmd - }) + /// Receive the next pending command, if any. + /// + /// If the next command is at a different epoch, this method instead returns an `Err` + /// containing the new epoch. + fn try_recv(&mut self) -> Result, EpochChange> { + if let Some(command) = self.stashed_command.take() { + return Ok(Some(command)); + } + let Some((command, epoch)) = self.inner.try_recv() else { + return Ok(None); + }; + + trace!(worker = self.worker_id, %epoch, ?command, "received command"); + + match self.epoch.cmp(&Some(epoch)) { + Ordering::Less => { + self.epoch = Some(epoch); + self.stashed_command = Some(command); + Err(EpochChange(epoch)) + } + Ordering::Equal => Ok(Some(command)), + Ordering::Greater => panic!("epoch regression: {epoch} < {}", self.epoch.unwrap()), + } } } /// Endpoint used by workers to send compute responses. +/// +/// Tags responses with the current epoch, allowing receivers to filter out responses intended for +/// previous client connections. pub(crate) struct ResponseSender { - inner: mpsc::UnboundedSender, + /// The channel consuming responses. + inner: crossbeam_channel::Sender<(ComputeResponse, u64)>, + /// The ID of the Timely worker. worker_id: usize, + /// The epoch identifying the current cluster protocol incarnation. + epoch: Option, } impl ResponseSender { - fn new(inner: mpsc::UnboundedSender, worker_id: usize) -> Self { - Self { inner, worker_id } - } - - pub fn send(&self, response: ComputeResponse) -> Result<(), SendError> { - trace!(worker = ?self.worker_id, response = ?response, "sending response"); - self.inner.send(response) - } -} - -struct CommandReceiverQueue { - queue: Rc>>>, -} - -impl CommandReceiverQueue { - fn try_recv(&self) -> Result { - match self.queue.borrow_mut().pop_front() { - Some(Ok(cmd)) => Ok(cmd), - Some(Err(e)) => Err(e), - None => Err(TryRecvError::Empty), + fn new(inner: crossbeam_channel::Sender<(ComputeResponse, u64)>, worker_id: usize) -> Self { + Self { + inner, + worker_id, + epoch: None, } } - /// Block until a command is available. - /// This method takes the worker as an argument such that it can step timely while no result - /// is available.
- fn recv(&self, worker: &mut Worker) -> Result { - while self.is_empty() { - let start = Instant::now(); - worker.timely_worker.step_or_park(None); - worker - .metrics - .timely_step_duration_seconds - .observe(start.elapsed().as_secs_f64()); - } - match self.try_recv() { - Ok(cmd) => Ok(cmd), - Err(TryRecvError::Disconnected) => Err(RecvError), - Err(TryRecvError::Empty) => unreachable!("checked above"), - } + /// Advance to the given epoch. + fn advance_epoch(&mut self, epoch: u64) { + assert!( + Some(epoch) > self.epoch, + "epoch regression: {epoch} <= {}", + self.epoch.unwrap(), + ); + self.epoch = Some(epoch); } - fn is_empty(&self) -> bool { - self.queue.borrow().is_empty() + /// Send a compute response. + pub fn send(&self, response: ComputeResponse) -> Result<(), SendError> { + let epoch = self.epoch.expect("epoch must be initialized"); + + trace!(worker = self.worker_id, %epoch, ?response, "sending response"); + self.inner + .send((response, epoch)) + .map_err(|SendError((resp, _))| SendError(resp)) } } @@ -179,13 +201,10 @@ impl CommandReceiverQueue { struct Worker<'w, A: Allocate> { /// The underlying Timely worker. timely_worker: &'w mut TimelyWorker, - /// The channel over which communication handles for newly connected clients - /// are delivered. - client_rx: crossbeam_channel::Receiver<( - crossbeam_channel::Receiver, - mpsc::UnboundedSender, - ActivatorSender, - )>, + /// The channel over which commands are received. + command_rx: CommandReceiver, + /// The channel over which responses are sent. + response_tx: ResponseSender, compute_state: Option, /// Compute metrics. metrics: ComputeMetrics, @@ -200,14 +219,13 @@ struct Worker<'w, A: Allocate> { } impl mz_cluster::types::AsRunnableWorker for Config { - type Activatable = SyncActivator; fn build_and_run( config: Self, timely_worker: &mut TimelyWorker, client_rx: crossbeam_channel::Receiver<( crossbeam_channel::Receiver, - tokio::sync::mpsc::UnboundedSender, - ActivatorSender, + mpsc::UnboundedSender, + mpsc::UnboundedSender, )>, persist_clients: Arc, txns_ctx: TxnsContext, @@ -217,13 +235,26 @@ impl mz_cluster::types::AsRunnableWorker for Co set_core_affinity(timely_worker.index()); } + let worker_id = timely_worker.index(); + let metrics = config.metrics.clone(); + + // Create the command channel that broadcasts commands from worker 0 to other workers. We + // reuse this channel between client connections, to avoid bugs where different workers end + // up creating incompatible sides of the channel dataflow after reconnects. + // See database-issues#8964. + let (cmd_tx, cmd_rx) = command_channel::render(timely_worker); + let (resp_tx, resp_rx) = crossbeam_channel::unbounded(); + + spawn_channel_adapter(client_rx, cmd_tx, resp_rx, worker_id); + Worker { timely_worker, - client_rx, - metrics: config.metrics, - context: config.context, - persist_clients, - txns_ctx, + command_rx: CommandReceiver::new(cmd_rx, worker_id), + response_tx: ResponseSender::new(resp_tx, worker_id), + metrics, + context: config.context.clone(), + persist_clients: Arc::clone(&persist_clients), + txns_ctx: txns_ctx.clone(), compute_state: None, tracing_handle, } @@ -276,176 +307,32 @@ fn set_core_affinity(_worker_id: usize) { } impl<'w, A: Allocate + 'static> Worker<'w, A> { - /// Waits for client connections and runs them to completion. + /// Runs a compute worker. 
pub fn run(&mut self) { - let mut shutdown = false; - while !shutdown { - match self.client_rx.recv() { - Ok((rx, tx, activator_tx)) => { - self.setup_channel_and_run_client(rx, tx, activator_tx) - } - Err(_) => shutdown = true, - } - } - } - - fn split_command( - command: ComputeCommand, - parts: usize, - ) -> Vec> { - match command { - ComputeCommand::CreateDataflow(dataflow) => { - // A list of descriptions of objects for each part to build. - let mut builds_parts = vec![Vec::new(); parts]; - // Partition each build description among `parts`. - for build_desc in dataflow.objects_to_build { - let build_part = build_desc.plan.partition_among(parts); - for (plan, objects_to_build) in - build_part.into_iter().zip(builds_parts.iter_mut()) - { - objects_to_build.push(BuildDesc { - id: build_desc.id, - plan, - }); - } - } + // The command receiver is initialized without an epoch, so receiving the first command + // always triggers an epoch change. + let EpochChange(epoch) = self.recv_command().expect_err("change to first epoch"); + self.advance_epoch(epoch); - // Each list of build descriptions results in a dataflow description. - builds_parts - .into_iter() - .map(|objects_to_build| DataflowDescription { - source_imports: dataflow.source_imports.clone(), - index_imports: dataflow.index_imports.clone(), - objects_to_build, - index_exports: dataflow.index_exports.clone(), - sink_exports: dataflow.sink_exports.clone(), - as_of: dataflow.as_of.clone(), - until: dataflow.until.clone(), - debug_name: dataflow.debug_name.clone(), - initial_storage_as_of: dataflow.initial_storage_as_of.clone(), - refresh_schedule: dataflow.refresh_schedule.clone(), - time_dependence: dataflow.time_dependence.clone(), - }) - .map(ComputeCommand::CreateDataflow) - .collect() - } - command => vec![command; parts], + loop { + let Err(EpochChange(epoch)) = self.run_client(); + self.advance_epoch(epoch); } } - fn setup_channel_and_run_client( - &mut self, - command_rx: crossbeam_channel::Receiver, - response_tx: mpsc::UnboundedSender, - activator_tx: ActivatorSender, - ) { - let cmd_queue = Rc::new(RefCell::new( - VecDeque::>::new(), - )); - let peers = self.timely_worker.peers(); - let worker_id = self.timely_worker.index(); - - let command_rx = CommandReceiver::new(command_rx, worker_id); - let response_tx = ResponseSender::new(response_tx, worker_id); - - self.timely_worker.dataflow::({ - let cmd_queue = Rc::clone(&cmd_queue); - - move |scope| { - source(scope, "CmdSource", |capability, info| { - // Send activator for this operator back. - let activator = scope.sync_activator_for(info.address.to_vec()); - // This might fail if the client has already shut down, which is fine. The rest - // of the operator implementation knows how to handle a disconnected client. - let _ = activator_tx.send(activator); - - //Hold onto capbility until we receive a disconnected error - let mut cap_opt = Some(capability); - // Drop capability if we are not the leader, as our queue will - // be empty and we will never use nor importantly downgrade it. - if worker_id != 0 { - cap_opt = None; - } - - move |output| { - let mut disconnected = false; - if let Some(cap) = cap_opt.as_mut() { - let time = cap.time().clone(); - let mut session = output.session(&cap); - - loop { - match command_rx.try_recv() { - Ok(cmd) => { - // Commands must never be accepted from another worker. This - // implementation does not guarantee an ordering of events - // sent to different workers. 
- assert_eq!(worker_id, 0); - session.give_iterator( - Self::split_command(cmd, peers).into_iter().enumerate(), - ); - } - Err(TryRecvError::Disconnected) => { - disconnected = true; - break; - } - Err(TryRecvError::Empty) => { - break; - } - }; - } - cap.downgrade(&(time + 1)); - } else { - // Non-leader workers will still receive `UpdateConfiguration` commands - // and we must drain those to not leak memory. - if let Ok(cmd) = command_rx.try_recv() { - assert_ne!(worker_id, 0); - assert!(matches!(cmd, ComputeCommand::UpdateConfiguration(_))); - } - } - - if disconnected { - cap_opt = None; - } - } - }) - .sink( - Exchange::new(|(idx, _)| u64::cast_from(*idx)), - "CmdReceiver", - move |input| { - let mut queue = cmd_queue.borrow_mut(); - if input.frontier().is_empty() { - queue.push_back(Err(TryRecvError::Disconnected)) - } - while let Some((_, data)) = input.next() { - for (_, cmd) in data.drain(..) { - queue.push_back(Ok(cmd)); - } - } - }, - ); - } - }); - - self.run_client( - CommandReceiverQueue { - queue: Rc::clone(&cmd_queue), - }, - response_tx, - ) + fn advance_epoch(&mut self, epoch: u64) { + self.response_tx.advance_epoch(epoch); } - /// Draws commands from a single client until disconnected. - fn run_client(&mut self, command_rx: CommandReceiverQueue, mut response_tx: ResponseSender) { - if let Err(_) = self.reconcile(&command_rx, &mut response_tx) { - return; - } + /// Handles commands for a client connection, returns when the epoch changes. + fn run_client(&mut self) -> Result { + self.reconcile()?; // The last time we did periodic maintenance. let mut last_maintenance = Instant::now(); // Commence normal operation. - let mut shutdown = false; - while !shutdown { + loop { // Get the maintenance interval, default to zero if we don't have a compute state. let maintenance_interval = self .compute_state @@ -461,7 +348,7 @@ impl<'w, A: Allocate + 'static> Worker<'w, A> { sleep_duration = None; // Report frontier information back the coordinator. - if let Some(mut compute_state) = self.activate_compute(&mut response_tx) { + if let Some(mut compute_state) = self.activate_compute() { compute_state.compute_state.traces.maintenance(); // Report operator hydration before frontiers, as reporting frontiers may // affect hydration reporting. @@ -485,24 +372,9 @@ impl<'w, A: Allocate + 'static> Worker<'w, A> { self.timely_worker.step_or_park(sleep_duration); timer.observe_duration(); - // Handle any received commands. - let mut cmds = vec![]; - let mut empty = false; - while !empty { - match command_rx.try_recv() { - Ok(cmd) => cmds.push(cmd), - Err(TryRecvError::Empty) => empty = true, - Err(TryRecvError::Disconnected) => { - empty = true; - shutdown = true; - } - } - } - for cmd in cmds { - self.handle_command(&mut response_tx, cmd); - } + self.handle_pending_commands()?; - if let Some(mut compute_state) = self.activate_compute(&mut response_tx) { + if let Some(mut compute_state) = self.activate_compute() { compute_state.process_peeks(); compute_state.process_subscribes(); compute_state.process_copy_tos(); @@ -510,7 +382,14 @@ impl<'w, A: Allocate + 'static> Worker<'w, A> { } } - fn handle_command(&mut self, response_tx: &mut ResponseSender, cmd: ComputeCommand) { + fn handle_pending_commands(&mut self) -> Result<(), EpochChange> { + while let Some(cmd) = self.command_rx.try_recv()? 
{ + self.handle_command(cmd); + } + Ok(()) + } + + fn handle_command(&mut self, cmd: ComputeCommand) { match &cmd { ComputeCommand::CreateInstance(_) => { self.compute_state = Some(ComputeState::new( @@ -524,26 +403,39 @@ impl<'w, A: Allocate + 'static> Worker<'w, A> { } _ => (), } - self.activate_compute(response_tx) - .unwrap() - .handle_compute_command(cmd); + self.activate_compute().unwrap().handle_compute_command(cmd); } - fn activate_compute<'a>( - &'a mut self, - response_tx: &'a mut ResponseSender, - ) -> Option> { + fn activate_compute(&mut self) -> Option> { if let Some(compute_state) = &mut self.compute_state { Some(ActiveComputeState { timely_worker: &mut *self.timely_worker, compute_state, - response_tx, + response_tx: &mut self.response_tx, }) } else { None } } + /// Receive the next compute command. + /// + /// This method blocks if no command is currently available, but takes care to step the Timely + /// worker while doing so. + fn recv_command(&mut self) -> Result { + loop { + if let Some(cmd) = self.command_rx.try_recv()? { + return Ok(cmd); + } + + let start = Instant::now(); + self.timely_worker.step_or_park(None); + self.metrics + .timely_step_duration_seconds + .observe(start.elapsed().as_secs_f64()); + } + } + /// Extract commands until `InitializationComplete`, and make the worker reflect those commands. /// /// This method is meant to be a function of the commands received thus far (as recorded in the @@ -562,18 +454,14 @@ impl<'w, A: Allocate + 'static> Worker<'w, A> { /// Some additional tidying happens, cleaning up pending peeks, reported frontiers, and creating a new /// subscribe response buffer. We will need to be vigilant with future modifications to `ComputeState` to /// line up changes there with clean resets here. - fn reconcile( - &mut self, - command_rx: &CommandReceiverQueue, - response_tx: &mut ResponseSender, - ) -> Result<(), RecvError> { + fn reconcile(&mut self) -> Result<(), EpochChange> { let worker_id = self.timely_worker.index(); // To initialize the connection, we want to drain all commands until we receive a // `ComputeCommand::InitializationComplete` command to form a target command state. let mut new_commands = Vec::new(); loop { - match command_rx.recv(self)? { + match self.recv_command()? { ComputeCommand::InitializationComplete => break, command => new_commands.push(command), } @@ -819,7 +707,7 @@ impl<'w, A: Allocate + 'static> Worker<'w, A> { // Execute the commands to bring us to `new_commands`. for command in todo_commands.into_iter() { - self.handle_command(response_tx, command); + self.handle_command(command); } // Overwrite `self.command_history` to reflect `new_commands`. @@ -835,3 +723,85 @@ impl<'w, A: Allocate + 'static> Worker<'w, A> { Ok(()) } } + +/// Spawn a thread to bridge between `ClusterClient` and [`Worker`] channels. +/// +/// The [`Worker`] expects a pair of persistent channels, with punctuation marking reconnects, +/// while the `ClusterClient` provides a new pair of channels on each reconnect. +fn spawn_channel_adapter( + client_rx: crossbeam_channel::Receiver<( + crossbeam_channel::Receiver, + mpsc::UnboundedSender, + mpsc::UnboundedSender, + )>, + command_tx: command_channel::Sender, + response_rx: crossbeam_channel::Receiver<(ComputeResponse, u64)>, + worker_id: usize, +) { + thread::Builder::new() + // "cca" stands for "compute channel adapter". We need to shorten that because Linux has a + // 15-character limit for thread names. 
+ .name(format!("cca-{worker_id}")) + .spawn(move || { + // To make workers aware of the individual client connections, we tag forwarded + // commands with an epoch that increases on every new client connection. Additionally, + // we use the epoch to filter out responses with a different epoch, which were intended + // for previous clients. + let mut epoch = 0; + + // It's possible that we receive responses with epochs from the future: Worker 0 might + // have increased its epoch before us and broadcasted it to our Timely cluster. When we + // receive a response with a future epoch, we need to wait with forwarding it until we + // have increased our own epoch sufficiently (by observing new client connections). We + // need to stash the response in the meantime. + let mut stashed_response = None; + + while let Ok((command_rx, response_tx, activator_tx)) = client_rx.recv() { + epoch += 1; + + let activator = LocalActivator::new(thread::current()); + if activator_tx.send(activator).is_err() { + continue; + } + + // Wait for a new response while forwarding received commands. + let serve_rx_channels = || loop { + crossbeam_channel::select! { + recv(command_rx) -> msg => match msg { + Ok(cmd) => command_tx.send((cmd, epoch)), + Err(_) => return Err(()), + }, + recv(response_rx) -> msg => { + return Ok(msg.expect("worker connected")); + } + } + }; + + // Serve this connection until we see any of the channels disconnect. + loop { + let (resp, resp_epoch) = match stashed_response.take() { + Some(stashed) => stashed, + None => match serve_rx_channels() { + Ok(response) => response, + Err(()) => break, + }, + }; + + if resp_epoch < epoch { + // Response for a previous connection; discard it. + continue; + } else if resp_epoch > epoch { + // Response for a future connection; stash it and reconnect. + stashed_response = Some((resp, resp_epoch)); + break; + } else { + // Response for the current connection; forward it. 
+ if response_tx.send(resp).is_err() { + break; + } + } + } + } + }) + .unwrap(); +} diff --git a/src/environmentd/BUILD.bazel b/src/environmentd/BUILD.bazel index a4abf5dde1bf8..ecb6cacb767c2 100644 --- a/src/environmentd/BUILD.bazel +++ b/src/environmentd/BUILD.bazel @@ -43,7 +43,7 @@ rust_library( proc_macro_deps = [] + all_crate_deps(proc_macro = True), rustc_env = {}, rustc_flags = [], - version = "0.130.0-dev.0", + version = "0.130.13", deps = [ ":mz_environmentd_build_script", "//src/adapter:mz_adapter", @@ -55,6 +55,7 @@ rust_library( "//src/catalog:mz_catalog", "//src/cloud-resources:mz_cloud_resources", "//src/controller:mz_controller", + "//src/dyncfg:mz_dyncfg", "//src/dyncfgs:mz_dyncfgs", "//src/frontegg-auth:mz_frontegg_auth", "//src/frontegg-mock:mz_frontegg_mock", @@ -114,7 +115,7 @@ rust_test( ), rustc_env = {}, rustc_flags = [], - version = "0.130.0-dev.0", + version = "0.130.13", deps = [ "//src/adapter:mz_adapter", "//src/adapter-types:mz_adapter_types", @@ -125,6 +126,7 @@ rust_test( "//src/catalog:mz_catalog", "//src/cloud-resources:mz_cloud_resources", "//src/controller:mz_controller", + "//src/dyncfg:mz_dyncfg", "//src/dyncfgs:mz_dyncfgs", "//src/environmentd:mz_environmentd", "//src/frontegg-auth:mz_frontegg_auth", @@ -172,6 +174,7 @@ rust_doc_test( "//src/catalog:mz_catalog", "//src/cloud-resources:mz_cloud_resources", "//src/controller:mz_controller", + "//src/dyncfg:mz_dyncfg", "//src/dyncfgs:mz_dyncfgs", "//src/environmentd:mz_environmentd", "//src/frontegg-auth:mz_frontegg_auth", @@ -239,7 +242,7 @@ rust_test( ), rustc_env = {}, rustc_flags = [], - version = "0.130.0-dev.0", + version = "0.130.13", deps = [ "//src/adapter:mz_adapter", "//src/adapter-types:mz_adapter_types", @@ -250,6 +253,74 @@ rust_test( "//src/catalog:mz_catalog", "//src/cloud-resources:mz_cloud_resources", "//src/controller:mz_controller", + "//src/dyncfg:mz_dyncfg", + "//src/dyncfgs:mz_dyncfgs", + "//src/environmentd:mz_environmentd", + "//src/frontegg-auth:mz_frontegg_auth", + "//src/frontegg-mock:mz_frontegg_mock", + "//src/http-util:mz_http_util", + "//src/interchange:mz_interchange", + "//src/metrics:mz_metrics", + "//src/orchestrator:mz_orchestrator", + "//src/orchestrator-kubernetes:mz_orchestrator_kubernetes", + "//src/orchestrator-process:mz_orchestrator_process", + "//src/orchestrator-tracing:mz_orchestrator_tracing", + "//src/orchestratord:mz_orchestratord", + "//src/ore:mz_ore", + "//src/persist-client:mz_persist_client", + "//src/pgrepr:mz_pgrepr", + "//src/pgtest:mz_pgtest", + "//src/pgwire:mz_pgwire", + "//src/pgwire-common:mz_pgwire_common", + "//src/prof-http:mz_prof_http", + "//src/repr:mz_repr", + "//src/secrets:mz_secrets", + "//src/segment:mz_segment", + "//src/server-core:mz_server_core", + "//src/service:mz_service", + "//src/sql:mz_sql", + "//src/sql-parser:mz_sql_parser", + "//src/storage-types:mz_storage_types", + "//src/tracing:mz_tracing", + ] + all_crate_deps( + normal = True, + normal_dev = True, + ), +) + +rust_test( + name = "mz_environmentd_bootstrap_builtin_clusters_tests", + size = "large", + srcs = ["tests/bootstrap_builtin_clusters.rs"], + aliases = aliases( + normal = True, + normal_dev = True, + proc_macro = True, + proc_macro_dev = True, + ), + compile_data = [], + crate_features = [], + crate_name = "bootstrap_builtin_clusters", + data = [], + env = {}, + proc_macro_deps = [] + all_crate_deps( + proc_macro = True, + proc_macro_dev = True, + ), + rustc_env = {}, + rustc_flags = [], + version = "0.130.13", + deps = [ + "//src/adapter:mz_adapter", + 
"//src/adapter-types:mz_adapter_types", + "//src/alloc:mz_alloc", + "//src/alloc-default:mz_alloc_default", + "//src/aws-secrets-controller:mz_aws_secrets_controller", + "//src/build-info:mz_build_info", + "//src/catalog:mz_catalog", + "//src/cloud-resources:mz_cloud_resources", + "//src/controller:mz_controller", + "//src/dyncfg:mz_dyncfg", "//src/dyncfgs:mz_dyncfgs", "//src/environmentd:mz_environmentd", "//src/frontegg-auth:mz_frontegg_auth", @@ -305,7 +376,7 @@ rust_test( ), rustc_env = {}, rustc_flags = [], - version = "0.130.0-dev.0", + version = "0.130.13", deps = [ "//src/adapter:mz_adapter", "//src/adapter-types:mz_adapter_types", @@ -316,6 +387,7 @@ rust_test( "//src/catalog:mz_catalog", "//src/cloud-resources:mz_cloud_resources", "//src/controller:mz_controller", + "//src/dyncfg:mz_dyncfg", "//src/dyncfgs:mz_dyncfgs", "//src/environmentd:mz_environmentd", "//src/frontegg-auth:mz_frontegg_auth", @@ -371,7 +443,7 @@ rust_test( ), rustc_env = {}, rustc_flags = [], - version = "0.130.0-dev.0", + version = "0.130.13", deps = [ "//src/adapter:mz_adapter", "//src/adapter-types:mz_adapter_types", @@ -382,6 +454,7 @@ rust_test( "//src/catalog:mz_catalog", "//src/cloud-resources:mz_cloud_resources", "//src/controller:mz_controller", + "//src/dyncfg:mz_dyncfg", "//src/dyncfgs:mz_dyncfgs", "//src/environmentd:mz_environmentd", "//src/frontegg-auth:mz_frontegg_auth", @@ -437,7 +510,7 @@ rust_test( ), rustc_env = {}, rustc_flags = [], - version = "0.130.0-dev.0", + version = "0.130.13", deps = [ "//src/adapter:mz_adapter", "//src/adapter-types:mz_adapter_types", @@ -448,6 +521,7 @@ rust_test( "//src/catalog:mz_catalog", "//src/cloud-resources:mz_cloud_resources", "//src/controller:mz_controller", + "//src/dyncfg:mz_dyncfg", "//src/dyncfgs:mz_dyncfgs", "//src/environmentd:mz_environmentd", "//src/frontegg-auth:mz_frontegg_auth", @@ -503,7 +577,7 @@ rust_test( ), rustc_env = {}, rustc_flags = [], - version = "0.130.0-dev.0", + version = "0.130.13", deps = [ "//src/adapter:mz_adapter", "//src/adapter-types:mz_adapter_types", @@ -514,6 +588,7 @@ rust_test( "//src/catalog:mz_catalog", "//src/cloud-resources:mz_cloud_resources", "//src/controller:mz_controller", + "//src/dyncfg:mz_dyncfg", "//src/dyncfgs:mz_dyncfgs", "//src/environmentd:mz_environmentd", "//src/frontegg-auth:mz_frontegg_auth", @@ -569,7 +644,7 @@ rust_test( ), rustc_env = {}, rustc_flags = [], - version = "0.130.0-dev.0", + version = "0.130.13", deps = [ "//src/adapter:mz_adapter", "//src/adapter-types:mz_adapter_types", @@ -580,6 +655,7 @@ rust_test( "//src/catalog:mz_catalog", "//src/cloud-resources:mz_cloud_resources", "//src/controller:mz_controller", + "//src/dyncfg:mz_dyncfg", "//src/dyncfgs:mz_dyncfgs", "//src/environmentd:mz_environmentd", "//src/frontegg-auth:mz_frontegg_auth", @@ -635,7 +711,7 @@ rust_test( ), rustc_env = {}, rustc_flags = [], - version = "0.130.0-dev.0", + version = "0.130.13", deps = [ "//src/adapter:mz_adapter", "//src/adapter-types:mz_adapter_types", @@ -646,6 +722,7 @@ rust_test( "//src/catalog:mz_catalog", "//src/cloud-resources:mz_cloud_resources", "//src/controller:mz_controller", + "//src/dyncfg:mz_dyncfg", "//src/dyncfgs:mz_dyncfgs", "//src/environmentd:mz_environmentd", "//src/frontegg-auth:mz_frontegg_auth", @@ -695,7 +772,7 @@ rust_binary( proc_macro_deps = [] + all_crate_deps(proc_macro = True), rustc_env = {}, rustc_flags = [], - version = "0.130.0-dev.0", + version = "0.130.13", deps = [ ":mz_environmentd", "//src/adapter:mz_adapter", @@ -707,6 +784,7 @@ rust_binary( 
"//src/catalog:mz_catalog", "//src/cloud-resources:mz_cloud_resources", "//src/controller:mz_controller", + "//src/dyncfg:mz_dyncfg", "//src/dyncfgs:mz_dyncfgs", "//src/frontegg-auth:mz_frontegg_auth", "//src/frontegg-mock:mz_frontegg_mock", diff --git a/src/environmentd/Cargo.toml b/src/environmentd/Cargo.toml index 0f2834b0acfec..224dedafea510 100644 --- a/src/environmentd/Cargo.toml +++ b/src/environmentd/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "mz-environmentd" description = "Manages a single Materialize environment." -version = "0.130.0-dev.0" +version = "0.130.13" authors = ["Materialize, Inc."] license = "proprietary" edition.workspace = true @@ -47,6 +47,7 @@ mz-adapter-types = { path = "../adapter-types" } mz-catalog = { path = "../catalog" } mz-cloud-resources = { path = "../cloud-resources" } mz-controller = { path = "../controller" } +mz-dyncfg = { path = "../dyncfg" } mz-dyncfgs = { path = "../dyncfgs" } mz-frontegg-auth = { path = "../frontegg-auth" } mz-frontegg-mock = { path = "../frontegg-mock", optional = true } diff --git a/src/environmentd/src/environmentd/main.rs b/src/environmentd/src/environmentd/main.rs index 2adb9db7a4ae0..5655568156d07 100644 --- a/src/environmentd/src/environmentd/main.rs +++ b/src/environmentd/src/environmentd/main.rs @@ -28,6 +28,11 @@ use http::header::HeaderValue; use ipnet::IpNet; use itertools::Itertools; use mz_adapter::ResultExt; +use mz_adapter_types::bootstrap_builtin_cluster_config::{ + BootstrapBuiltinClusterConfig, ANALYTICS_CLUSTER_DEFAULT_REPLICATION_FACTOR, + CATALOG_SERVER_CLUSTER_DEFAULT_REPLICATION_FACTOR, PROBE_CLUSTER_DEFAULT_REPLICATION_FACTOR, + SUPPORT_CLUSTER_DEFAULT_REPLICATION_FACTOR, SYSTEM_CLUSTER_DEFAULT_REPLICATION_FACTOR, +}; use mz_aws_secrets_controller::AwsSecretsController; use mz_build_info::BuildInfo; use mz_catalog::builtin::{ @@ -544,6 +549,46 @@ pub struct Args { default_value = "1" )] bootstrap_builtin_analytics_cluster_replica_size: String, + /// The replication factor of the builtin system cluster replicas if bootstrapping. + #[clap( + long, + env = "BOOTSTRAP_BUILTIN_SYSTEM_CLUSTER_REPLICATION_FACTOR", + default_value = SYSTEM_CLUSTER_DEFAULT_REPLICATION_FACTOR.to_string(), + value_parser = clap::value_parser!(u32).range(0..=2) + )] + bootstrap_builtin_system_cluster_replication_factor: u32, + /// The replication factor of the builtin catalog server cluster replicas if bootstrapping. + #[clap( + long, + env = "BOOTSTRAP_BUILTIN_CATALOG_SERVER_CLUSTER_REPLICATION_FACTOR", + default_value = CATALOG_SERVER_CLUSTER_DEFAULT_REPLICATION_FACTOR.to_string(), + value_parser = clap::value_parser!(u32).range(0..=2) + )] + bootstrap_builtin_catalog_server_cluster_replication_factor: u32, + /// The replication factor of the builtin probe cluster replicas if bootstrapping. + #[clap( + long, + env = "BOOTSTRAP_BUILTIN_PROBE_CLUSTER_REPLICATION_FACTOR", + default_value = PROBE_CLUSTER_DEFAULT_REPLICATION_FACTOR.to_string(), + value_parser = clap::value_parser!(u32).range(0..=2) + )] + bootstrap_builtin_probe_cluster_replication_factor: u32, + /// The replication factor of the builtin support cluster replicas if bootstrapping. + #[clap( + long, + env = "BOOTSTRAP_BUILTIN_SUPPORT_CLUSTER_REPLICATION_FACTOR", + default_value = SUPPORT_CLUSTER_DEFAULT_REPLICATION_FACTOR.to_string(), + value_parser = clap::value_parser!(u32).range(0..=2) + )] + bootstrap_builtin_support_cluster_replication_factor: u32, + /// The replication factor of the builtin analytics cluster replicas if bootstrapping. 
+ #[clap( + long, + env = "BOOTSTRAP_BUILTIN_ANALYTICS_CLUSTER_REPLICATION_FACTOR", + default_value = ANALYTICS_CLUSTER_DEFAULT_REPLICATION_FACTOR.to_string(), + value_parser = clap::value_parser!(u32).range(0..=2) + )] + bootstrap_builtin_analytics_cluster_replication_factor: u32, /// An list of NAME=VALUE pairs used to override static defaults /// for system parameters. #[clap( @@ -1041,16 +1086,27 @@ fn run(mut args: Args) -> Result<(), anyhow::Error> { environment_id: args.environment_id, bootstrap_role: args.bootstrap_role, bootstrap_default_cluster_replica_size: args.bootstrap_default_cluster_replica_size, - bootstrap_builtin_system_cluster_replica_size: args - .bootstrap_builtin_system_cluster_replica_size, - bootstrap_builtin_catalog_server_cluster_replica_size: args - .bootstrap_builtin_catalog_server_cluster_replica_size, - bootstrap_builtin_probe_cluster_replica_size: args - .bootstrap_builtin_probe_cluster_replica_size, - bootstrap_builtin_support_cluster_replica_size: args - .bootstrap_builtin_support_cluster_replica_size, - bootstrap_builtin_analytics_cluster_replica_size: args - .bootstrap_builtin_analytics_cluster_replica_size, + bootstrap_builtin_system_cluster_config: BootstrapBuiltinClusterConfig { + size: args.bootstrap_builtin_system_cluster_replica_size, + replication_factor: args.bootstrap_builtin_system_cluster_replication_factor, + }, + bootstrap_builtin_catalog_server_cluster_config: BootstrapBuiltinClusterConfig { + size: args.bootstrap_builtin_catalog_server_cluster_replica_size, + replication_factor: args + .bootstrap_builtin_catalog_server_cluster_replication_factor, + }, + bootstrap_builtin_probe_cluster_config: BootstrapBuiltinClusterConfig { + size: args.bootstrap_builtin_probe_cluster_replica_size, + replication_factor: args.bootstrap_builtin_probe_cluster_replication_factor, + }, + bootstrap_builtin_support_cluster_config: BootstrapBuiltinClusterConfig { + size: args.bootstrap_builtin_support_cluster_replica_size, + replication_factor: args.bootstrap_builtin_support_cluster_replication_factor, + }, + bootstrap_builtin_analytics_cluster_config: BootstrapBuiltinClusterConfig { + size: args.bootstrap_builtin_analytics_cluster_replica_size, + replication_factor: args.bootstrap_builtin_analytics_cluster_replication_factor, + }, system_parameter_defaults: args .system_parameter_default .into_iter() diff --git a/src/environmentd/src/lib.rs b/src/environmentd/src/lib.rs index af0f775306faf..fd1ea58cdac47 100644 --- a/src/environmentd/src/lib.rs +++ b/src/environmentd/src/lib.rs @@ -27,6 +27,7 @@ use ipnet::IpNet; use mz_adapter::config::{system_parameter_sync, SystemParameterSyncConfig}; use mz_adapter::webhook::WebhookConcurrencyLimiter; use mz_adapter::{load_remote_system_parameters, AdapterError}; +use mz_adapter_types::bootstrap_builtin_cluster_config::BootstrapBuiltinClusterConfig; use mz_adapter_types::dyncfgs::{ ENABLE_0DT_DEPLOYMENT, ENABLE_0DT_DEPLOYMENT_PANIC_AFTER_TIMEOUT, WITH_0DT_DEPLOYMENT_MAX_WAIT, }; @@ -152,16 +153,16 @@ pub struct Config { pub bootstrap_role: Option, /// The size of the default cluster replica if bootstrapping. pub bootstrap_default_cluster_replica_size: String, - /// The size of the builtin system cluster replicas if bootstrapping. - pub bootstrap_builtin_system_cluster_replica_size: String, - /// The size of the builtin catalog server cluster replicas if bootstrapping. - pub bootstrap_builtin_catalog_server_cluster_replica_size: String, - /// The size of the builtin probe cluster replicas if bootstrapping. 
- pub bootstrap_builtin_probe_cluster_replica_size: String, - /// The size of the builtin support cluster replicas if bootstrapping. - pub bootstrap_builtin_support_cluster_replica_size: String, - /// The size of the builtin analytics cluster replicas if bootstrapping. - pub bootstrap_builtin_analytics_cluster_replica_size: String, + /// The config of the builtin system cluster replicas if bootstrapping. + pub bootstrap_builtin_system_cluster_config: BootstrapBuiltinClusterConfig, + /// The config of the builtin catalog server cluster replicas if bootstrapping. + pub bootstrap_builtin_catalog_server_cluster_config: BootstrapBuiltinClusterConfig, + /// The config of the builtin probe cluster replicas if bootstrapping. + pub bootstrap_builtin_probe_cluster_config: BootstrapBuiltinClusterConfig, + /// The config of the builtin support cluster replicas if bootstrapping. + pub bootstrap_builtin_support_cluster_config: BootstrapBuiltinClusterConfig, + /// The config of the builtin analytics cluster replicas if bootstrapping. + pub bootstrap_builtin_analytics_cluster_config: BootstrapBuiltinClusterConfig, /// Values to set for system parameters, if those system parameters have not /// already been set by the system user. pub system_parameter_defaults: BTreeMap, @@ -630,15 +631,12 @@ impl Listeners { secrets_controller: config.secrets_controller, cloud_resource_controller: config.cloud_resource_controller, cluster_replica_sizes: config.cluster_replica_sizes, - builtin_system_cluster_replica_size: config - .bootstrap_builtin_system_cluster_replica_size, - builtin_catalog_server_cluster_replica_size: config - .bootstrap_builtin_catalog_server_cluster_replica_size, - builtin_probe_cluster_replica_size: config.bootstrap_builtin_probe_cluster_replica_size, - builtin_support_cluster_replica_size: config - .bootstrap_builtin_support_cluster_replica_size, - builtin_analytics_cluster_replica_size: config - .bootstrap_builtin_analytics_cluster_replica_size, + builtin_system_cluster_config: config.bootstrap_builtin_system_cluster_config, + builtin_catalog_server_cluster_config: config + .bootstrap_builtin_catalog_server_cluster_config, + builtin_probe_cluster_config: config.bootstrap_builtin_probe_cluster_config, + builtin_support_cluster_config: config.bootstrap_builtin_support_cluster_config, + builtin_analytics_cluster_config: config.bootstrap_builtin_analytics_cluster_config, availability_zones: config.availability_zones, system_parameter_defaults: config.system_parameter_defaults, storage_usage_client, diff --git a/src/environmentd/src/test_util.rs b/src/environmentd/src/test_util.rs index 74c1d905eca38..623bcd60e9647 100644 --- a/src/environmentd/src/test_util.rs +++ b/src/environmentd/src/test_util.rs @@ -25,8 +25,15 @@ use futures::Future; use headers::{Header, HeaderMapExt}; use hyper::http::header::HeaderMap; use mz_adapter::TimestampExplanation; +use mz_adapter_types::bootstrap_builtin_cluster_config::{ + BootstrapBuiltinClusterConfig, ANALYTICS_CLUSTER_DEFAULT_REPLICATION_FACTOR, + CATALOG_SERVER_CLUSTER_DEFAULT_REPLICATION_FACTOR, PROBE_CLUSTER_DEFAULT_REPLICATION_FACTOR, + SUPPORT_CLUSTER_DEFAULT_REPLICATION_FACTOR, SYSTEM_CLUSTER_DEFAULT_REPLICATION_FACTOR, +}; + use mz_catalog::config::ClusterReplicaSizeMap; use mz_controller::ControllerConfig; +use mz_dyncfg::ConfigUpdates; use mz_orchestrator_process::{ProcessOrchestrator, ProcessOrchestratorConfig}; use mz_orchestrator_tracing::{TracingCliArgs, TracingOrchestrator}; use mz_ore::metrics::MetricsRegistry; @@ -38,7 +45,7 @@ use 
mz_ore::tracing::{ TracingHandle, }; use mz_persist_client::cache::PersistClientCache; -use mz_persist_client::cfg::PersistConfig; +use mz_persist_client::cfg::{PersistConfig, CONSENSUS_CONNECTION_POOL_MAX_SIZE}; use mz_persist_client::rpc::PersistGrpcPubSubServer; use mz_persist_client::PersistLocation; use mz_secrets::SecretsController; @@ -92,11 +99,12 @@ pub struct TestHarness { storage_usage_collection_interval: Duration, storage_usage_retention_period: Option, default_cluster_replica_size: String, - builtin_system_cluster_replica_size: String, - builtin_catalog_server_cluster_replica_size: String, - builtin_probe_cluster_replica_size: String, - builtin_support_cluster_replica_size: String, - builtin_analytics_cluster_replica_size: String, + builtin_system_cluster_config: BootstrapBuiltinClusterConfig, + builtin_catalog_server_cluster_config: BootstrapBuiltinClusterConfig, + builtin_probe_cluster_config: BootstrapBuiltinClusterConfig, + builtin_support_cluster_config: BootstrapBuiltinClusterConfig, + builtin_analytics_cluster_config: BootstrapBuiltinClusterConfig, + propagate_crashes: bool, enable_tracing: bool, // This is currently unrelated to enable_tracing, and is used only to disable orchestrator @@ -125,11 +133,26 @@ impl Default for TestHarness { storage_usage_collection_interval: Duration::from_secs(3600), storage_usage_retention_period: None, default_cluster_replica_size: "1".to_string(), - builtin_system_cluster_replica_size: "1".to_string(), - builtin_catalog_server_cluster_replica_size: "1".to_string(), - builtin_probe_cluster_replica_size: "1".to_string(), - builtin_support_cluster_replica_size: "1".to_string(), - builtin_analytics_cluster_replica_size: "1".to_string(), + builtin_system_cluster_config: BootstrapBuiltinClusterConfig { + size: "1".to_string(), + replication_factor: SYSTEM_CLUSTER_DEFAULT_REPLICATION_FACTOR, + }, + builtin_catalog_server_cluster_config: BootstrapBuiltinClusterConfig { + size: "1".to_string(), + replication_factor: CATALOG_SERVER_CLUSTER_DEFAULT_REPLICATION_FACTOR, + }, + builtin_probe_cluster_config: BootstrapBuiltinClusterConfig { + size: "1".to_string(), + replication_factor: PROBE_CLUSTER_DEFAULT_REPLICATION_FACTOR, + }, + builtin_support_cluster_config: BootstrapBuiltinClusterConfig { + size: "1".to_string(), + replication_factor: SUPPORT_CLUSTER_DEFAULT_REPLICATION_FACTOR, + }, + builtin_analytics_cluster_config: BootstrapBuiltinClusterConfig { + size: "1".to_string(), + replication_factor: ANALYTICS_CLUSTER_DEFAULT_REPLICATION_FACTOR, + }, propagate_crashes: false, enable_tracing: false, bootstrap_role: Some("materialize".into()), @@ -254,14 +277,24 @@ impl TestHarness { mut self, builtin_system_cluster_replica_size: String, ) -> Self { - self.builtin_system_cluster_replica_size = builtin_system_cluster_replica_size; + self.builtin_system_cluster_config.size = builtin_system_cluster_replica_size; + self + } + + pub fn with_builtin_system_cluster_replication_factor( + mut self, + builtin_system_cluster_replication_factor: u32, + ) -> Self { + self.builtin_system_cluster_config.replication_factor = + builtin_system_cluster_replication_factor; self } + pub fn with_builtin_catalog_server_cluster_replica_size( mut self, builtin_catalog_server_cluster_replica_size: String, ) -> Self { - self.builtin_catalog_server_cluster_replica_size = + self.builtin_catalog_server_cluster_config.size = builtin_catalog_server_cluster_replica_size; self } @@ -393,15 +426,16 @@ impl Listeners { // Messing with the clock causes persist to expire leases, 
causing hangs and // panics. Is it possible/desirable to put this back somehow? let persist_now = SYSTEM_TIME.clone(); - let mut persist_cfg = PersistConfig::new( - &crate::BUILD_INFO, - persist_now.clone(), - mz_dyncfgs::all_dyncfgs(), - ); - persist_cfg.build_version = config.code_version; + let dyncfgs = mz_dyncfgs::all_dyncfgs(); + + let mut updates = ConfigUpdates::default(); // Tune down the number of connections to make this all work a little easier // with local postgres. - persist_cfg.consensus_connection_pool_max_size = 1; + updates.add(&CONSENSUS_CONNECTION_POOL_MAX_SIZE, 1); + updates.apply(&dyncfgs); + + let mut persist_cfg = PersistConfig::new(&crate::BUILD_INFO, persist_now.clone(), dyncfgs); + persist_cfg.build_version = config.code_version; // Stress persist more by writing rollups frequently persist_cfg.set_rollup_threshold(5); @@ -511,16 +545,12 @@ impl Listeners { cors_allowed_origin: AllowOrigin::list([]), cluster_replica_sizes: ClusterReplicaSizeMap::for_tests(), bootstrap_default_cluster_replica_size: config.default_cluster_replica_size, - bootstrap_builtin_system_cluster_replica_size: config - .builtin_system_cluster_replica_size, - bootstrap_builtin_catalog_server_cluster_replica_size: config - .builtin_catalog_server_cluster_replica_size, - bootstrap_builtin_probe_cluster_replica_size: config - .builtin_probe_cluster_replica_size, - bootstrap_builtin_support_cluster_replica_size: config - .builtin_support_cluster_replica_size, - bootstrap_builtin_analytics_cluster_replica_size: config - .builtin_analytics_cluster_replica_size, + bootstrap_builtin_system_cluster_config: config.builtin_system_cluster_config, + bootstrap_builtin_catalog_server_cluster_config: config + .builtin_catalog_server_cluster_config, + bootstrap_builtin_probe_cluster_config: config.builtin_probe_cluster_config, + bootstrap_builtin_support_cluster_config: config.builtin_support_cluster_config, + bootstrap_builtin_analytics_cluster_config: config.builtin_analytics_cluster_config, system_parameter_defaults: config.system_parameter_defaults, availability_zones: Default::default(), tracing_handle, diff --git a/src/environmentd/tests/bootstrap_builtin_clusters.rs b/src/environmentd/tests/bootstrap_builtin_clusters.rs new file mode 100644 index 0000000000000..3ca92611e7f12 --- /dev/null +++ b/src/environmentd/tests/bootstrap_builtin_clusters.rs @@ -0,0 +1,38 @@ +// Copyright Materialize, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +//! Integration tests for builtin clusters on bootstrap. + +use mz_environmentd::test_util::{self}; + +// Test that a cluster with a replication factor of 0 should not create any replicas.
+#[mz_ore::test] +fn test_zero_replication_factor_no_replicas() { + let server = test_util::TestHarness::default() + .with_builtin_system_cluster_replication_factor(0) + .start_blocking(); + + let mut client = server.connect(postgres::NoTls).unwrap(); + let system_cluster = client + .query_one( + r#" + SELECT c.id, c.name, c.replication_factor::integer, COUNT(cr.id)::integer as replica_count + FROM mz_clusters c + LEFT JOIN mz_cluster_replicas cr ON c.id = cr.cluster_id + WHERE c.name = 'mz_system' + GROUP BY c.id, c.name, c.replication_factor"#, + &[], + ) + .unwrap(); + + let replication_factor: i32 = system_cluster.get(2); + let replica_count: i32 = system_cluster.get(3); + assert_eq!(replication_factor, 0); + assert_eq!(replica_count, 0); +} diff --git a/src/environmentd/tests/testdata/http/post b/src/environmentd/tests/testdata/http/post index c388b379b01c2..0ff3b60a0b465 100644 --- a/src/environmentd/tests/testdata/http/post +++ b/src/environmentd/tests/testdata/http/post @@ -324,7 +324,7 @@ http {"queries":[{"query":"select $1+$2::int as col","params":["1"]}]} ---- 200 OK -{"results":[{"error":{"message":"request supplied 1 parameters, but SELECT $1 + ($2)::int4 AS col requires 2","code":"XX000"},"notices":[]}]} +{"results":[{"error":{"message":"request supplied 1 parameters, but SELECT $1 + $2::int4 AS col requires 2","code":"XX000"},"notices":[]}]} # NaN http diff --git a/src/environmentd/tests/testdata/http/ws b/src/environmentd/tests/testdata/http/ws index 0f1bdd464e947..0bb22786bcedc 100644 --- a/src/environmentd/tests/testdata/http/ws +++ b/src/environmentd/tests/testdata/http/ws @@ -122,7 +122,7 @@ ws-text {"queries": [{"query": "select $1::int", "params": []}]} ---- {"type":"CommandStarting","payload":{"has_rows":false,"is_streaming":false}} -{"type":"Error","payload":{"message":"request supplied 0 parameters, but SELECT ($1)::int4 requires 1","code":"XX000"}} +{"type":"Error","payload":{"message":"request supplied 0 parameters, but SELECT $1::int4 requires 1","code":"XX000"}} {"type":"ReadyForQuery","payload":"I"} ws-text @@ -402,7 +402,7 @@ ws-text ws-text {"query": "SELECT 1 FROM mz_sources LIMIT 1"} ---- -{"type":"Notice","payload":{"message":"{\n \"plans\": {\n \"raw\": {\n \"text\": \"Finish limit=1 output=[#0]\\n Project (#15)\\n Map (1)\\n Get mz_catalog.mz_sources\\n\\nTarget cluster: mz_catalog_server\\n\",\n \"json\": {\n \"Project\": {\n \"input\": {\n \"Map\": {\n \"input\": {\n \"Get\": {\n \"id\": {\n \"Global\": {\n \"System\": 461\n }\n },\n \"typ\": {\n \"column_types\": [\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"Oid\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": {\n \"Array\": \"MzAclItem\"\n },\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n }\n ],\n \"keys\": [\n [\n 0\n ],\n [\n 1\n ]\n ]\n }\n }\n },\n 
\"scalars\": [\n {\n \"Literal\": [\n {\n \"data\": [\n 45,\n 1\n ]\n },\n {\n \"scalar_type\": \"Int32\",\n \"nullable\": false\n }\n ]\n }\n ]\n }\n },\n \"outputs\": [\n 15\n ]\n }\n }\n },\n \"optimized\": {\n \"global\": {\n \"text\": \"t72:\\n Finish limit=1 output=[#0]\\n ArrangeBy keys=[[#0]]\\n ReadGlobalFromSameDataflow t71\\n\\nt71:\\n Project (#15)\\n Map (1)\\n ReadIndex on=mz_sources mz_sources_ind=[*** full scan ***]\\n\\nTarget cluster: mz_catalog_server\\n\",\n \"json\": {\n \"plans\": [\n {\n \"id\": \"t72\",\n \"plan\": {\n \"ArrangeBy\": {\n \"input\": {\n \"Get\": {\n \"id\": {\n \"Global\": {\n \"Transient\": 71\n }\n },\n \"typ\": {\n \"column_types\": [\n {\n \"scalar_type\": \"Int32\",\n \"nullable\": false\n }\n ],\n \"keys\": []\n },\n \"access_strategy\": \"SameDataflow\"\n }\n },\n \"keys\": [\n [\n {\n \"Column\": 0\n }\n ]\n ]\n }\n }\n },\n {\n \"id\": \"t71\",\n \"plan\": {\n \"Project\": {\n \"input\": {\n \"Map\": {\n \"input\": {\n \"Get\": {\n \"id\": {\n \"Global\": {\n \"System\": 461\n }\n },\n \"typ\": {\n \"column_types\": [\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"Oid\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": {\n \"Array\": \"MzAclItem\"\n },\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n }\n ],\n \"keys\": [\n [\n 0\n ],\n [\n 1\n ]\n ]\n },\n \"access_strategy\": {\n \"Index\": [\n [\n {\n \"System\": 737\n },\n \"FullScan\"\n ]\n ]\n }\n }\n },\n \"scalars\": [\n {\n \"Literal\": [\n {\n \"Ok\": {\n \"data\": [\n 45,\n 1\n ]\n }\n },\n {\n \"scalar_type\": \"Int32\",\n \"nullable\": false\n }\n ]\n }\n ]\n }\n },\n \"outputs\": [\n 15\n ]\n }\n }\n }\n ],\n \"sources\": []\n }\n },\n \"fast_path\": {\n \"text\": \"Explained Query (fast path):\\n Finish limit=1 output=[#0]\\n Project (#15)\\n Map (1)\\n ReadIndex on=mz_catalog.mz_sources mz_sources_ind=[*** full scan ***]\\n\\nTarget cluster: mz_catalog_server\\n\",\n \"json\": {\n \"plans\": [\n {\n \"id\": \"Explained Query (fast path)\",\n \"plan\": {\n \"PeekExisting\": [\n {\n \"System\": 461\n },\n {\n \"System\": 737\n },\n null,\n {\n \"mfp\": {\n \"expressions\": [\n {\n \"Literal\": [\n {\n \"Ok\": {\n \"data\": [\n 45,\n 1\n ]\n }\n },\n {\n \"scalar_type\": \"Int32\",\n \"nullable\": false\n }\n ]\n }\n ],\n \"predicates\": [],\n \"projection\": [\n 15\n ],\n \"input_arity\": 15\n }\n }\n ]\n }\n }\n ],\n \"sources\": []\n }\n }\n }\n },\n \"insights\": {\n \"imports\": {\n \"s737\": {\n \"name\": {\n \"schema\": \"mz_catalog\",\n \"item\": \"mz_sources_ind\"\n },\n \"type\": \"compute\"\n }\n },\n \"fast_path_clusters\": {},\n \"fast_path_limit\": null,\n \"persist_count\": []\n },\n \"cluster\": {\n \"name\": \"mz_catalog_server\",\n \"id\": {\n \"System\": 2\n }\n },\n \"redacted_sql\": \"SELECT '' FROM [s461 AS 
mz_catalog.mz_sources] LIMIT ''\"\n}","code":"MZ001","severity":"notice"}} +{"type":"Notice","payload":{"message":"{\n \"plans\": {\n \"raw\": {\n \"text\": \"Finish limit=1 output=[#0]\\n Project (#15)\\n Map (1)\\n Get mz_catalog.mz_sources\\n\\nTarget cluster: mz_catalog_server\\n\",\n \"json\": {\n \"Project\": {\n \"input\": {\n \"Map\": {\n \"input\": {\n \"Get\": {\n \"id\": {\n \"Global\": {\n \"System\": 463\n }\n },\n \"typ\": {\n \"column_types\": [\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"Oid\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": {\n \"Array\": \"MzAclItem\"\n },\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n }\n ],\n \"keys\": [\n [\n 0\n ],\n [\n 1\n ]\n ]\n }\n }\n },\n \"scalars\": [\n {\n \"Literal\": [\n {\n \"data\": [\n 45,\n 1\n ]\n },\n {\n \"scalar_type\": \"Int32\",\n \"nullable\": false\n }\n ]\n }\n ]\n }\n },\n \"outputs\": [\n 15\n ]\n }\n }\n },\n \"optimized\": {\n \"global\": {\n \"text\": \"t72:\\n Finish limit=1 output=[#0]\\n ArrangeBy keys=[[#0]]\\n ReadGlobalFromSameDataflow t71\\n\\nt71:\\n Project (#15)\\n Map (1)\\n ReadIndex on=mz_sources mz_sources_ind=[*** full scan ***]\\n\\nTarget cluster: mz_catalog_server\\n\",\n \"json\": {\n \"plans\": [\n {\n \"id\": \"t72\",\n \"plan\": {\n \"ArrangeBy\": {\n \"input\": {\n \"Get\": {\n \"id\": {\n \"Global\": {\n \"Transient\": 71\n }\n },\n \"typ\": {\n \"column_types\": [\n {\n \"scalar_type\": \"Int32\",\n \"nullable\": false\n }\n ],\n \"keys\": []\n },\n \"access_strategy\": \"SameDataflow\"\n }\n },\n \"keys\": [\n [\n {\n \"Column\": 0\n }\n ]\n ]\n }\n }\n },\n {\n \"id\": \"t71\",\n \"plan\": {\n \"Project\": {\n \"input\": {\n \"Map\": {\n \"input\": {\n \"Get\": {\n \"id\": {\n \"Global\": {\n \"System\": 463\n }\n },\n \"typ\": {\n \"column_types\": [\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"Oid\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": {\n \"Array\": \"MzAclItem\"\n },\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n }\n ],\n \"keys\": [\n [\n 0\n ],\n [\n 1\n ]\n ]\n },\n 
\"access_strategy\": {\n \"Index\": [\n [\n {\n \"System\": 739\n },\n \"FullScan\"\n ]\n ]\n }\n }\n },\n \"scalars\": [\n {\n \"Literal\": [\n {\n \"Ok\": {\n \"data\": [\n 45,\n 1\n ]\n }\n },\n {\n \"scalar_type\": \"Int32\",\n \"nullable\": false\n }\n ]\n }\n ]\n }\n },\n \"outputs\": [\n 15\n ]\n }\n }\n }\n ],\n \"sources\": []\n }\n },\n \"fast_path\": {\n \"text\": \"Explained Query (fast path):\\n Finish limit=1 output=[#0]\\n Project (#15)\\n Map (1)\\n ReadIndex on=mz_catalog.mz_sources mz_sources_ind=[*** full scan ***]\\n\\nTarget cluster: mz_catalog_server\\n\",\n \"json\": {\n \"plans\": [\n {\n \"id\": \"Explained Query (fast path)\",\n \"plan\": {\n \"PeekExisting\": [\n {\n \"System\": 463\n },\n {\n \"System\": 739\n },\n null,\n {\n \"mfp\": {\n \"expressions\": [\n {\n \"Literal\": [\n {\n \"Ok\": {\n \"data\": [\n 45,\n 1\n ]\n }\n },\n {\n \"scalar_type\": \"Int32\",\n \"nullable\": false\n }\n ]\n }\n ],\n \"predicates\": [],\n \"projection\": [\n 15\n ],\n \"input_arity\": 15\n }\n }\n ]\n }\n }\n ],\n \"sources\": []\n }\n }\n }\n },\n \"insights\": {\n \"imports\": {\n \"s739\": {\n \"name\": {\n \"schema\": \"mz_catalog\",\n \"item\": \"mz_sources_ind\"\n },\n \"type\": \"compute\"\n }\n },\n \"fast_path_clusters\": {},\n \"fast_path_limit\": null,\n \"persist_count\": []\n },\n \"cluster\": {\n \"name\": \"mz_catalog_server\",\n \"id\": {\n \"System\": 2\n }\n },\n \"redacted_sql\": \"SELECT '' FROM [s463 AS mz_catalog.mz_sources] LIMIT ''\"\n}","code":"MZ001","severity":"notice"}} {"type":"CommandStarting","payload":{"has_rows":true,"is_streaming":false}} {"type":"Rows","payload":{"columns":[{"name":"?column?","type_oid":23,"type_len":4,"type_mod":-1}]}} {"type":"Row","payload":["1"]} @@ -412,7 +412,7 @@ ws-text ws-text {"query": "SELECT 1 / 0 FROM mz_sources LIMIT 1"} ---- -{"type":"Notice","payload":{"message":"{\n \"plans\": {\n \"raw\": {\n \"text\": \"Finish limit=1 output=[#0]\\n Project (#15)\\n Map ((1 / 0))\\n Get mz_catalog.mz_sources\\n\\nTarget cluster: mz_catalog_server\\n\",\n \"json\": {\n \"Project\": {\n \"input\": {\n \"Map\": {\n \"input\": {\n \"Get\": {\n \"id\": {\n \"Global\": {\n \"System\": 461\n }\n },\n \"typ\": {\n \"column_types\": [\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"Oid\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": {\n \"Array\": \"MzAclItem\"\n },\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n }\n ],\n \"keys\": [\n [\n 0\n ],\n [\n 1\n ]\n ]\n }\n }\n },\n \"scalars\": [\n {\n \"CallBinary\": {\n \"func\": \"DivInt32\",\n \"expr1\": {\n \"Literal\": [\n {\n \"data\": [\n 45,\n 1\n ]\n },\n {\n \"scalar_type\": \"Int32\",\n \"nullable\": false\n }\n ]\n },\n \"expr2\": {\n \"Literal\": [\n {\n \"data\": [\n 44\n ]\n },\n {\n \"scalar_type\": \"Int32\",\n \"nullable\": 
false\n }\n ]\n }\n }\n }\n ]\n }\n },\n \"outputs\": [\n 15\n ]\n }\n }\n },\n \"optimized\": {\n \"global\": {\n \"text\": \"t75:\\n Finish limit=1 output=[#0]\\n ArrangeBy keys=[[#0]]\\n ReadGlobalFromSameDataflow t74\\n\\nt74:\\n Project (#15)\\n Map (error(\\\"division by zero\\\"))\\n ReadIndex on=mz_sources mz_sources_ind=[*** full scan ***]\\n\\nTarget cluster: mz_catalog_server\\n\",\n \"json\": {\n \"plans\": [\n {\n \"id\": \"t75\",\n \"plan\": {\n \"ArrangeBy\": {\n \"input\": {\n \"Get\": {\n \"id\": {\n \"Global\": {\n \"Transient\": 74\n }\n },\n \"typ\": {\n \"column_types\": [\n {\n \"scalar_type\": \"Int32\",\n \"nullable\": false\n }\n ],\n \"keys\": []\n },\n \"access_strategy\": \"SameDataflow\"\n }\n },\n \"keys\": [\n [\n {\n \"Column\": 0\n }\n ]\n ]\n }\n }\n },\n {\n \"id\": \"t74\",\n \"plan\": {\n \"Project\": {\n \"input\": {\n \"Map\": {\n \"input\": {\n \"Get\": {\n \"id\": {\n \"Global\": {\n \"System\": 461\n }\n },\n \"typ\": {\n \"column_types\": [\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"Oid\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": {\n \"Array\": \"MzAclItem\"\n },\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n }\n ],\n \"keys\": [\n [\n 0\n ],\n [\n 1\n ]\n ]\n },\n \"access_strategy\": {\n \"Index\": [\n [\n {\n \"System\": 737\n },\n \"FullScan\"\n ]\n ]\n }\n }\n },\n \"scalars\": [\n {\n \"Literal\": [\n {\n \"Err\": \"DivisionByZero\"\n },\n {\n \"scalar_type\": \"Int32\",\n \"nullable\": false\n }\n ]\n }\n ]\n }\n },\n \"outputs\": [\n 15\n ]\n }\n }\n }\n ],\n \"sources\": []\n }\n },\n \"fast_path\": {\n \"text\": \"Explained Query (fast path):\\n Finish limit=1 output=[#0]\\n Project (#15)\\n Map (error(\\\"division by zero\\\"))\\n ReadIndex on=mz_catalog.mz_sources mz_sources_ind=[*** full scan ***]\\n\\nTarget cluster: mz_catalog_server\\n\",\n \"json\": {\n \"plans\": [\n {\n \"id\": \"Explained Query (fast path)\",\n \"plan\": {\n \"PeekExisting\": [\n {\n \"System\": 461\n },\n {\n \"System\": 737\n },\n null,\n {\n \"mfp\": {\n \"expressions\": [\n {\n \"Literal\": [\n {\n \"Err\": \"DivisionByZero\"\n },\n {\n \"scalar_type\": \"Int32\",\n \"nullable\": false\n }\n ]\n }\n ],\n \"predicates\": [],\n \"projection\": [\n 15\n ],\n \"input_arity\": 15\n }\n }\n ]\n }\n }\n ],\n \"sources\": []\n }\n }\n }\n },\n \"insights\": {\n \"imports\": {\n \"s737\": {\n \"name\": {\n \"schema\": \"mz_catalog\",\n \"item\": \"mz_sources_ind\"\n },\n \"type\": \"compute\"\n }\n },\n \"fast_path_clusters\": {},\n \"fast_path_limit\": null,\n \"persist_count\": []\n },\n \"cluster\": {\n \"name\": \"mz_catalog_server\",\n \"id\": {\n \"System\": 2\n }\n },\n \"redacted_sql\": \"SELECT '' / '' FROM [s461 AS mz_catalog.mz_sources] LIMIT ''\"\n}","code":"MZ001","severity":"notice"}} 
+{"type":"Notice","payload":{"message":"{\n \"plans\": {\n \"raw\": {\n \"text\": \"Finish limit=1 output=[#0]\\n Project (#15)\\n Map ((1 / 0))\\n Get mz_catalog.mz_sources\\n\\nTarget cluster: mz_catalog_server\\n\",\n \"json\": {\n \"Project\": {\n \"input\": {\n \"Map\": {\n \"input\": {\n \"Get\": {\n \"id\": {\n \"Global\": {\n \"System\": 463\n }\n },\n \"typ\": {\n \"column_types\": [\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"Oid\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": {\n \"Array\": \"MzAclItem\"\n },\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n }\n ],\n \"keys\": [\n [\n 0\n ],\n [\n 1\n ]\n ]\n }\n }\n },\n \"scalars\": [\n {\n \"CallBinary\": {\n \"func\": \"DivInt32\",\n \"expr1\": {\n \"Literal\": [\n {\n \"data\": [\n 45,\n 1\n ]\n },\n {\n \"scalar_type\": \"Int32\",\n \"nullable\": false\n }\n ]\n },\n \"expr2\": {\n \"Literal\": [\n {\n \"data\": [\n 44\n ]\n },\n {\n \"scalar_type\": \"Int32\",\n \"nullable\": false\n }\n ]\n }\n }\n }\n ]\n }\n },\n \"outputs\": [\n 15\n ]\n }\n }\n },\n \"optimized\": {\n \"global\": {\n \"text\": \"t75:\\n Finish limit=1 output=[#0]\\n ArrangeBy keys=[[#0]]\\n ReadGlobalFromSameDataflow t74\\n\\nt74:\\n Project (#15)\\n Map (error(\\\"division by zero\\\"))\\n ReadIndex on=mz_sources mz_sources_ind=[*** full scan ***]\\n\\nTarget cluster: mz_catalog_server\\n\",\n \"json\": {\n \"plans\": [\n {\n \"id\": \"t75\",\n \"plan\": {\n \"ArrangeBy\": {\n \"input\": {\n \"Get\": {\n \"id\": {\n \"Global\": {\n \"Transient\": 74\n }\n },\n \"typ\": {\n \"column_types\": [\n {\n \"scalar_type\": \"Int32\",\n \"nullable\": false\n }\n ],\n \"keys\": []\n },\n \"access_strategy\": \"SameDataflow\"\n }\n },\n \"keys\": [\n [\n {\n \"Column\": 0\n }\n ]\n ]\n }\n }\n },\n {\n \"id\": \"t74\",\n \"plan\": {\n \"Project\": {\n \"input\": {\n \"Map\": {\n \"input\": {\n \"Get\": {\n \"id\": {\n \"Global\": {\n \"System\": 463\n }\n },\n \"typ\": {\n \"column_types\": [\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"Oid\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": false\n },\n {\n \"scalar_type\": {\n \"Array\": \"MzAclItem\"\n },\n \"nullable\": false\n },\n {\n 
\"scalar_type\": \"String\",\n \"nullable\": true\n },\n {\n \"scalar_type\": \"String\",\n \"nullable\": true\n }\n ],\n \"keys\": [\n [\n 0\n ],\n [\n 1\n ]\n ]\n },\n \"access_strategy\": {\n \"Index\": [\n [\n {\n \"System\": 739\n },\n \"FullScan\"\n ]\n ]\n }\n }\n },\n \"scalars\": [\n {\n \"Literal\": [\n {\n \"Err\": \"DivisionByZero\"\n },\n {\n \"scalar_type\": \"Int32\",\n \"nullable\": false\n }\n ]\n }\n ]\n }\n },\n \"outputs\": [\n 15\n ]\n }\n }\n }\n ],\n \"sources\": []\n }\n },\n \"fast_path\": {\n \"text\": \"Explained Query (fast path):\\n Finish limit=1 output=[#0]\\n Project (#15)\\n Map (error(\\\"division by zero\\\"))\\n ReadIndex on=mz_catalog.mz_sources mz_sources_ind=[*** full scan ***]\\n\\nTarget cluster: mz_catalog_server\\n\",\n \"json\": {\n \"plans\": [\n {\n \"id\": \"Explained Query (fast path)\",\n \"plan\": {\n \"PeekExisting\": [\n {\n \"System\": 463\n },\n {\n \"System\": 739\n },\n null,\n {\n \"mfp\": {\n \"expressions\": [\n {\n \"Literal\": [\n {\n \"Err\": \"DivisionByZero\"\n },\n {\n \"scalar_type\": \"Int32\",\n \"nullable\": false\n }\n ]\n }\n ],\n \"predicates\": [],\n \"projection\": [\n 15\n ],\n \"input_arity\": 15\n }\n }\n ]\n }\n }\n ],\n \"sources\": []\n }\n }\n }\n },\n \"insights\": {\n \"imports\": {\n \"s739\": {\n \"name\": {\n \"schema\": \"mz_catalog\",\n \"item\": \"mz_sources_ind\"\n },\n \"type\": \"compute\"\n }\n },\n \"fast_path_clusters\": {},\n \"fast_path_limit\": null,\n \"persist_count\": []\n },\n \"cluster\": {\n \"name\": \"mz_catalog_server\",\n \"id\": {\n \"System\": 2\n }\n },\n \"redacted_sql\": \"SELECT '' / '' FROM [s463 AS mz_catalog.mz_sources] LIMIT ''\"\n}","code":"MZ001","severity":"notice"}} {"type":"CommandStarting","payload":{"has_rows":false,"is_streaming":false}} {"type":"Error","payload":{"message":"division by zero","code":"XX000"}} {"type":"ReadyForQuery","payload":"I"} diff --git a/src/expr/src/explain/text.rs b/src/expr/src/explain/text.rs index e128c1d603105..398f73c7bba29 100644 --- a/src/expr/src/explain/text.rs +++ b/src/expr/src/explain/text.rs @@ -484,7 +484,7 @@ impl MirRelationExpr { Project { outputs, input } => { FmtNode { fmt_root: |f, ctx| { - let outputs = mode.seq(outputs, self.column_names(ctx)); + let outputs = mode.seq(outputs, input.column_names(ctx)); let outputs = CompactScalars(outputs); write!(f, "{}Project ({})", ctx.indent, outputs)?; self.fmt_analyses(f, ctx) @@ -496,6 +496,9 @@ impl MirRelationExpr { Map { scalars, input } => { FmtNode { fmt_root: |f, ctx: &mut PlanRenderingContext<'_, MirRelationExpr>| { + // Here, it's better to refer to `self.column_names(ctx)` rather than + // `input.column_names(ctx)`, because then we also get humanization for refs + // to cols introduced earlier by the same `Map`. let scalars = mode.seq(scalars, self.column_names(ctx)); let scalars = CompactScalars(scalars); write!(f, "{}Map ({})", ctx.indent, scalars)?; diff --git a/src/expr/src/relation.rs b/src/expr/src/relation.rs index 1d0e76b353d38..009672e811e83 100644 --- a/src/expr/src/relation.rs +++ b/src/expr/src/relation.rs @@ -3340,16 +3340,9 @@ impl AggregateExpr { match self { AggregateExpr { func: AggregateFunc::Count, - expr: - MirScalarExpr::Literal( - Ok(row), - mz_repr::ColumnType { - scalar_type: mz_repr::ScalarType::Bool, - nullable: false, - }, - ), - .. 
- } => row.unpack_first() == mz_repr::Datum::True, + expr, + distinct: false, + } => expr.is_literal_true(), _ => false, } } diff --git a/src/expr/src/scalar.proto b/src/expr/src/scalar.proto index c83dc3667f8b1..7d8d9bef04d63 100644 --- a/src/expr/src/scalar.proto +++ b/src/expr/src/scalar.proto @@ -455,6 +455,7 @@ message ProtoUnaryFunc { ProtoToCharTimestamp to_char_timestamp = 331; ProtoToCharTimestamp to_char_timestamp_tz = 332; google.protobuf.Empty cast_date_to_mz_timestamp = 333; + google.protobuf.Empty bit_count_bytes = 334; } } @@ -668,6 +669,7 @@ message ProtoBinaryFunc { bool list_contains_list = 193; bool array_contains_array = 194; google.protobuf.Empty starts_with = 195; + google.protobuf.Empty get_bit = 196; } } diff --git a/src/expr/src/scalar/func.rs b/src/expr/src/scalar/func.rs index 0bd5d5d69ebfc..c6c01731b5156 100644 --- a/src/expr/src/scalar/func.rs +++ b/src/expr/src/scalar/func.rs @@ -1361,6 +1361,27 @@ fn power_numeric<'a>(a: Datum<'a>, b: Datum<'a>) -> Result, EvalError> } } +fn get_bit<'a>(a: Datum<'a>, b: Datum<'a>) -> Result, EvalError> { + let bytes = a.unwrap_bytes(); + let index = b.unwrap_int32(); + let err = EvalError::IndexOutOfRange { + provided: index, + valid_end: i32::try_from(bytes.len().saturating_mul(8)).unwrap() - 1, + }; + + let index = usize::try_from(index).map_err(|_| err.clone())?; + + let byte_index = index / 8; + let bit_index = index % 8; + + let i = bytes + .get(byte_index) + .map(|b| (*b >> bit_index) & 1) + .ok_or(err)?; + assert!(i == 0 || i == 1); + Ok(Datum::from(i32::from(i))) +} + fn get_byte<'a>(a: Datum<'a>, b: Datum<'a>) -> Result, EvalError> { let bytes = a.unwrap_bytes(); let index = b.unwrap_int32(); @@ -2344,6 +2365,7 @@ pub enum BinaryFunc { LogNumeric, Power, PowerNumeric, + GetBit, GetByte, ConstantTimeEqBytes, ConstantTimeEqString, @@ -2607,6 +2629,7 @@ impl BinaryFunc { BinaryFunc::Power => power(a, b), BinaryFunc::PowerNumeric => power_numeric(a, b), BinaryFunc::RepeatString => repeat_string(a, b, temp_storage), + BinaryFunc::GetBit => get_bit(a, b), BinaryFunc::GetByte => get_byte(a, b), BinaryFunc::ConstantTimeEqBytes => constant_time_eq_bytes(a, b), BinaryFunc::ConstantTimeEqString => constant_time_eq_string(a, b), @@ -2804,6 +2827,7 @@ impl BinaryFunc { ScalarType::Numeric { max_scale: None }.nullable(in_nullable) } + GetBit => ScalarType::Int32.nullable(in_nullable), GetByte => ScalarType::Int32.nullable(in_nullable), ConstantTimeEqBytes | ConstantTimeEqString => { @@ -3023,6 +3047,7 @@ impl BinaryFunc { | LogNumeric | Power | PowerNumeric + | GetBit | GetByte | RangeContainsElem { .. } | RangeContainsRange { .. } @@ -3241,6 +3266,7 @@ impl BinaryFunc { | ListRemove | LikeEscape | UuidGenerateV5 + | GetBit | GetByte | MzAclItemContainsPrivilege | ConstantTimeEqBytes @@ -3508,7 +3534,8 @@ impl BinaryFunc { | BinaryFunc::Decode => (false, false), // TODO: it may be safe to treat these as monotone. BinaryFunc::LogNumeric | BinaryFunc::Power | BinaryFunc::PowerNumeric => (false, false), - BinaryFunc::GetByte + BinaryFunc::GetBit + | BinaryFunc::GetByte | BinaryFunc::RangeContainsElem { .. } | BinaryFunc::RangeContainsRange { .. 
} | BinaryFunc::RangeOverlaps @@ -3716,6 +3743,7 @@ impl fmt::Display for BinaryFunc { BinaryFunc::Power => f.write_str("power"), BinaryFunc::PowerNumeric => f.write_str("power_numeric"), BinaryFunc::RepeatString => f.write_str("repeat"), + BinaryFunc::GetBit => f.write_str("get_bit"), BinaryFunc::GetByte => f.write_str("get_byte"), BinaryFunc::ConstantTimeEqBytes => f.write_str("constant_time_compare_bytes"), BinaryFunc::ConstantTimeEqString => f.write_str("constant_time_compare_strings"), @@ -4140,6 +4168,7 @@ impl RustType for BinaryFunc { BinaryFunc::LogNumeric => LogNumeric(()), BinaryFunc::Power => Power(()), BinaryFunc::PowerNumeric => PowerNumeric(()), + BinaryFunc::GetBit => GetBit(()), BinaryFunc::GetByte => GetByte(()), BinaryFunc::RangeContainsElem { elem_type, rev } => { RangeContainsElem(crate::scalar::proto_binary_func::ProtoRangeContainsInner { @@ -4360,6 +4389,7 @@ impl RustType for BinaryFunc { LogNumeric(()) => Ok(BinaryFunc::LogNumeric), Power(()) => Ok(BinaryFunc::Power), PowerNumeric(()) => Ok(BinaryFunc::PowerNumeric), + GetBit(()) => Ok(BinaryFunc::GetBit), GetByte(()) => Ok(BinaryFunc::GetByte), RangeContainsElem(inner) => Ok(BinaryFunc::RangeContainsElem { elem_type: inner @@ -4799,6 +4829,7 @@ derive_unary!( FloorFloat64, FloorNumeric, Ascii, + BitCountBytes, BitLengthBytes, BitLengthString, ByteLengthBytes, @@ -5209,6 +5240,7 @@ impl Arbitrary for UnaryFunc { FloorFloat64::arbitrary().prop_map_into().boxed(), FloorNumeric::arbitrary().prop_map_into().boxed(), Ascii::arbitrary().prop_map_into().boxed(), + BitCountBytes::arbitrary().prop_map_into().boxed(), BitLengthBytes::arbitrary().prop_map_into().boxed(), BitLengthString::arbitrary().prop_map_into().boxed(), ByteLengthBytes::arbitrary().prop_map_into().boxed(), @@ -5597,6 +5629,7 @@ impl RustType for UnaryFunc { UnaryFunc::FloorFloat64(_) => FloorFloat64(()), UnaryFunc::FloorNumeric(_) => FloorNumeric(()), UnaryFunc::Ascii(_) => Ascii(()), + UnaryFunc::BitCountBytes(_) => BitCountBytes(()), UnaryFunc::BitLengthBytes(_) => BitLengthBytes(()), UnaryFunc::BitLengthString(_) => BitLengthString(()), UnaryFunc::ByteLengthBytes(_) => ByteLengthBytes(()), @@ -6071,6 +6104,7 @@ impl RustType for UnaryFunc { FloorFloat64(_) => Ok(impls::FloorFloat64.into()), FloorNumeric(_) => Ok(impls::FloorNumeric.into()), Ascii(_) => Ok(impls::Ascii.into()), + BitCountBytes(_) => Ok(impls::BitCountBytes.into()), BitLengthBytes(_) => Ok(impls::BitLengthBytes.into()), BitLengthString(_) => Ok(impls::BitLengthString.into()), ByteLengthBytes(_) => Ok(impls::ByteLengthBytes.into()), diff --git a/src/expr/src/scalar/func/impls/byte.rs b/src/expr/src/scalar/func/impls/byte.rs index 227aa8a0227aa..bd2e0c4aab1aa 100644 --- a/src/expr/src/scalar/func/impls/byte.rs +++ b/src/expr/src/scalar/func/impls/byte.rs @@ -7,6 +7,7 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. 
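// [Editorial sketch, not part of the patch] The new `get_bit` (added above in
// func.rs) and `bit_count` (added just below) follow PostgreSQL's semantics:
// bits are addressed LSB-first within each byte. For the bytes \x1234567890 and
// index 30, byte_index = 30 / 8 = 3 (the byte 0x78), bit_index = 30 % 8 = 6,
// and (0x78 >> 6) & 1 = 1; bit_count(\x1234567890) counts 2 + 3 + 4 + 4 + 2 = 15
// set bits. The helper names below are hypothetical and only illustrate the addressing.
fn get_bit_sketch(bytes: &[u8], index: usize) -> u8 {
    (bytes[index / 8] >> (index % 8)) & 1
}
fn bit_count_sketch(bytes: &[u8]) -> u32 {
    bytes.iter().map(|b| b.count_ones()).sum()
}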
+use mz_ore::cast::CastFrom; use mz_repr::strconv; use crate::EvalError; @@ -64,6 +65,14 @@ sqlfunc!( } ); +sqlfunc!( + #[sqlname = "bit_count"] + fn bit_count_bytes<'a>(a: &'a [u8]) -> Result { + let count: u64 = a.iter().map(|b| u64::cast_from(b.count_ones())).sum(); + i64::try_from(count).or(Err(EvalError::Int64OutOfRange(count.to_string().into()))) + } +); + sqlfunc!( #[sqlname = "bit_length"] fn bit_length_bytes<'a>(a: &'a [u8]) -> Result { diff --git a/src/materialized/BUILD.bazel b/src/materialized/BUILD.bazel index 469d3df28f3f1..7c90dcae249f8 100644 --- a/src/materialized/BUILD.bazel +++ b/src/materialized/BUILD.bazel @@ -29,7 +29,7 @@ rust_binary( proc_macro_deps = [] + all_crate_deps(proc_macro = True), rustc_env = {}, rustc_flags = [], - version = "0.130.0-dev.0", + version = "0.130.13", deps = [ "//src/clusterd:mz_clusterd", "//src/environmentd:mz_environmentd", diff --git a/src/materialized/Cargo.toml b/src/materialized/Cargo.toml index c9f7c167f2fa0..d4fe683eff023 100644 --- a/src/materialized/Cargo.toml +++ b/src/materialized/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "mz-materialized" description = "Materialize's unified binary." -version = "0.130.0-dev.0" +version = "0.130.13" edition.workspace = true rust-version.workspace = true publish = false diff --git a/src/materialized/ci/mzbuild.yml b/src/materialized/ci/mzbuild.yml index ba673a942ec20..cbac0dc1f8bd6 100644 --- a/src/materialized/ci/mzbuild.yml +++ b/src/materialized/ci/mzbuild.yml @@ -14,4 +14,3 @@ pre-image: bin: [ materialized ] bazel-bin: materialized: "@//src/materialized:materialized" - strip: false diff --git a/src/mysql-util/src/decoding.rs b/src/mysql-util/src/decoding.rs index a3a604403ca29..15076c9ca0d33 100644 --- a/src/mysql-util/src/decoding.rs +++ b/src/mysql-util/src/decoding.rs @@ -95,7 +95,35 @@ fn pack_val_as_datum( ScalarType::Int16 => packer.push(Datum::from(from_value_opt::(value)?)), ScalarType::UInt32 => packer.push(Datum::from(from_value_opt::(value)?)), ScalarType::Int32 => packer.push(Datum::from(from_value_opt::(value)?)), - ScalarType::UInt64 => packer.push(Datum::from(from_value_opt::(value)?)), + ScalarType::UInt64 => { + if let Some(MySqlColumnMeta::Bit(precision)) = &col_desc.meta { + let mut value = from_value_opt::>(value)?; + + // Ensure we have the correct number of bytes. + let precision_bytes = (precision + 7) / 8; + if value.len() != usize::cast_from(precision_bytes) { + return Err(anyhow::anyhow!("'bit' column out of range!")); + } + // Be defensive and prune any bits that come over the wire and are + // greater than our precision. + let bit_index = precision % 8; + if bit_index != 0 { + let mask = !(u8::MAX << bit_index); + if value.len() > 0 { + value[0] &= mask; + } + } + + // Based on experimentation the value coming across the wire is + // encoded in big-endian. 
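// [Editorial worked example, not part of the patch] For a MySQL `bit(12)`
// column, the pruning above and the big-endian decode below behave as follows:
// precision_bytes = (12 + 7) / 8 = 2 and bit_index = 12 % 8 = 4, so the mask
// !(0xFF << 4) = 0x0F keeps only the low four bits of the most significant
// byte. A wire value of [0xFA, 0x01] is pruned to [0x0A, 0x01], left-padded to
// eight bytes, and read via u64::from_be_bytes as 0x0A01 = 2561. The precision
// check above guarantees the value is at most eight bytes long.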
+ let mut buf = [0u8; 8]; + buf[(8 - value.len())..].copy_from_slice(value.as_slice()); + let value = u64::from_be_bytes(buf); + packer.push(Datum::from(value)) + } else { + packer.push(Datum::from(from_value_opt::(value)?)) + } + } ScalarType::Int64 => packer.push(Datum::from(from_value_opt::(value)?)), ScalarType::Float32 => packer.push(Datum::from(from_value_opt::(value)?)), ScalarType::Float64 => packer.push(Datum::from(from_value_opt::(value)?)), @@ -198,6 +226,7 @@ fn pack_val_as_datum( ))?; } } + Some(MySqlColumnMeta::Bit(_)) => unreachable!("parsed as a u64"), None => { packer.push(Datum::String(&from_value_opt::(value)?)); } diff --git a/src/mysql-util/src/desc.proto b/src/mysql-util/src/desc.proto index a827ac8c5afc4..2e7940d5bf3fa 100644 --- a/src/mysql-util/src/desc.proto +++ b/src/mysql-util/src/desc.proto @@ -34,6 +34,10 @@ message ProtoMySqlColumnMetaTimestamp { uint32 precision = 1; } +message ProtoMySqlColumnMetaBit { + uint32 precision = 1; +} + message ProtoMySqlColumnDesc { string name = 1; optional mz_repr.relation_and_scalar.ProtoColumnType column_type = 2; @@ -44,6 +48,7 @@ message ProtoMySqlColumnDesc { ProtoMySqlColumnMetaYear year = 5; ProtoMySqlColumnMetaDate date = 6; ProtoMySqlColumnMetaTimestamp timestamp = 7; + ProtoMySqlColumnMetaBit bit = 8; } } diff --git a/src/mysql-util/src/desc.rs b/src/mysql-util/src/desc.rs index 2e4c8263aa9a4..b46c89af2223f 100644 --- a/src/mysql-util/src/desc.rs +++ b/src/mysql-util/src/desc.rs @@ -169,6 +169,8 @@ pub enum MySqlColumnMeta { Date, /// The described column is a timestamp value with a set precision. Timestamp(u32), + /// The described column is a `bit` column, with the given possibly precision. + Bit(u32), } impl IsCompatible for Option { @@ -195,6 +197,9 @@ impl IsCompatible for Option { Some(MySqlColumnMeta::Timestamp(precision)), Some(MySqlColumnMeta::Timestamp(other_precision)), ) => precision <= other_precision, + // We always cast bit columns to u64's and the max precision of a bit column + // is 64 bits, so any bit column is always compatible with another. + (Some(MySqlColumnMeta::Bit(_)), Some(MySqlColumnMeta::Bit(_))) => true, _ => false, } } @@ -226,6 +231,9 @@ impl RustType for MySqlColumnDesc { precision: *precision, })) } + MySqlColumnMeta::Bit(precision) => Some(Meta::Bit(ProtoMySqlColumnMetaBit { + precision: *precision, + })), }), } } @@ -245,6 +253,7 @@ impl RustType for MySqlColumnDesc { Meta::Year(_) => Some(Ok(MySqlColumnMeta::Year)), Meta::Date(_) => Some(Ok(MySqlColumnMeta::Date)), Meta::Timestamp(e) => Some(Ok(MySqlColumnMeta::Timestamp(e.precision))), + Meta::Bit(e) => Some(Ok(MySqlColumnMeta::Bit(e.precision))), }) .transpose()?, }) diff --git a/src/mysql-util/src/schemas.rs b/src/mysql-util/src/schemas.rs index a2d0e9e8c4917..e9b9ee29910ac 100644 --- a/src/mysql-util/src/schemas.rs +++ b/src/mysql-util/src/schemas.rs @@ -165,13 +165,13 @@ impl MySqlTableSchema { // Collect the parsed data types or errors for later reporting. 
match parse_data_type(&info, &self.schema_name, &self.name) { Err(err) => error_cols.push(err), - Ok(scalar_type) => columns.push(MySqlColumnDesc { + Ok((scalar_type, meta)) => columns.push(MySqlColumnDesc { name: info.column_name, column_type: Some(ColumnType { scalar_type, nullable: &info.is_nullable == "YES", }), - meta: None, + meta, }), } } @@ -346,35 +346,35 @@ fn parse_data_type( info: &InfoSchema, schema_name: &str, table_name: &str, -) -> Result { +) -> Result<(ScalarType, Option), UnsupportedDataType> { let unsigned = info.column_type.contains("unsigned"); - match info.data_type.as_str() { + let scalar_type = match info.data_type.as_str() { "tinyint" | "smallint" => { if unsigned { - Ok(ScalarType::UInt16) + ScalarType::UInt16 } else { - Ok(ScalarType::Int16) + ScalarType::Int16 } } "mediumint" | "int" => { if unsigned { - Ok(ScalarType::UInt32) + ScalarType::UInt32 } else { - Ok(ScalarType::Int32) + ScalarType::Int32 } } "bigint" => { if unsigned { - Ok(ScalarType::UInt64) + ScalarType::UInt64 } else { - Ok(ScalarType::Int64) + ScalarType::Int64 } } - "float" => Ok(ScalarType::Float32), - "double" => Ok(ScalarType::Float64), - "date" => Ok(ScalarType::Date), - "datetime" | "timestamp" => Ok(ScalarType::Timestamp { + "float" => ScalarType::Float32, + "double" => ScalarType::Float64, + "date" => ScalarType::Date, + "datetime" | "timestamp" => ScalarType::Timestamp { // both mysql and our scalar type use a max six-digit fractional-second precision // this is bounds-checked in the TryFrom impl precision: info @@ -387,8 +387,8 @@ fn parse_data_type( column_name: info.column_name.clone(), intended_type: None, })?, - }), - "time" => Ok(ScalarType::Time), + }, + "time" => ScalarType::Time, "decimal" | "numeric" => { // validate the precision is within the bounds of our numeric type // here since we don't use this precision on the ScalarType itself @@ -401,7 +401,7 @@ fn parse_data_type( intended_type: None, })? } - Ok(ScalarType::Numeric { + ScalarType::Numeric { max_scale: info .numeric_scale .map(NumericMaxScale::try_from) @@ -412,9 +412,9 @@ fn parse_data_type( column_name: info.column_name.clone(), intended_type: None, })?, - }) + } } - "char" => Ok(ScalarType::Char { + "char" => ScalarType::Char { length: info .character_maximum_length .and_then(|f| Some(CharLength::try_from(f))) @@ -425,8 +425,8 @@ fn parse_data_type( column_name: info.column_name.clone(), intended_type: None, })?, - }), - "varchar" => Ok(ScalarType::VarChar { + }, + "varchar" => ScalarType::VarChar { max_length: info .character_maximum_length .and_then(|f| Some(VarCharMaxLength::try_from(f))) @@ -437,19 +437,37 @@ fn parse_data_type( column_name: info.column_name.clone(), intended_type: None, })?, - }), - "text" | "tinytext" | "mediumtext" | "longtext" => Ok(ScalarType::String), + }, + "text" | "tinytext" | "mediumtext" | "longtext" => ScalarType::String, "binary" | "varbinary" | "tinyblob" | "blob" | "mediumblob" | "longblob" => { - Ok(ScalarType::Bytes) + ScalarType::Bytes } - "json" => Ok(ScalarType::Jsonb), - _ => Err(UnsupportedDataType { - column_type: info.column_type.clone(), - qualified_table_name: format!("{:?}.{:?}", schema_name, table_name), - column_name: info.column_name.clone(), - intended_type: None, - }), - } + "json" => ScalarType::Jsonb, + // TODO(mysql): Support the `bit` type natively in Materialize. 
+ "bit" => { + let precision = match info.numeric_precision { + Some(x @ 0..=64) => u32::try_from(x).expect("known good value"), + prec => { + mz_ore::soft_panic_or_log!( + "found invalid bit precision, {prec:?}, falling back" + ); + 64u32 + } + }; + return Ok((ScalarType::UInt64, Some(MySqlColumnMeta::Bit(precision)))); + } + typ => { + tracing::warn!(?typ, "found unsupported data type"); + return Err(UnsupportedDataType { + column_type: info.column_type.clone(), + qualified_table_name: format!("{:?}.{:?}", schema_name, table_name), + column_name: info.column_name.clone(), + intended_type: None, + }); + } + }; + + Ok((scalar_type, None)) } /// Parse the specified column as a TEXT COLUMN. We only support the set of types that are diff --git a/src/mz/Cargo.toml b/src/mz/Cargo.toml index 340f18cafc756..949f96dbbf321 100644 --- a/src/mz/Cargo.toml +++ b/src/mz/Cargo.toml @@ -25,7 +25,7 @@ mz-ore = { path = "../ore", features = ["async", "cli", "test"] } open = "3.2.0" openssl-probe = "0.1.2" hyper = "1.4.1" -reqwest = { version = "0.11", features = ["blocking", "json"] } +reqwest = { version = "0.12", features = ["blocking", "json", "default-tls", "charset", "http2"], default-features = false } rpassword = "7.2.0" semver = "1.0.16" serde = { version = "1.0.152", features = ["derive"] } diff --git a/src/orchestrator-kubernetes/src/lib.rs b/src/orchestrator-kubernetes/src/lib.rs index 26036dc5593d3..e61f80578faad 100644 --- a/src/orchestrator-kubernetes/src/lib.rs +++ b/src/orchestrator-kubernetes/src/lib.rs @@ -896,11 +896,10 @@ impl NamespacedOrchestrator for NamespacedKubernetesOrchestrator { }; let container_name = image - .splitn(2, '/') - .skip(1) - .next() - .and_then(|name_version| name_version.splitn(2, ':').next()) + .rsplit_once('/') + .and_then(|(_, name_version)| name_version.rsplit_once(':')) .context("`image` is not ORG/NAME:VERSION")? 
+ .0 .to_string(); let container_security_context = if scheduling_config.security_context_enabled { diff --git a/src/orchestrator-process/src/lib.rs b/src/orchestrator-process/src/lib.rs index 0d0da50e97767..8e1c9a295127d 100644 --- a/src/orchestrator-process/src/lib.rs +++ b/src/orchestrator-process/src/lib.rs @@ -32,9 +32,10 @@ use futures::stream::{BoxStream, FuturesUnordered, TryStreamExt}; use itertools::Itertools; use libc::{SIGABRT, SIGBUS, SIGILL, SIGSEGV, SIGTRAP}; use maplit::btreemap; +use mz_orchestrator::scheduling_config::ServiceSchedulingConfig; use mz_orchestrator::{ CpuLimit, DiskLimit, MemoryLimit, NamespacedOrchestrator, Orchestrator, Service, ServiceConfig, - ServiceEvent, ServiceProcessMetrics, ServiceStatus, + ServiceEvent, ServicePort, ServiceProcessMetrics, ServiceStatus, }; use mz_ore::cast::{CastFrom, TryCastFrom}; use mz_ore::error::ErrorExt; @@ -286,6 +287,7 @@ impl Orchestrator for ProcessOrchestrator { services, service_event_rx, command_tx, + scheduling_config: Default::default(), _worker: worker, }) })) @@ -326,6 +328,7 @@ struct NamespacedProcessOrchestrator { services: Arc>>>, service_event_rx: broadcast::Receiver, command_tx: mpsc::UnboundedSender, + scheduling_config: std::sync::RwLock, _worker: AbortOnDropHandle<()>, } @@ -342,10 +345,46 @@ impl NamespacedOrchestrator for NamespacedProcessOrchestrator { id: &str, config: ServiceConfig, ) -> Result, anyhow::Error> { + let always_use_disk = self + .scheduling_config + .read() + .expect("poisoned") + .always_use_disk; + let service = ProcessService { run_dir: self.config.service_run_dir(id), scale: config.scale, }; + // Determining whether to enable disk is subtle because we need to + // support historical sizes in the managed service and custom sizes in + // self hosted deployments. + let disk = { + // Whether the user specified `DISK = TRUE` when creating the + // replica OR whether the feature flag to force disk is enabled. + let user_requested_disk = config.disk || always_use_disk; + // Whether the cluster replica size map provided by the + // administrator explicitly indicates that the size does not support + // disk. + let size_disables_disk = config.disk_limit == Some(DiskLimit::ZERO); + // Enable disk if the user requested it and the size does not + // disable it. + // + // Arguably we should not allow the user to request disk with sizes + // that have a zero disk limit, but configuring disk on a replica by + // replica basis is a legacy option that we hope to remove someday. + user_requested_disk && !size_disables_disk + }; + + let config = EnsureServiceConfig { + image: config.image, + args: config.args, + ports: config.ports, + memory_limit: config.memory_limit, + cpu_limit: config.cpu_limit, + scale: config.scale, + labels: config.labels, + disk, + }; self.send_command(WorkerCommand::EnsureService { id: id.to_string(), @@ -408,9 +447,9 @@ impl NamespacedOrchestrator for NamespacedProcessOrchestrator { fn update_scheduling_config( &self, - _config: mz_orchestrator::scheduling_config::ServiceSchedulingConfig, + config: mz_orchestrator::scheduling_config::ServiceSchedulingConfig, ) { - // This orchestrator ignores scheduling constraints. 
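// [Editorial sketch, not part of the patch] The disk decision above, expressed
// as a pure function over its inputs. `DiskLimit` is the orchestrator type
// already used in this file; the function and parameter names are hypothetical.
fn effective_disk(
    replica_requested_disk: bool,
    always_use_disk: bool,
    disk_limit: Option<DiskLimit>,
) -> bool {
    // `DISK = TRUE` on the replica, or the feature flag forcing disk on.
    let user_requested_disk = replica_requested_disk || always_use_disk;
    // A size with a zero disk limit explicitly disables disk.
    let size_disables_disk = disk_limit == Some(DiskLimit::ZERO);
    user_requested_disk && !size_disables_disk
}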
+ *self.scheduling_config.write().expect("poisoned") = config; } } @@ -422,7 +461,7 @@ impl NamespacedOrchestrator for NamespacedProcessOrchestrator { enum WorkerCommand { EnsureService { id: String, - config: ServiceConfig, + config: EnsureServiceConfig, }, DropService { id: String, @@ -436,6 +475,32 @@ enum WorkerCommand { }, } +/// Describes the desired state of a process. +struct EnsureServiceConfig { + /// An opaque identifier for the executable or container image to run. + /// + /// Often names a container on Docker Hub or a path on the local machine. + pub image: String, + /// A function that generates the arguments for each process of the service + /// given the assigned listen addresses for each named port. + pub args: Box) -> Vec + Send + Sync>, + /// Ports to expose. + pub ports: Vec, + /// An optional limit on the memory that the service can use. + pub memory_limit: Option, + /// An optional limit on the CPU that the service can use. + pub cpu_limit: Option, + /// The number of copies of this service to run. + pub scale: u16, + /// Arbitrary key–value pairs to attach to the service in the orchestrator + /// backend. + /// + /// The orchestrator backend may apply a prefix to the key if appropriate. + pub labels: BTreeMap, + /// Whether scratch disk space should be allocated for the service. + pub disk: bool, +} + /// A task executing blocking work for a [`NamespacedProcessOrchestrator`] in the background. /// /// This type exists to enable making [`NamespacedProcessOrchestrator::ensure_service`] and @@ -536,35 +601,19 @@ impl OrchestratorWorker { async fn ensure_service( &self, id: String, - ServiceConfig { + EnsureServiceConfig { image, - init_container_image: _, args, ports: ports_in, memory_limit, cpu_limit, scale, labels, - // Scheduling constraints are entirely ignored by the process orchestrator. - availability_zones: _, - other_replicas_selector: _, - replicas_selector: _, disk, - disk_limit, - node_selector: _, - }: ServiceConfig, + }: EnsureServiceConfig, ) -> Result<(), anyhow::Error> { let full_id = self.config.full_id(&id); - // Enable disk if 1) the user requested it when creating the service - // *and* 2) the size declared by the system administrator does not - // specify a disk limit of zero. - // - // Arguably we should not allow enabling disk for sizes with a zero disk - // limit, but configuring disk on a replica by replica basis is a legacy - // option that we hope to remove someday. 
- let disk = disk && disk_limit != Some(DiskLimit::ZERO); - let run_dir = self.config.service_run_dir(&id); fs::create_dir_all(&run_dir) .await diff --git a/src/orchestratord/BUILD.bazel b/src/orchestratord/BUILD.bazel index b3b42a4ed6899..58af2ad250a90 100644 --- a/src/orchestratord/BUILD.bazel +++ b/src/orchestratord/BUILD.bazel @@ -31,7 +31,7 @@ rust_library( proc_macro_deps = [] + all_crate_deps(proc_macro = True), rustc_env = {}, rustc_flags = [], - version = "0.130.0-dev.0", + version = "0.130.13", deps = [ "//src/alloc:mz_alloc", "//src/alloc-default:mz_alloc_default", @@ -69,7 +69,7 @@ rust_test( ), rustc_env = {}, rustc_flags = [], - version = "0.130.0-dev.0", + version = "0.130.13", deps = [ "//src/alloc:mz_alloc", "//src/alloc-default:mz_alloc_default", @@ -120,7 +120,7 @@ rust_binary( proc_macro_deps = [] + all_crate_deps(proc_macro = True), rustc_env = {}, rustc_flags = [], - version = "0.130.0-dev.0", + version = "0.130.13", deps = [ ":mz_orchestratord", "//src/alloc:mz_alloc", diff --git a/src/orchestratord/Cargo.toml b/src/orchestratord/Cargo.toml index 15a9141717cdf..5cdeb4f176330 100644 --- a/src/orchestratord/Cargo.toml +++ b/src/orchestratord/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "mz-orchestratord" description = "Kubernetes operator for Materialize regions" -version = "0.130.0-dev.0" +version = "0.130.13" edition.workspace = true rust-version.workspace = true publish = false diff --git a/src/orchestratord/src/controller/materialize.rs b/src/orchestratord/src/controller/materialize.rs index 762a6ea90c7b2..409a1d904bb4f 100644 --- a/src/orchestratord/src/controller/materialize.rs +++ b/src/orchestratord/src/controller/materialize.rs @@ -76,6 +76,8 @@ pub struct MaterializeControllerArgs { enable_security_context: bool, #[clap(long)] enable_internal_statement_logging: bool, + #[clap(long, default_value = "false")] + disable_statement_logging: bool, #[clap(long)] orchestratord_pod_selector_labels: Vec>, @@ -106,6 +108,14 @@ pub struct MaterializeControllerArgs { bootstrap_builtin_catalog_server_cluster_replica_size: Option, #[clap(long)] bootstrap_builtin_analytics_cluster_replica_size: Option, + #[clap(long)] + bootstrap_builtin_system_cluster_replication_factor: Option, + #[clap(long)] + bootstrap_builtin_probe_cluster_replication_factor: Option, + #[clap(long)] + bootstrap_builtin_support_cluster_replication_factor: Option, + #[clap(long)] + bootstrap_builtin_analytics_cluster_replication_factor: Option, #[clap( long, diff --git a/src/orchestratord/src/controller/materialize/environmentd.rs b/src/orchestratord/src/controller/materialize/environmentd.rs index 91faa079aeb6e..7ec0d5d387f41 100644 --- a/src/orchestratord/src/controller/materialize/environmentd.rs +++ b/src/orchestratord/src/controller/materialize/environmentd.rs @@ -833,13 +833,29 @@ fn create_environmentd_statefulset_object( config .bootstrap_builtin_catalog_server_cluster_replica_size .as_ref() - .map(|size| { - format!("--bootstrap-builtin-catalog-server-cluster-replica-size={size}") - }), + .map(|size| format!("--bootstrap-builtin-catalog-server-cluster-replica-size={size}")), config .bootstrap_builtin_analytics_cluster_replica_size .as_ref() .map(|size| format!("--bootstrap-builtin-analytics-cluster-replica-size={size}")), + config + .bootstrap_builtin_system_cluster_replication_factor + .as_ref() + .map(|replication_factor| { + format!("--bootstrap-builtin-system-cluster-replication-factor={replication_factor}") + }), + config + .bootstrap_builtin_probe_cluster_replication_factor + .as_ref() + 
.map(|replication_factor| format!("--bootstrap-builtin-probe-cluster-replication-factor={replication_factor}")), + config + .bootstrap_builtin_support_cluster_replication_factor + .as_ref() + .map(|replication_factor| format!("--bootstrap-builtin-support-cluster-replication-factor={replication_factor}")), + config + .bootstrap_builtin_analytics_cluster_replication_factor + .as_ref() + .map(|replication_factor| format!("--bootstrap-builtin-analytics-cluster-replication-factor={replication_factor}")), ] .into_iter() .flatten(), @@ -893,6 +909,11 @@ fn create_environmentd_statefulset_object( if config.enable_internal_statement_logging { args.push("--system-parameter-default=enable_internal_statement_logging=true".into()); } + + if config.disable_statement_logging { + args.push("--system-parameter-default=statement_logging_max_sample_rate=0".into()); + } + if config.disable_authentication { args.push("--system-parameter-default=enable_rbac_checks=false".into()); } diff --git a/src/ore/src/lgbytes.rs b/src/ore/src/lgbytes.rs index 883ce8e427ef9..cb2b3971ae298 100644 --- a/src/ore/src/lgbytes.rs +++ b/src/ore/src/lgbytes.rs @@ -179,6 +179,8 @@ impl Buf for LgBytes { pub struct LgBytesMetrics { /// Metrics for the "persist_s3" usage of [LgBytes]. pub persist_s3: LgBytesOpMetrics, + /// Metrics for the "persist_azure" usage of [LgBytes]. + pub persist_azure: LgBytesOpMetrics, /// Metrics for the "persist_arrow" usage of [LgBytes]. pub persist_arrow: LgBytesOpMetrics, } @@ -272,6 +274,7 @@ impl LgBytesMetrics { }; LgBytesMetrics { persist_s3: op("persist_s3"), + persist_azure: op("persist_azure"), persist_arrow: op("persist_arrow"), } } diff --git a/src/ore/src/region.rs b/src/ore/src/region.rs index 9fcc974dac9e8..c04b4a4273cde 100644 --- a/src/ore/src/region.rs +++ b/src/ore/src/region.rs @@ -19,6 +19,10 @@ use std::fmt::{Debug, Formatter}; use std::mem::ManuallyDrop; use std::ops::{Deref, DerefMut}; +/// Enable allocations through `new_auto` to use lgalloc. `new_mmap` will always use lgalloc. +pub const ENABLE_LGALLOC_REGION: std::sync::atomic::AtomicBool = + std::sync::atomic::AtomicBool::new(false); + /// A region allocator which holds items at stable memory locations. /// /// Items once inserted will not be moved, and their locations in memory @@ -253,11 +257,14 @@ impl Region { /// Returns a [`Region::MMap`] if possible, and falls back to [`Region::Heap`] otherwise. 
#[must_use] pub fn new_auto(capacity: usize) -> Region { - match Region::new_mmap(capacity) { - Ok(r) => return r, - Err(lgalloc::AllocError::Disabled) | Err(lgalloc::AllocError::InvalidSizeClass(_)) => {} - Err(e) => { - eprintln!("lgalloc error: {e}, falling back to heap"); + if ENABLE_LGALLOC_REGION.load(std::sync::atomic::Ordering::Relaxed) { + match Region::new_mmap(capacity) { + Ok(r) => return r, + Err(lgalloc::AllocError::Disabled) + | Err(lgalloc::AllocError::InvalidSizeClass(_)) => {} + Err(e) => { + eprintln!("lgalloc error: {e}, falling back to heap"); + } } } // Fall-through diff --git a/src/persist-cli/BUILD.bazel b/src/persist-cli/BUILD.bazel index 70f6077f003f5..06a71e3dd6c83 100644 --- a/src/persist-cli/BUILD.bazel +++ b/src/persist-cli/BUILD.bazel @@ -31,6 +31,7 @@ rust_binary( rustc_flags = [], version = "0.0.0", deps = [ + "//src/dyncfg:mz_dyncfg", "//src/http-util:mz_http_util", "//src/orchestrator-tracing:mz_orchestrator_tracing", "//src/ore:mz_ore", diff --git a/src/persist-cli/Cargo.toml b/src/persist-cli/Cargo.toml index 648b829feb944..c387de9553c75 100644 --- a/src/persist-cli/Cargo.toml +++ b/src/persist-cli/Cargo.toml @@ -26,6 +26,7 @@ clap = { version = "4.5.23", features = ["derive", "env"] } differential-dataflow = "0.13.3" futures = "0.3.25" humantime = "2.1.0" +mz-dyncfg = { path = "../dyncfg" } mz-http-util = { path = "../http-util" } mz-orchestrator-tracing = { path = "../orchestrator-tracing" } mz-ore = { path = "../ore", features = ["bytes_", "network", "tracing_", "test"] } diff --git a/src/persist-cli/src/maelstrom/txn_list_append_multi.rs b/src/persist-cli/src/maelstrom/txn_list_append_multi.rs index 2c2bd513f4dbf..9bc661e93c33d 100644 --- a/src/persist-cli/src/maelstrom/txn_list_append_multi.rs +++ b/src/persist-cli/src/maelstrom/txn_list_append_multi.rs @@ -19,6 +19,7 @@ use std::time::{Duration, SystemTime, UNIX_EPOCH}; use async_trait::async_trait; use differential_dataflow::consolidation::consolidate_updates; +use mz_dyncfg::ConfigUpdates; use mz_ore::metrics::MetricsRegistry; use mz_ore::now::{NOW_ZERO, SYSTEM_TIME}; use mz_persist::cfg::{BlobConfig, ConsensusConfig}; @@ -372,6 +373,14 @@ impl Service for TransactorService { let mut config = PersistConfig::new_default_configs(&mz_persist_client::BUILD_INFO, SYSTEM_TIME.clone()); + { + // We only use the Postgres tuned queries when connected to vanilla + // Postgres, so we always want to enable them for testing. 
+ let mut updates = ConfigUpdates::default(); + updates.add(&mz_persist::postgres::USE_POSTGRES_TUNED_QUERIES, true); + config.apply_from(&updates); + } + let metrics_registry = MetricsRegistry::new(); let metrics = Arc::new(PersistMetrics::new(&config, &metrics_registry)); @@ -417,6 +426,7 @@ impl Service for TransactorService { consensus_uri, Box::new(config.clone()), metrics.postgres_consensus.clone(), + Arc::clone(&config.configs), ) .expect("consensus_uri should be valid"); loop { diff --git a/src/persist-cli/src/maelstrom/txn_list_append_single.rs b/src/persist-cli/src/maelstrom/txn_list_append_single.rs index 2cbf5e3703a76..a770c5b051870 100644 --- a/src/persist-cli/src/maelstrom/txn_list_append_single.rs +++ b/src/persist-cli/src/maelstrom/txn_list_append_single.rs @@ -17,6 +17,7 @@ use std::time::{Duration, SystemTime, UNIX_EPOCH}; use async_trait::async_trait; use differential_dataflow::consolidation::consolidate_updates; use differential_dataflow::lattice::Lattice; +use mz_dyncfg::ConfigUpdates; use mz_ore::metrics::MetricsRegistry; use mz_ore::now::SYSTEM_TIME; use mz_persist::cfg::{BlobConfig, ConsensusConfig}; @@ -608,6 +609,14 @@ impl Service for TransactorService { let mut config = PersistConfig::new_default_configs(&mz_persist_client::BUILD_INFO, SYSTEM_TIME.clone()); + { + // We only use the Postgres tuned queries when connected to vanilla + // Postgres, so we always want to enable them for testing. + let mut updates = ConfigUpdates::default(); + updates.add(&mz_persist::postgres::USE_POSTGRES_TUNED_QUERIES, true); + config.apply_from(&updates); + } + let metrics = Arc::new(Metrics::new(&config, &MetricsRegistry::new())); // Construct requested Blob. @@ -652,6 +661,7 @@ impl Service for TransactorService { consensus_uri, Box::new(config.clone()), metrics.postgres_consensus.clone(), + Arc::clone(&config.configs), ) .expect("consensus_uri should be valid"); loop { diff --git a/src/persist-client/BUILD.bazel b/src/persist-client/BUILD.bazel index d43223acc2278..31b86fecb5fc3 100644 --- a/src/persist-client/BUILD.bazel +++ b/src/persist-client/BUILD.bazel @@ -29,7 +29,7 @@ rust_library( proc_macro_deps = ["//src/persist-proc:mz_persist_proc"] + all_crate_deps(proc_macro = True), rustc_env = {}, rustc_flags = [], - version = "0.130.0-dev.0", + version = "0.130.13", deps = [ ":mz_persist_client_build_script", "//src/build-info:mz_build_info", @@ -68,7 +68,7 @@ rust_test( ), rustc_env = {}, rustc_flags = [], - version = "0.130.0-dev.0", + version = "0.130.13", deps = [ "//src/build-info:mz_build_info", "//src/dyncfg:mz_dyncfg", diff --git a/src/persist-client/Cargo.toml b/src/persist-client/Cargo.toml index 3d7c05a2e9ebb..a29caec2af54d 100644 --- a/src/persist-client/Cargo.toml +++ b/src/persist-client/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "mz-persist-client" description = "Client for Materialize pTVC durability system" -version = "0.130.0-dev.0" +version = "0.130.13" edition.workspace = true rust-version.workspace = true publish = false diff --git a/src/persist-client/src/batch.rs b/src/persist-client/src/batch.rs index 41693a1b6983b..61ac3f9e36850 100644 --- a/src/persist-client/src/batch.rs +++ b/src/persist-client/src/batch.rs @@ -61,7 +61,7 @@ use crate::internal::metrics::{BatchWriteMetrics, Metrics, RetryMetrics, ShardMe use crate::internal::paths::{PartId, PartialBatchKey, WriterKey}; use crate::internal::state::{ BatchPart, HollowBatch, HollowBatchPart, HollowRun, HollowRunRef, ProtoInlineBatchPart, - RunMeta, RunOrder, RunPart, WRITE_DIFFS_SUM, + RunMeta, 
RunOrder, RunPart, }; use crate::stats::{untrimmable_columns, STATS_BUDGET_BYTES, STATS_COLLECTION_ENABLED}; use crate::{PersistConfig, ShardId}; @@ -351,11 +351,9 @@ pub struct BatchBuilderConfig { pub(crate) stats_collection_enabled: bool, pub(crate) stats_budget: usize, pub(crate) stats_untrimmable_columns: Arc, - pub(crate) write_diffs_sum: bool, pub(crate) encoding_config: EncodingConfig, pub(crate) preferred_order: RunOrder, pub(crate) structured_encoding: bool, - pub(crate) record_schema_id: bool, pub(crate) structured_key_lower_len: usize, pub(crate) run_length_limit: usize, /// The number of runs to cap the built batch at, or None if we should @@ -367,7 +365,7 @@ pub struct BatchBuilderConfig { // TODO: Remove this once we're comfortable that there aren't any bugs. pub(crate) const BATCH_DELETE_ENABLED: Config = Config::new( "persist_batch_delete_enabled", - false, + true, "Whether to actually delete blobs when batch delete is called (Materialize).", ); @@ -389,12 +387,6 @@ pub(crate) const ENCODING_COMPRESSION_FORMAT: Config<&'static str> = Config::new "A feature flag to enable compression of Parquet data (Materialize).", ); -pub(crate) const RECORD_SCHEMA_ID: Config = Config::new( - "persist_record_schema_id", - false, - "If set, record the ID for the shard's schema in Part and Run metadata (Materialize).", -); - pub(crate) const STRUCTURED_ORDER: Config = Config::new( "persist_batch_structured_order", true, @@ -473,7 +465,6 @@ impl BatchBuilderConfig { let batch_columnar_format = BatchColumnarFormat::from_str(&BATCH_COLUMNAR_FORMAT.get(value)); - let record_schema_id = RECORD_SCHEMA_ID.get(value); let structured_order = STRUCTURED_ORDER.get(value) && { shard_id.to_string() < STRUCTURED_ORDER_UNTIL_SHARD.get(value) }; @@ -493,14 +484,12 @@ impl BatchBuilderConfig { stats_collection_enabled: STATS_COLLECTION_ENABLED.get(value), stats_budget: STATS_BUDGET_BYTES.get(value), stats_untrimmable_columns: Arc::new(untrimmable_columns(value)), - write_diffs_sum: WRITE_DIFFS_SUM.get(value), encoding_config: EncodingConfig { use_dictionary: ENCODING_ENABLE_DICTIONARY.get(value), compression: CompressionFormat::from_str(&ENCODING_COMPRESSION_FORMAT.get(value)), }, preferred_order, structured_encoding: BUILDER_STRUCTURED.get(value), - record_schema_id, structured_key_lower_len: STRUCTURED_KEY_LOWER_LEN.get(value), run_length_limit: MAX_RUN_LEN.get(value).clamp(2, usize::MAX), max_runs: match MAX_RUNS.get(value) { @@ -688,13 +677,7 @@ where .codecs .val .encode(|| V::encode(val, &mut self.val_buf)); - validate_schema( - &self.builder.write_schemas, - &self.key_buf, - &self.val_buf, - Some(key), - Some(val), - ); + validate_schema(&self.builder.write_schemas, key, val); let update = ( (self.key_buf.as_slice(), self.val_buf.as_slice()), @@ -810,13 +793,6 @@ where ) -> Result, InvalidUsage> { let batch_delete_enabled = self.parts.cfg.batch_delete_enabled; let shard_metrics = Arc::clone(&self.parts.shard_metrics); - // If we haven't switched over to the new schema_id field yet, keep writing the old one. - let (new_schema_id, deprecated_schema_id) = if self.parts.cfg.record_schema_id { - (self.write_schemas.id, None) - } else { - (None, self.write_schemas.id) - }; - let runs = self.parts.finish().await; let mut run_parts = vec![]; @@ -831,8 +807,9 @@ where } run_meta.push(RunMeta { order: Some(order), - schema: new_schema_id, - deprecated_schema: deprecated_schema_id, + schema: self.write_schemas.id, + // Field has been deprecated but kept around to roundtrip state. 
+ deprecated_schema: None, }); run_parts.extend(parts); } @@ -881,32 +858,18 @@ where // inline it at the two callers. pub(crate) fn validate_schema( stats_schemas: &Schemas, - key: &[u8], - val: &[u8], - decoded_key: Option<&K>, - decoded_val: Option<&V>, + decoded_key: &K, + decoded_val: &V, ) { // Attempt to catch any bad schema usage in CI. This is probably too // expensive to run in prod. if !mz_ore::assert::SOFT_ASSERTIONS.load(Ordering::Relaxed) { return; } - let key_valid = match decoded_key { - Some(key) => K::validate(key, &stats_schemas.key), - None => { - let key = K::decode(key, &stats_schemas.key).expect("valid encoded key"); - K::validate(&key, &stats_schemas.key) - } - }; + let key_valid = K::validate(decoded_key, &stats_schemas.key); let () = key_valid .unwrap_or_else(|err| panic!("constructing batch with mismatched key schema: {}", err)); - let val_valid = match decoded_val { - Some(val) => V::validate(val, &stats_schemas.val), - None => { - let val = V::decode(val, &stats_schemas.val).expect("valid encoded val"); - V::validate(&val, &stats_schemas.val) - } - }; + let val_valid = V::validate(decoded_val, &stats_schemas.val); let () = val_valid .unwrap_or_else(|err| panic!("constructing batch with mismatched val schema: {}", err)); } @@ -976,19 +939,15 @@ impl BatchParts { let handle = mz_ore::task::spawn( || "batch::compact_runs", async move { - // If we haven't switched over to the new schema_id field yet, keep writing the old one. - let (new_schema_id, deprecated_schema_id) = if cfg.batch.record_schema_id { - (schemas.id, None) - } else { - (None, schemas.id) - }; let runs: Vec<_> = stream::iter(parts) .then(|(order, parts)| async move { ( RunMeta { order: Some(order), - schema: new_schema_id, - deprecated_schema: deprecated_schema_id, + schema: schemas.id, + // Field has been deprecated but kept around to + // roundtrip state. + deprecated_schema: None, }, parts.into_result().await, ) @@ -1121,11 +1080,6 @@ impl BatchParts { self.next_index += 1; let ts_rewrite = None; let schema_id = write_schemas.id; - - // Decide this once per part and plumb it around as necessary so that we - // use a consistent answer for things like inline threshold. - let record_schema_id = self.cfg.record_schema_id; - let batch_format = self.cfg.batch_columnar_format; // If we're going to encode structured data then halve our limit since we're storing @@ -1135,6 +1089,7 @@ impl BatchParts { BatchColumnarFormat::Both(_) => { self.cfg.inline_writes_single_max_bytes.saturating_div(2) } + BatchColumnarFormat::Structured => self.cfg.inline_writes_single_max_bytes, }; let (name, write_future) = if updates.goodbytes() < inline_threshold { @@ -1162,18 +1117,13 @@ impl BatchParts { batch_metrics .step_inline .inc_by(start.elapsed().as_secs_f64()); - // If we haven't switched over to the new schema_id field yet, keep writing the old one. - let (new_schema_id, deprecated_schema_id) = if record_schema_id { - (schema_id, None) - } else { - (None, schema_id) - }; RunPart::Single(BatchPart::Inline { updates, ts_rewrite, - schema_id: new_schema_id, - deprecated_schema_id, + schema_id, + // Field has been deprecated but kept around to roundtrip state. + deprecated_schema_id: None, }) } .instrument(span) @@ -1408,12 +1358,6 @@ impl BatchParts { } stats }); - // If we haven't switched over to the new schema_id field yet, keep writing the old one. 
- let (new_schema_id, deprecated_schema_id) = if cfg.record_schema_id { - (schema_id, None) - } else { - (None, schema_id) - }; BatchPart::Hollow(HollowBatchPart { key: partial_key, @@ -1422,10 +1366,11 @@ impl BatchParts { structured_key_lower, stats, ts_rewrite, - diffs_sum: cfg.write_diffs_sum.then_some(diffs_sum), + diffs_sum: Some(diffs_sum), format: Some(cfg.batch_columnar_format), - schema_id: new_schema_id, - deprecated_schema_id, + schema_id, + // Field has been deprecated but kept around to roundtrip state. + deprecated_schema_id: None, }) } diff --git a/src/persist-client/src/cache.rs b/src/persist-client/src/cache.rs index a8ca88902137a..1dcd9da8e7495 100644 --- a/src/persist-client/src/cache.rs +++ b/src/persist-client/src/cache.rs @@ -172,6 +172,7 @@ impl PersistClientCache { x.key(), Box::new(self.cfg.clone()), self.metrics.postgres_consensus.clone(), + Arc::clone(&self.cfg().configs), )?; let consensus = retry_external(&self.metrics.retries.external.consensus_open, || { diff --git a/src/persist-client/src/cfg.rs b/src/persist-client/src/cfg.rs index c708c36030d37..1266f3f67936f 100644 --- a/src/persist-client/src/cfg.rs +++ b/src/persist-client/src/cfg.rs @@ -36,6 +36,11 @@ use crate::operators::STORAGE_SOURCE_DECODE_FUEL; use crate::project::OPTIMIZE_IGNORED_DATA_DECODE; use crate::read::READER_LEASE_DURATION; +const LTS_VERSIONS: &[Version] = &[ + // 25.1 + Version::new(0, 130, 0), +]; + /// The tunable knobs for persist. /// /// Tuning inputs: @@ -120,11 +125,6 @@ pub struct PersistConfig { /// In Compactor::compact_and_apply_background, how many updates to encode or /// decode before voluntarily yielding the task. pub compaction_yield_after_n_updates: usize, - /// The maximum size of the connection pool to Postgres/CRDB when performing - /// consensus reads and writes. - pub consensus_connection_pool_max_size: usize, - /// The maximum time to wait when attempting to obtain a connection from the pool. - pub consensus_connection_pool_max_wait: Option, /// Length of time after a writer's last operation after which the writer /// may be expired. 
pub writer_lease_duration: Duration, @@ -187,8 +187,6 @@ impl PersistConfig { compaction_concurrency_limit: 5, compaction_queue_size: 20, compaction_yield_after_n_updates: 100_000, - consensus_connection_pool_max_size: 50, - consensus_connection_pool_max_wait: Some(Duration::from_secs(60)), writer_lease_duration: 60 * Duration::from_secs(60), critical_downgrade_interval: Duration::from_secs(30), pubsub_connect_attempt_timeout: Duration::from_secs(5), @@ -315,12 +313,17 @@ pub fn all_dyncfgs(configs: ConfigSet) -> ConfigSet { .add(&crate::batch::INLINE_WRITES_SINGLE_MAX_BYTES) .add(&crate::batch::ENCODING_ENABLE_DICTIONARY) .add(&crate::batch::ENCODING_COMPRESSION_FORMAT) - .add(&crate::batch::RECORD_SCHEMA_ID) .add(&crate::batch::STRUCTURED_ORDER) .add(&crate::batch::STRUCTURED_ORDER_UNTIL_SHARD) .add(&crate::batch::STRUCTURED_KEY_LOWER_LEN) .add(&crate::batch::MAX_RUN_LEN) .add(&crate::batch::MAX_RUNS) + .add(&BLOB_OPERATION_TIMEOUT) + .add(&BLOB_OPERATION_ATTEMPT_TIMEOUT) + .add(&BLOB_CONNECT_TIMEOUT) + .add(&BLOB_READ_TIMEOUT) + .add(&crate::cfg::CONSENSUS_CONNECTION_POOL_MAX_SIZE) + .add(&crate::cfg::CONSENSUS_CONNECTION_POOL_MAX_WAIT) .add(&crate::cfg::CONSENSUS_CONNECTION_POOL_TTL_STAGGER) .add(&crate::cfg::CONSENSUS_CONNECTION_POOL_TTL) .add(&crate::cfg::CRDB_CONNECT_TIMEOUT) @@ -357,7 +360,6 @@ pub fn all_dyncfgs(configs: ConfigSet) -> ConfigSet { .add(&crate::internal::machine::NEXT_LISTEN_BATCH_RETRYER_MULTIPLIER) .add(&crate::internal::machine::RECORD_COMPACTIONS) .add(&crate::internal::state::ROLLUP_THRESHOLD) - .add(&crate::internal::state::WRITE_DIFFS_SUM) .add(&crate::operators::STORAGE_SOURCE_DECODE_FUEL) .add(&crate::project::OPTIMIZE_IGNORED_DATA_DECODE) .add(&crate::project::OPTIMIZE_IGNORED_DATA_FETCH) @@ -383,6 +385,25 @@ impl PersistConfig { } } +/// Sets the maximum size of the connection pool that is used by consensus. +/// +/// Requires a restart of the process to take effect. +pub const CONSENSUS_CONNECTION_POOL_MAX_SIZE: Config = Config::new( + "persist_consensus_connection_pool_max_size", + 50, + "The maximum size the connection pool to Postgres/CRDB will grow to.", +); + +/// Sets the maximum amount of time we'll wait to acquire a connection from +/// the connection pool. +/// +/// Requires a restart of the process to take effect. +const CONSENSUS_CONNECTION_POOL_MAX_WAIT: Config = Config::new( + "persist_consensus_connection_pool_max_wait", + Duration::from_secs(60), + "The amount of time we'll wait for a connection to become available.", +); + /// The minimum TTL of a connection to Postgres/CRDB before it is proactively /// terminated. Connections are routinely culled to balance load against the /// downstream database. 
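// [Editorial sketch, not part of the patch] The pool settings above now follow
// the standard dyncfg pattern: declare a `Config` with a name, default, and
// description, register it in `all_dyncfgs`, and read it at call sites with
// `get`. The knob below is hypothetical and only illustrates the shape.
pub const EXAMPLE_KNOB: Config<Duration> = Config::new(
    "persist_example_knob",
    Duration::from_secs(30),
    "An illustrative knob; not a real Materialize config.",
);
// Registration: `.add(&crate::cfg::EXAMPLE_KNOB)` inside `all_dyncfgs`.
// Read site:    `let v = EXAMPLE_KNOB.get(self);` from a `PersistConfig` method.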
@@ -539,11 +560,11 @@ pub const USAGE_STATE_FETCH_CONCURRENCY_LIMIT: Config = Config::new( impl PostgresClientKnobs for PersistConfig { fn connection_pool_max_size(&self) -> usize { - self.consensus_connection_pool_max_size + CONSENSUS_CONNECTION_POOL_MAX_SIZE.get(self) } fn connection_pool_max_wait(&self) -> Option { - self.consensus_connection_pool_max_wait + Some(CONSENSUS_CONNECTION_POOL_MAX_WAIT.get(self)) } fn connection_pool_ttl(&self) -> Duration { @@ -586,22 +607,45 @@ impl RetryParameters { } } -// TODO: Replace with dynamic values when PersistConfig is integrated with LD +pub(crate) const BLOB_OPERATION_TIMEOUT: Config = Config::new( + "persist_blob_operation_timeout", + Duration::from_secs(180), + "Maximum time allowed for a network call, including retry attempts.", +); + +pub(crate) const BLOB_OPERATION_ATTEMPT_TIMEOUT: Config = Config::new( + "persist_blob_operation_attempt_timeout", + Duration::from_secs(90), + "Maximum time allowed for a single network call.", +); + +pub(crate) const BLOB_CONNECT_TIMEOUT: Config = Config::new( + "persist_blob_connect_timeout", + Duration::from_secs(7), + "Maximum time to wait for a socket connection to be made.", +); + +pub(crate) const BLOB_READ_TIMEOUT: Config = Config::new( + "persist_blob_read_timeout", + Duration::from_secs(10), + "Maximum time to wait to read the first byte of a response, including connection time.", +); + impl BlobKnobs for PersistConfig { fn operation_timeout(&self) -> Duration { - Duration::from_secs(180) + BLOB_OPERATION_TIMEOUT.get(self) } fn operation_attempt_timeout(&self) -> Duration { - Duration::from_secs(90) + BLOB_OPERATION_ATTEMPT_TIMEOUT.get(self) } fn connect_timeout(&self) -> Duration { - Duration::from_secs(7) + BLOB_CONNECT_TIMEOUT.get(self) } fn read_timeout(&self) -> Duration { - Duration::from_secs(10) + BLOB_READ_TIMEOUT.get(self) } fn is_cc_active(&self) -> bool { @@ -609,6 +653,10 @@ impl BlobKnobs for PersistConfig { } } +pub fn check_data_version(code_version: &Version, data_version: &Version) -> Result<(), String> { + check_data_version_with_lts_versions(code_version, data_version, LTS_VERSIONS) +} + // If persist gets some encoded ProtoState from the future (e.g. two versions of // code are running simultaneously against the same shard), it might have a // field that the current code doesn't know about. This would be silently @@ -633,7 +681,44 @@ impl BlobKnobs for PersistConfig { // data we read is going to be because we fetched it using a pointer stored in // some persist state. If we can handle the state, we can handle the blobs it // references, too. -pub fn check_data_version(code_version: &Version, data_version: &Version) -> Result<(), String> { +pub(crate) fn check_data_version_with_lts_versions( + code_version: &Version, + data_version: &Version, + lts_versions: &[Version], +) -> Result<(), String> { + // Allow upgrades specifically between consecutive LTS releases. 
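// [Editorial worked example, derived from the logic below; not part of the
// patch] With LTS_VERSIONS = [0.130.0], code at v0.130.13 may read state
// written by a later release as long as the minor-version gap stays under 40:
//   check_data_version(&Version::parse("0.130.13")?, &Version::parse("0.154.0")?) is Ok
//   check_data_version(&Version::parse("0.130.13")?, &Version::parse("0.171.0")?) is Err,
// since the latter falls through to the ordinary one-minor-version
// forward-compatibility check.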
+ let base_code_version = Version { + patch: 0, + ..code_version.clone() + }; + let base_data_version = Version { + patch: 0, + ..data_version.clone() + }; + if data_version >= code_version { + for window in lts_versions.windows(2) { + if base_code_version == window[0] && base_data_version <= window[1] { + return Ok(()); + } + } + + if let Some(last) = lts_versions.last() { + if base_code_version == *last + // kind of arbitrary, but just ensure we don't accidentally + // upgrade too far (the previous check should ensure that a + // new version won't take over from a too-old previous + // version, but we want to make sure the other side also + // doesn't get confused) + && base_data_version + .minor + .saturating_sub(base_code_version.minor) + < 40 + { + return Ok(()); + } + } + } + // Allow one minor version of forward compatibility. We could avoid the // clone with some nested comparisons of the semver fields, but this code // isn't particularly performance sensitive and I find this impl easier to diff --git a/src/persist-client/src/cli/args.rs b/src/persist-client/src/cli/args.rs index 8057cee1d7fda..34a731c9e74cd 100644 --- a/src/persist-client/src/cli/args.rs +++ b/src/persist-client/src/cli/args.rs @@ -130,6 +130,7 @@ pub(super) async fn make_consensus( consensus_uri, Box::new(cfg.clone()), metrics.postgres_consensus.clone(), + Arc::clone(&cfg.configs), )?; let consensus = consensus.clone().open().await?; let consensus = if commit { diff --git a/src/persist-client/src/cli/inspect.rs b/src/persist-client/src/cli/inspect.rs index 29c0d3139463a..d0a1fd480b4eb 100644 --- a/src/persist-client/src/cli/inspect.rs +++ b/src/persist-client/src/cli/inspect.rs @@ -37,7 +37,7 @@ use crate::async_runtime::IsolatedRuntime; use crate::cache::StateCache; use crate::cli::args::{make_blob, make_consensus, StateArgs, NO_COMMIT, READ_ALL_BUILD_INFO}; use crate::error::CodecConcreteType; -use crate::fetch::{Cursor, EncodedPart}; +use crate::fetch::EncodedPart; use crate::internal::encoding::{Rollup, UntypedState}; use crate::internal::paths::{ BlobKey, BlobKeyPrefix, PartialBatchKey, PartialBlobKey, PartialRollupKey, WriterKey, @@ -361,15 +361,19 @@ pub async fn blob_batch_part( desc, updates: Vec::new(), }; - let mut cursor = Cursor::default(); - while let Some(((k, v, t, d), _)) = cursor.pop(&encoded_part) { + let records = encoded_part.normalize(&metrics.columnar); + for ((k, v), t, d) in records + .records() + .expect("only implemented for records") + .iter() + { if out.updates.len() > limit { break; } out.updates.push(BatchPartUpdate { k: format!("{:?}", PrettyBytes(k)), v: format!("{:?}", PrettyBytes(v)), - t, + t: u64::from_le_bytes(t), d: i64::from_le_bytes(d), }); } @@ -408,8 +412,9 @@ async fn consolidated_size(args: &StateArgs) -> Result<(), anyhow::Error> { ) .await .expect("part exists"); - let mut cursor = Cursor::default(); - while let Some(((k, v, mut t, d), _)) = cursor.pop(&encoded_part) { + let part = encoded_part.normalize(&state_versions.metrics.columnar); + for ((k, v), t, d) in part.records().expect("codec records").iter() { + let mut t = ::decode(t); t.advance_by(as_of); let d = ::decode(d); updates.push(((k.to_owned(), v.to_owned()), t, d)); diff --git a/src/persist-client/src/fetch.rs b/src/persist-client/src/fetch.rs index a969327219c41..639023cb19d42 100644 --- a/src/persist-client/src/fetch.rs +++ b/src/persist-client/src/fetch.rs @@ -15,11 +15,12 @@ use std::sync::Arc; use std::time::Instant; use anyhow::anyhow; -use arrow::array::{Array, AsArray, BooleanArray}; +use 
arrow::array::{Array, AsArray, BooleanArray, Int64Array}; use arrow::compute::FilterBuilder; use differential_dataflow::difference::Semigroup; use differential_dataflow::lattice::Lattice; use differential_dataflow::trace::Description; +use itertools::EitherOrBoth; use mz_dyncfg::{Config, ConfigSet, ConfigValHandle}; use mz_ore::bytes::SegmentedBytes; use mz_ore::cast::CastFrom; @@ -155,7 +156,7 @@ where } let migration = PartMigration::new( - part.part.schema_id(), + &part.part, self.read_schemas.clone(), &mut self.schema_cache, ) @@ -365,7 +366,7 @@ where panic!("{} could not fetch batch part: {}", reader_id, blob_key) }); let part_cfg = BatchFetcherConfig::new(cfg); - let migration = PartMigration::new(part.part.schema_id(), read_schemas, schema_cache) + let migration = PartMigration::new(&part.part, read_schemas, schema_cache) .await .unwrap_or_else(|read_schemas| { panic!( @@ -722,17 +723,21 @@ impl FetchedBlob { metrics: Arc, ts_filter: FetchBatchFilter, - part: EncodedPart, // If migration is Either, then the columnar one will have already been - // applied here. - structured_part: ( - Option<>::Decoder>, - Option<>::Decoder>, - ), - part_decode_format: PartDecodeFormat, + // applied here on the structured data only. + part: EitherOrBoth< + ColumnarRecords, + ( + >::Decoder, + >::Decoder, + ), + >, + timestamps: Int64Array, + diffs: Int64Array, migration: PartMigration, filter_pushdown_audit: Option, - part_cursor: Cursor, + peek_stash: Option<((Result, Result), T, D)>, + part_cursor: usize, key_storage: Option, val_storage: Option, @@ -781,96 +786,91 @@ impl FetchedPart validate_structured, - PartDecodeFormat::Arrow => true, - }; - let structured_part = match (&part.part.updates, should_downcast) { - // Only downcast and create decoders if we have structured data AND - // (an audit of the data is requested OR we'd like to decode - // directly from the structured data). - (BlobTraceUpdates::Both(_codec, structured), true) => { - fn decode( - name: &str, - schema: &C::Schema, - array: &Arc, - ) -> Option<>::Decoder> { - match Schema2::decoder_any(schema, array) { - Ok(x) => Some(x), - Err(err) => { - tracing::error!(?err, "failed to create {} decoder", name); - None - } - } + let downcast_structured = |structured: ColumnarRecordsStructuredExt| { + let key_size_before = ArrayOrd::new(&structured.key).goodbytes(); + + let structured = match &migration { + PartMigration::SameSchema { .. } => structured, + PartMigration::Codec { .. } => { + return None; } - match &migration { - PartMigration::SameSchema { both } => { - let key_size_before = ArrayOrd::new(&structured.key).goodbytes(); - - let key = decode::("key", &*both.key, &structured.key); - let val = decode::("val", &*both.val, &structured.val); - - if let Some(key_decoder) = key.as_ref() { - let key_size_after = key_decoder.goodbytes(); - let key_diff = key_size_before.saturating_sub(key_size_after); - metrics - .pushdown - .parts_projection_trimmed_bytes - .inc_by(u64::cast_from(key_diff)); - } - - (key, val) - } - PartMigration::Codec { .. 
} => (None, None), - PartMigration::Either { - _write, - read, - key_migration, - val_migration, - } => { - let start = Instant::now(); - let key = key_migration.migrate(Arc::clone(&structured.key)); - let val = val_migration.migrate(Arc::clone(&structured.val)); - metrics - .schema - .migration_migrate_seconds - .inc_by(start.elapsed().as_secs_f64()); - - let key_before_size = ArrayOrd::new(&structured.key).goodbytes(); - let key_after_size = ArrayOrd::new(&key).goodbytes(); - let key_diff = key_before_size.saturating_sub(key_after_size); - metrics - .pushdown - .parts_projection_trimmed_bytes - .inc_by(u64::cast_from(key_diff)); - - ( - decode::("key", &*read.key, &key), - decode::("val", &*read.val, &val), - ) - } + PartMigration::Either { + write: _, + read: _, + key_migration, + val_migration, + } => { + let start = Instant::now(); + let key = key_migration.migrate(structured.key); + let val = val_migration.migrate(structured.val); + metrics + .schema + .migration_migrate_seconds + .inc_by(start.elapsed().as_secs_f64()); + ColumnarRecordsStructuredExt { key, val } + } + }; + + let read_schema = migration.codec_read(); + let key = K::Schema::decoder_any(&*read_schema.key, &*structured.key); + let val = V::Schema::decoder_any(&*read_schema.val, &*structured.val); + + match &key { + Ok(key_decoder) => { + let key_size_after = key_decoder.goodbytes(); + let key_diff = key_size_before.saturating_sub(key_size_after); + metrics + .pushdown + .parts_projection_trimmed_bytes + .inc_by(u64::cast_from(key_diff)); + } + Err(e) => { + soft_panic_or_log!("failed to create decoder: {e:#?}"); } } - _ => (None, None), + + Some((key.ok()?, val.ok()?)) + }; + + let updates = part.normalize(&metrics.columnar); + let timestamps = updates.timestamps().clone(); + let diffs = updates.diffs().clone(); + let part = match updates { + // If only one encoding is available, decode via that encoding. + BlobTraceUpdates::Row(records) => EitherOrBoth::Left(records), + BlobTraceUpdates::Structured { key_values, .. } => EitherOrBoth::Right( + // The structured-only data format was added after schema ids were recorded everywhere, + // so we expect this data to be present. + downcast_structured(key_values).expect("valid schemas for structured data"), + ), + // If both are available, respect the specified part decode format. + BlobTraceUpdates::Both(records, ext) => match part_decode_format { + PartDecodeFormat::Row { + validate_structured: false, + } => EitherOrBoth::Left(records), + PartDecodeFormat::Row { + validate_structured: true, + } => match downcast_structured(ext) { + Some(decoders) => EitherOrBoth::Both(records, decoders), + None => EitherOrBoth::Left(records), + }, + PartDecodeFormat::Arrow => match downcast_structured(ext) { + Some(decoders) => EitherOrBoth::Right(decoders), + None => EitherOrBoth::Left(records), + }, + }, }; FetchedPart { metrics, ts_filter, part, - structured_part, - part_decode_format, + peek_stash: None, + timestamps, + diffs, migration, filter_pushdown_audit, - part_cursor: Cursor::default(), + part_cursor: 0, key_storage: None, val_storage: None, _phantom: PhantomData, @@ -915,74 +915,88 @@ where val: &mut Option, result_override: Option<(K, V)>, ) -> Option<((Result, Result), T, D)> { - while let Some(((k, v, mut t, d), idx)) = self.part_cursor.pop(&self.part) { - if !self.ts_filter.filter_ts(&mut t) { - continue; - } - - let mut d = D::decode(d); - - // If `filter_ts` advances our timestamp, we may end up with the same K, V, T in successive - // records. 
If so, opportunistically consolidate those out. - while let Some((k_next, v_next, mut t_next, d_next)) = self.part_cursor.peek(&self.part) - { - if (k, v) != (k_next, v_next) { - break; + let mut consolidated = self.peek_stash.take(); + loop { + // Fetch and decode the next tuple in the sequence. (Or break if there is none.) + let next = if self.part_cursor < self.timestamps.len() { + let next_idx = self.part_cursor; + self.part_cursor += 1; + // These `to_le_bytes` calls were previously encapsulated by `ColumnarRecords`. + // TODO(structured): re-encapsulate these once we've finished the structured migration. + let mut t = T::decode(self.timestamps.values()[next_idx].to_le_bytes()); + if !self.ts_filter.filter_ts(&mut t) { + continue; } - - if !self.ts_filter.filter_ts(&mut t_next) { - break; + let d = D::decode(self.diffs.values()[next_idx].to_le_bytes()); + if d.is_zero() { + continue; } - if t != t_next { + let kv = if result_override.is_none() { + self.decode_kv(next_idx, key, val) + } else { + // This will be overridden immediately below - just leave a placeholder here for now. + (Err("".to_string()), Err("".to_string())) + }; + (kv, t, d) + } else { + break; + }; + + // Attempt to consolidate in the next tuple, stashing it if that's not possible. + if let Some((kv, t, d)) = &mut consolidated { + let (kv_next, t_next, d_next) = &next; + if kv == kv_next && t == t_next { + d.plus_equals(d_next); + if d.is_zero() { + consolidated = None; + } + } else { + self.peek_stash = Some(next); break; } - - // All equal... consolidate! - self.part_cursor.idx += 1; - d.plus_equals(&D::decode(d_next)); + } else { + consolidated = Some(next); } + } - // If multiple updates consolidate out entirely, drop the record. - if d.is_zero() { - continue; - } + let (kv, t, d) = consolidated?; - if let Some((key, val)) = result_override { - return Some(((Ok(key), Ok(val)), t, d)); - } + // Override the placeholder result we set above with the true value. + if let Some((key, val)) = result_override { + return Some(((Ok(key), Ok(val)), t, d)); + } - // TODO: Putting this here relies on the Codec data still being - // populated (i.e. for the consolidate optimization above). - // Eventually we'll have to rewrite this path to work entirely - // without Codec data, but in the meantime, putting in here allows - // us to see the performance impact of decoding from arrow instead - // of Codec. - // - // Plus, it'll likely be easier to port all the logic here to work - // solely on arrow data once we finish migrating things like the - // ConsolidatingIter. - if let ((Some(keys), Some(vals)), PartDecodeFormat::Arrow) = - (&self.structured_part, self.part_decode_format) - { - let (k, v) = self.decode_structured(idx, keys, vals, key, val); - return Some(((k, v), t, d)); - } + Some((kv, t, d)) + } - let (k, v) = Self::decode_codec( - &self.metrics, - self.migration.codec_read(), - k, - v, - key, - val, - &mut self.key_storage, - &mut self.val_storage, - ); - // Note: We only provide structured columns, if they were originally written, and a - // dyncfg was specified to run validation. 
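The rewritten loop added above folds the old `Cursor::peek`-based consolidation into a single pass: it keeps one candidate tuple, adds in the diffs of following tuples with the same key/value and timestamp, drops the candidate if its diff sums to zero, and stashes the first non-matching tuple in `peek_stash` so the next call resumes from it. A simplified, self-contained sketch of that merge step over plain `(key, time, diff)` tuples (not the persist types or timestamp filtering used above):

```rust
/// Consolidate an already-sorted sequence of (key, time, diff) tuples:
/// adjacent entries with equal key and time are merged by summing diffs,
/// and entries whose diff sums to zero are dropped entirely.
fn consolidate(sorted: impl IntoIterator<Item = (String, u64, i64)>) -> Vec<(String, u64, i64)> {
    let mut out = Vec::new();
    let mut current: Option<(String, u64, i64)> = None;
    for (k, t, d) in sorted {
        // If the incoming tuple matches the candidate, merge it and move on.
        if let Some((ck, ct, cd)) = &mut current {
            if *ck == k && *ct == t {
                *cd += d;
                continue;
            }
        }
        // Otherwise emit the finished candidate (unless it cancelled out)
        // and make the incoming tuple the new candidate.
        if let Some((ck, ct, cd)) = current.take() {
            if cd != 0 {
                out.push((ck, ct, cd));
            }
        }
        current = Some((k, t, d));
    }
    if let Some((ck, ct, cd)) = current {
        if cd != 0 {
            out.push((ck, ct, cd));
        }
    }
    out
}

fn main() {
    let updates = vec![
        ("a".to_string(), 1, 1),
        ("a".to_string(), 1, 1),
        ("b".to_string(), 1, 1),
        ("b".to_string(), 1, -1), // cancels out entirely
        ("c".to_string(), 2, 1),
    ];
    assert_eq!(
        consolidate(updates),
        vec![("a".to_string(), 1, 2), ("c".to_string(), 2, 1)]
    );
}
```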
- if let (Some(keys), Some(vals)) = &self.structured_part { - let (k_s, v_s) = self.decode_structured(idx, keys, vals, &mut None, &mut None); + fn decode_kv( + &mut self, + index: usize, + key: &mut Option, + val: &mut Option, + ) -> (Result, Result) { + let decoded = self + .part + .as_ref() + .map_left(|codec| { + let ((ck, cv), _, _) = codec.get(index).expect("valid index"); + Self::decode_codec( + &*self.metrics, + self.migration.codec_read(), + ck, + cv, + key, + val, + &mut self.key_storage, + &mut self.val_storage, + ) + }) + .map_right(|(structured_key, structured_val)| { + self.decode_structured(index, structured_key, structured_val, key, val) + }); + match decoded { + EitherOrBoth::Both((k, v), (k_s, v_s)) => { // Purposefully do not trace to prevent blowing up Sentry. let is_valid = self .metrics @@ -1003,11 +1017,12 @@ where if !is_valid { soft_panic_no_log!("structured val did not match, {v_s:?} != {v:?}"); } - } - return Some(((k, v), t, d)); + (k, v) + } + EitherOrBoth::Left(kv) => kv, + EitherOrBoth::Right(kv) => kv, } - None } fn decode_codec( @@ -1078,7 +1093,7 @@ where fn size_hint(&self) -> (usize, Option) { // We don't know in advance how restrictive the filter will be. - let max_len = self.part.part.updates.len(); + let max_len = self.timestamps.len(); (0, Some(max_len)) } } @@ -1287,6 +1302,12 @@ where timestamps = realloc_array(×tamps, metrics); } + if self.ts_rewrite.is_some() { + self.metrics + .ts_rewrite + .inc_by(u64::cast_from(timestamps.len())); + } + match (codec, structured) { (Some((key, value)), None) => { BlobTraceUpdates::Row(ColumnarRecords::new(key, value, timestamps, diffs)) @@ -1304,103 +1325,6 @@ where } } -/// A pointer into a particular encoded part, with methods for fetching an update and -/// scanning forward to the next. It is an error to use the same cursor for distinct -/// parts. -/// -/// We avoid implementing copy to make it hard to accidentally duplicate a cursor. However, -/// clone is very cheap. -#[derive(Debug, Clone, Default)] -pub(crate) struct Cursor { - idx: usize, -} - -impl Cursor { - /// Get the tuple at the specified pair of indices. If there is no such tuple, - /// either because we are out of range or because this tuple has been filtered out, - /// this returns `None`. - pub fn get<'a, T: Timestamp + Lattice + Codec64>( - &self, - encoded: &'a EncodedPart, - ) -> Option<(&'a [u8], &'a [u8], T, [u8; 8])> { - // TODO(structured): replace before allowing structured-only parts - let part = encoded - .part - .updates - .records() - .expect("created cursor for non-codec data"); - let ((k, v), t, d) = part.get(self.idx)?; - - let mut t = T::decode(t); - // We assert on the write side that at most one of rewrite or - // truncation is used, so it shouldn't matter which is run first. - // - // That said, my (Dan's) intuition here is that rewrite goes first, - // though I don't particularly have a justification for it. - if let Some(ts_rewrite) = encoded.ts_rewrite.as_ref() { - t.advance_by(ts_rewrite.borrow()); - encoded.metrics.ts_rewrite.inc(); - } - - // This filtering is really subtle, see the comment above for - // what's going on here. - let truncated_t = encoded.needs_truncation && { - !encoded.registered_desc.lower().less_equal(&t) - || encoded.registered_desc.upper().less_equal(&t) - }; - if truncated_t { - return None; - } - Some((k, v, t, d)) - } - - /// A cursor points to a particular update in the backing part data. 
- /// If the update it points to is not valid, advance it to the next valid update - /// if there is one, and return the pointed-to data. - pub fn peek<'a, T: Timestamp + Lattice + Codec64>( - &mut self, - part: &'a EncodedPart, - ) -> Option<(&'a [u8], &'a [u8], T, [u8; 8])> { - while !self.is_exhausted(part) { - let current = self.get(part); - if current.is_some() { - return current; - } - self.advance(part); - } - None - } - - /// Similar to peek, but advance the cursor just past the end of the most recent update. - /// Returns the update and the `(part_idx, idx)` that is was popped at. - pub fn pop<'a, T: Timestamp + Lattice + Codec64>( - &mut self, - part: &'a EncodedPart, - ) -> Option<((&'a [u8], &'a [u8], T, [u8; 8]), usize)> { - while !self.is_exhausted(part) { - let current = self.get(part); - let popped_idx = self.idx; - self.advance(part); - if current.is_some() { - return current.map(|p| (p, popped_idx)); - } - } - None - } - - /// Returns true if the cursor is past the end of the part data. - pub fn is_exhausted(&self, part: &EncodedPart) -> bool { - self.idx >= part.part.updates.len() - } - - /// Advance the cursor just past the end of the most recent update, if there is one. - pub fn advance(&mut self, part: &EncodedPart) { - if !self.is_exhausted(part) { - self.idx += 1; - } - } -} - /// This represents the serde encoding for [`LeasedBatchPart`]. We expose the struct /// itself (unlike other encodable structs) to attempt to provide stricter drop /// semantics on `LeasedBatchPart`, i.e. `SerdeLeasedBatchPart` is exchangeable diff --git a/src/persist-client/src/internal/encoding.rs b/src/persist-client/src/internal/encoding.rs index fb41da45549e5..c21d8866fde83 100644 --- a/src/persist-client/src/internal/encoding.rs +++ b/src/persist-client/src/internal/encoding.rs @@ -1524,6 +1524,7 @@ impl RustType for BatchColumnarFormat { BatchColumnarFormat::Both(version) => { proto_hollow_batch_part::Format::RowAndColumnar((*version).cast_into()) } + BatchColumnarFormat::Structured => proto_hollow_batch_part::Format::Structured(()), } } @@ -1533,6 +1534,7 @@ impl RustType for BatchColumnarFormat { proto_hollow_batch_part::Format::RowAndColumnar(version) => { BatchColumnarFormat::Both(version.cast_into()) } + proto_hollow_batch_part::Format::Structured(_) => BatchColumnarFormat::Structured, }; Ok(format) } @@ -2059,6 +2061,152 @@ mod tests { proptest!(|(state in any_state::(0..3))| testcase(state)); } + #[mz_ore::test] + fn check_data_versions_with_lts_versions() { + #[track_caller] + fn testcase(code: &str, data: &str, lts_versions: &[Version], expected: Result<(), ()>) { + let code = Version::parse(code).unwrap(); + let data = Version::parse(data).unwrap(); + let actual = cfg::check_data_version_with_lts_versions(&code, &data, lts_versions) + .map_err(|_| ()); + assert_eq!(actual, expected); + } + + let none = []; + let one = [Version::new(0, 130, 0)]; + let two = [Version::new(0, 130, 0), Version::new(0, 140, 0)]; + let three = [ + Version::new(0, 130, 0), + Version::new(0, 140, 0), + Version::new(0, 150, 0), + ]; + + testcase("0.130.0", "0.128.0", &none, Ok(())); + testcase("0.130.0", "0.129.0", &none, Ok(())); + testcase("0.130.0", "0.130.0", &none, Ok(())); + testcase("0.130.0", "0.130.1", &none, Ok(())); + testcase("0.130.1", "0.130.0", &none, Ok(())); + testcase("0.130.0", "0.131.0", &none, Ok(())); + testcase("0.130.0", "0.132.0", &none, Err(())); + + testcase("0.129.0", "0.127.0", &none, Ok(())); + testcase("0.129.0", "0.128.0", &none, Ok(())); + testcase("0.129.0", 
"0.129.0", &none, Ok(())); + testcase("0.129.0", "0.129.1", &none, Ok(())); + testcase("0.129.1", "0.129.0", &none, Ok(())); + testcase("0.129.0", "0.130.0", &none, Ok(())); + testcase("0.129.0", "0.131.0", &none, Err(())); + + testcase("0.130.0", "0.128.0", &one, Ok(())); + testcase("0.130.0", "0.129.0", &one, Ok(())); + testcase("0.130.0", "0.130.0", &one, Ok(())); + testcase("0.130.0", "0.130.1", &one, Ok(())); + testcase("0.130.1", "0.130.0", &one, Ok(())); + testcase("0.130.0", "0.131.0", &one, Ok(())); + testcase("0.130.0", "0.132.0", &one, Ok(())); + + testcase("0.129.0", "0.127.0", &one, Ok(())); + testcase("0.129.0", "0.128.0", &one, Ok(())); + testcase("0.129.0", "0.129.0", &one, Ok(())); + testcase("0.129.0", "0.129.1", &one, Ok(())); + testcase("0.129.1", "0.129.0", &one, Ok(())); + testcase("0.129.0", "0.130.0", &one, Ok(())); + testcase("0.129.0", "0.131.0", &one, Err(())); + + testcase("0.131.0", "0.129.0", &one, Ok(())); + testcase("0.131.0", "0.130.0", &one, Ok(())); + testcase("0.131.0", "0.131.0", &one, Ok(())); + testcase("0.131.0", "0.131.1", &one, Ok(())); + testcase("0.131.1", "0.131.0", &one, Ok(())); + testcase("0.131.0", "0.132.0", &one, Ok(())); + testcase("0.131.0", "0.133.0", &one, Err(())); + + testcase("0.130.0", "0.128.0", &two, Ok(())); + testcase("0.130.0", "0.129.0", &two, Ok(())); + testcase("0.130.0", "0.130.0", &two, Ok(())); + testcase("0.130.0", "0.130.1", &two, Ok(())); + testcase("0.130.1", "0.130.0", &two, Ok(())); + testcase("0.130.0", "0.131.0", &two, Ok(())); + testcase("0.130.0", "0.132.0", &two, Ok(())); + testcase("0.130.0", "0.135.0", &two, Ok(())); + testcase("0.130.0", "0.138.0", &two, Ok(())); + testcase("0.130.0", "0.139.0", &two, Ok(())); + testcase("0.130.0", "0.140.0", &two, Ok(())); + testcase("0.130.9", "0.140.0", &two, Ok(())); + testcase("0.130.0", "0.140.1", &two, Ok(())); + testcase("0.130.3", "0.140.1", &two, Ok(())); + testcase("0.130.3", "0.140.9", &two, Ok(())); + testcase("0.130.0", "0.141.0", &two, Err(())); + testcase("0.129.0", "0.133.0", &two, Err(())); + testcase("0.129.0", "0.140.0", &two, Err(())); + testcase("0.131.0", "0.133.0", &two, Err(())); + testcase("0.131.0", "0.140.0", &two, Err(())); + + testcase("0.130.0", "0.128.0", &three, Ok(())); + testcase("0.130.0", "0.129.0", &three, Ok(())); + testcase("0.130.0", "0.130.0", &three, Ok(())); + testcase("0.130.0", "0.130.1", &three, Ok(())); + testcase("0.130.1", "0.130.0", &three, Ok(())); + testcase("0.130.0", "0.131.0", &three, Ok(())); + testcase("0.130.0", "0.132.0", &three, Ok(())); + testcase("0.130.0", "0.135.0", &three, Ok(())); + testcase("0.130.0", "0.138.0", &three, Ok(())); + testcase("0.130.0", "0.139.0", &three, Ok(())); + testcase("0.130.0", "0.140.0", &three, Ok(())); + testcase("0.130.9", "0.140.0", &three, Ok(())); + testcase("0.130.0", "0.140.1", &three, Ok(())); + testcase("0.130.3", "0.140.1", &three, Ok(())); + testcase("0.130.3", "0.140.9", &three, Ok(())); + testcase("0.130.0", "0.141.0", &three, Err(())); + testcase("0.129.0", "0.133.0", &three, Err(())); + testcase("0.129.0", "0.140.0", &three, Err(())); + testcase("0.131.0", "0.133.0", &three, Err(())); + testcase("0.131.0", "0.140.0", &three, Err(())); + testcase("0.130.0", "0.150.0", &three, Err(())); + + testcase("0.140.0", "0.138.0", &three, Ok(())); + testcase("0.140.0", "0.139.0", &three, Ok(())); + testcase("0.140.0", "0.140.0", &three, Ok(())); + testcase("0.140.0", "0.140.1", &three, Ok(())); + testcase("0.140.1", "0.140.0", &three, Ok(())); + testcase("0.140.0", "0.141.0", 
&three, Ok(())); + testcase("0.140.0", "0.142.0", &three, Ok(())); + testcase("0.140.0", "0.145.0", &three, Ok(())); + testcase("0.140.0", "0.148.0", &three, Ok(())); + testcase("0.140.0", "0.149.0", &three, Ok(())); + testcase("0.140.0", "0.150.0", &three, Ok(())); + testcase("0.140.9", "0.150.0", &three, Ok(())); + testcase("0.140.0", "0.150.1", &three, Ok(())); + testcase("0.140.3", "0.150.1", &three, Ok(())); + testcase("0.140.3", "0.150.9", &three, Ok(())); + testcase("0.140.0", "0.151.0", &three, Err(())); + testcase("0.139.0", "0.143.0", &three, Err(())); + testcase("0.139.0", "0.150.0", &three, Err(())); + testcase("0.141.0", "0.143.0", &three, Err(())); + testcase("0.141.0", "0.150.0", &three, Err(())); + + testcase("0.150.0", "0.148.0", &three, Ok(())); + testcase("0.150.0", "0.149.0", &three, Ok(())); + testcase("0.150.0", "0.150.0", &three, Ok(())); + testcase("0.150.0", "0.150.1", &three, Ok(())); + testcase("0.150.1", "0.150.0", &three, Ok(())); + testcase("0.150.0", "0.151.0", &three, Ok(())); + testcase("0.150.0", "0.152.0", &three, Ok(())); + testcase("0.150.0", "0.155.0", &three, Ok(())); + testcase("0.150.0", "0.158.0", &three, Ok(())); + testcase("0.150.0", "0.159.0", &three, Ok(())); + testcase("0.150.0", "0.160.0", &three, Ok(())); + testcase("0.150.9", "0.160.0", &three, Ok(())); + testcase("0.150.0", "0.160.1", &three, Ok(())); + testcase("0.150.3", "0.160.1", &three, Ok(())); + testcase("0.150.3", "0.160.9", &three, Ok(())); + testcase("0.150.0", "0.161.0", &three, Ok(())); + testcase("0.149.0", "0.153.0", &three, Err(())); + testcase("0.149.0", "0.160.0", &three, Err(())); + testcase("0.151.0", "0.153.0", &three, Err(())); + testcase("0.151.0", "0.160.0", &three, Err(())); + } + #[mz_ore::test] fn check_data_versions() { #[track_caller] diff --git a/src/persist-client/src/internal/machine.rs b/src/persist-client/src/internal/machine.rs index b9774e8760c77..f983cfc0dbbc1 100644 --- a/src/persist-client/src/internal/machine.rs +++ b/src/persist-client/src/internal/machine.rs @@ -1422,7 +1422,7 @@ pub mod datadriven { BatchParts, BLOB_TARGET_SIZE, BUILDER_STRUCTURED, STRUCTURED_ORDER, }; use crate::cfg::COMPACTION_MEMORY_BOUND_BYTES; - use crate::fetch::{Cursor, EncodedPart}; + use crate::fetch::EncodedPart; use crate::internal::compact::{CompactConfig, CompactReq, Compactor}; use crate::internal::datadriven::DirectiveArgs; use crate::internal::encoding::Schemas; @@ -1894,9 +1894,10 @@ pub mod datadriven { ) .await .expect("invalid batch part"); - let mut cursor = Cursor::default(); - while let Some(((k, _v, t, d), _)) = cursor.pop(&part) { + let part = part.normalize(&datadriven.client.metrics.columnar); + for ((k, _v), t, d) in part.records().expect("codec records").iter() { let (k, d) = (String::decode(k, &StringSchema).unwrap(), i64::decode(d)); + let t = u64::from_le_bytes(t); write!(s, "{k} {t} {d}\n"); } } @@ -2148,10 +2149,11 @@ pub mod datadriven { ) .await .expect("invalid batch part"); + let part = part.normalize(&datadriven.client.metrics.columnar); let mut updates = Vec::new(); - let mut cursor = Cursor::default(); - while let Some(((k, _v, mut t, d), _)) = cursor.pop(&part) { + for ((k, _v), t, d) in part.records().expect("codec data").iter() { + let mut t = u64::decode(t); t.advance_by(as_of.borrow()); updates.push(( String::decode(k, &StringSchema).unwrap(), diff --git a/src/persist-client/src/internal/state.proto b/src/persist-client/src/internal/state.proto index 6064e2e3d6846..4ef289724ac19 100644 --- a/src/persist-client/src/internal/state.proto 
+++ b/src/persist-client/src/internal/state.proto @@ -56,6 +56,7 @@ message ProtoHollowBatchPart { oneof format { google.protobuf.Empty row = 7; uint64 row_and_columnar = 8; + google.protobuf.Empty structured = 13; } optional uint64 schema_id = 12; diff --git a/src/persist-client/src/internal/state.rs b/src/persist-client/src/internal/state.rs index cc8c4fdc3923c..802bbc45e5694 100644 --- a/src/persist-client/src/internal/state.rs +++ b/src/persist-client/src/internal/state.rs @@ -9,6 +9,7 @@ use anyhow::{ensure, Context}; use async_stream::{stream, try_stream}; +use mz_persist::metrics::ColumnarMetrics; use std::borrow::Cow; use std::cmp::Ordering; use std::collections::BTreeMap; @@ -32,7 +33,7 @@ use mz_ore::cast::CastFrom; use mz_ore::now::EpochMillis; use mz_ore::soft_panic_or_log; use mz_ore::vec::PartialOrdVecExt; -use mz_persist::indexed::encoding::BatchColumnarFormat; +use mz_persist::indexed::encoding::{BatchColumnarFormat, BlobTraceUpdates}; use mz_persist::location::{Blob, SeqNo}; use mz_persist_types::arrow::{ArrayBound, ProtoArrayData}; use mz_persist_types::columnar::{ColumnEncoder, Schema2}; @@ -89,12 +90,6 @@ pub(crate) const ROLLUP_THRESHOLD: Config = Config::new( "The number of seqnos between rollups.", ); -pub(crate) const WRITE_DIFFS_SUM: Config = Config::new( - "persist_write_diffs_sum", - true, - "CYA to skip writing the diffs_sum field on HollowBatchPart", -); - /// A token to disambiguate state commands that could not otherwise be /// idempotent. #[derive(Arbitrary, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize)] @@ -318,6 +313,28 @@ impl BatchPart { BatchPart::Inline { schema_id, .. } => *schema_id, } } + + pub fn deprecated_schema_id(&self) -> Option { + match self { + BatchPart::Hollow(x) => x.deprecated_schema_id, + BatchPart::Inline { + deprecated_schema_id, + .. + } => *deprecated_schema_id, + } + } +} + +impl BatchPart { + pub fn is_structured_only(&self, metrics: &ColumnarMetrics) -> bool { + match self { + BatchPart::Hollow(x) => matches!(x.format, Some(BatchColumnarFormat::Structured)), + BatchPart::Inline { updates, .. } => { + let inline_part = updates.decode::(metrics).expect("valid inline part"); + matches!(inline_part.updates, BlobTraceUpdates::Structured { .. }) + } + } + } } /// An ordered list of parts, generally stored as part of a larger run. diff --git a/src/persist-client/src/lib.rs b/src/persist-client/src/lib.rs index 6cb638e7c77e2..9ed03c652dcec 100644 --- a/src/persist-client/src/lib.rs +++ b/src/persist-client/src/lib.rs @@ -826,8 +826,12 @@ mod tests { .await .expect("failed to fetch part") .expect("missing part"); - let part = + let mut part = BlobTraceBatchPart::decode(&value, &metrics.columnar).expect("failed to decode part"); + // Ensure codec data is present even if it was not generated at write time. 
+ let _ = part + .updates + .get_or_make_codec::(&read_schemas.key, &read_schemas.val); let mut updates = Vec::new(); // TODO(bkirwi): switch to structured data in tests for ((k, v), t, d) in part.updates.records().expect("codec data").iter() { diff --git a/src/persist-client/src/schema.rs b/src/persist-client/src/schema.rs index d7e1be8635a77..b2a2f3209b349 100644 --- a/src/persist-client/src/schema.rs +++ b/src/persist-client/src/schema.rs @@ -27,7 +27,7 @@ use timely::progress::Timestamp; use crate::internal::apply::Applier; use crate::internal::encoding::Schemas; use crate::internal::metrics::{SchemaCacheMetrics, SchemaMetrics}; -use crate::internal::state::EncodedSchemas; +use crate::internal::state::{BatchPart, EncodedSchemas}; /// The result returned by [crate::PersistClient::compare_and_evolve_schema]. #[derive(Debug)] @@ -306,7 +306,7 @@ pub(crate) enum PartMigration { Codec { read: Schemas }, /// We have both write and read schemas, and they don't match. Either { - _write: Schemas, + write: Schemas, read: Schemas, key_migration: Arc, val_migration: Arc, @@ -319,12 +319,12 @@ impl Clone for PartMigration { Self::SameSchema { both } => Self::SameSchema { both: both.clone() }, Self::Codec { read } => Self::Codec { read: read.clone() }, Self::Either { - _write, + write, read, key_migration, val_migration, } => Self::Either { - _write: _write.clone(), + write: write.clone(), read: read.clone(), key_migration: Arc::clone(key_migration), val_migration: Arc::clone(val_migration), @@ -339,7 +339,7 @@ where V: Debug + Codec, { pub(crate) async fn new( - write: Option, + part: &BatchPart, read: Schemas, schema_cache: &mut SchemaCache, ) -> Result> @@ -347,6 +347,27 @@ where T: Timestamp + Lattice + Codec64 + Sync, D: Semigroup + Codec64, { + // At one point in time during our structured data migration, we deprecated the + // already written schema IDs because we made all columns at the Arrow/Parquet + // level nullable, thus changing the schema parts were written with. + // + // _After_ this deprecation, we've observed at least one instance where a + // structured only Part was written with the schema ID in the _old_ deprecated + // field. While unexpected, given the ordering of our releases it is safe to + // use the deprecated schema ID if we have a structured only part. 
+ let write = match part.schema_id() { + Some(write_id) => Some(write_id), + None => { + if part.is_structured_only(&schema_cache.applier.metrics.columnar) { + let deprecated_id = part.deprecated_schema_id(); + tracing::warn!(?deprecated_id, "falling back to deprecated schema ID"); + deprecated_id + } else { + None + } + } + }; + match (write, read.id) { (None, _) => Ok(PartMigration::Codec { read }), (Some(w), Some(r)) if w == r => Ok(PartMigration::SameSchema { both: read }), @@ -384,7 +405,7 @@ where .inc_by(start.elapsed().as_secs_f64()); Ok(PartMigration::Either { - _write: write, + write, read, key_migration, val_migration, diff --git a/src/persist/Cargo.toml b/src/persist/Cargo.toml index 5e4d2f3cca93c..174e14d8a333f 100644 --- a/src/persist/Cargo.toml +++ b/src/persist/Cargo.toml @@ -30,6 +30,10 @@ aws-config = { version = "1.2.0", default-features = false } aws-credential-types = { version = "1.1.1", features = ["hardcoded-credentials"] } aws-sdk-s3 = { version = "1.23.0", default-features = false, features = ["rt-tokio"] } aws-types = "1.1.1" +azure_identity = { version = "0.21.0" } +azure_storage = { version = "0.21.0" } +azure_storage_blobs = { version = "0.21.0" } +azure_core = "0.21.0" base64 = "0.13.1" bytes = "1.3.0" deadpool-postgres = "0.10.3" @@ -53,6 +57,7 @@ proptest = { version = "1.6.0", default-features = false, features = ["std"] } proptest-derive = { version = "0.5.1", features = ["boxed_union"] } prost = { version = "0.13.2", features = ["no-recursion-limit"] } rand = { version = "0.8.5", features = ["small_rng"] } +reqwest = "0.12.4" serde = { version = "1.0.152", features = ["derive"] } timely = "0.16.0" tokio = { version = "1.38.0", default-features = false, features = ["fs", "macros", "sync", "rt", "rt-multi-thread"] } diff --git a/src/persist/src/azure.rs b/src/persist/src/azure.rs new file mode 100644 index 0000000000000..f6d0f9f844a34 --- /dev/null +++ b/src/persist/src/azure.rs @@ -0,0 +1,467 @@ +// Copyright Materialize, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +//! An Azure Blob Storage implementation of [Blob] storage. + +use anyhow::{anyhow, Context}; +use async_trait::async_trait; +use azure_core::{ExponentialRetryOptions, RetryOptions, StatusCode, TransportOptions}; +use azure_identity::create_default_credential; +use azure_storage::{prelude::*, CloudLocation, EMULATOR_ACCOUNT}; +use azure_storage_blobs::blob::operations::GetBlobResponse; +use azure_storage_blobs::prelude::*; +use bytes::Bytes; +use futures_util::stream::FuturesOrdered; +use futures_util::StreamExt; +use std::fmt::{Debug, Formatter}; +use std::sync::Arc; +use std::time::Duration; +use tracing::{info, warn}; +use url::Url; +use uuid::Uuid; + +use mz_dyncfg::ConfigSet; +use mz_ore::bytes::{MaybeLgBytes, SegmentedBytes}; +use mz_ore::cast::CastFrom; +use mz_ore::lgbytes::{LgBytes, MetricsRegion}; +use mz_ore::metrics::MetricsRegistry; + +use crate::cfg::BlobKnobs; +use crate::error::Error; +use crate::location::{Blob, BlobMetadata, Determinate, ExternalError}; +use crate::metrics::S3BlobMetrics; + +/// Configuration for opening an [AzureBlob]. +#[derive(Clone, Debug)] +pub struct AzureBlobConfig { + // The metrics struct here is a bit of a misnomer. 
We only need access + // to the LgBytes metrics, which has an Azure-specific field. For now, + // it saves considerable plumbing to reuse [S3BlobMetrics]. + // + // TODO: spin up an AzureBlobMetrics and do the plumbing. + metrics: S3BlobMetrics, + client: ContainerClient, + prefix: String, + cfg: Arc, +} + +impl AzureBlobConfig { + const EXTERNAL_TESTS_AZURE_CONTAINER: &'static str = + "MZ_PERSIST_EXTERNAL_STORAGE_TEST_AZURE_CONTAINER"; + + /// Returns a new [AzureBlobConfig] for use in production. + /// + /// Stores objects in the given container prepended with the (possibly empty) + /// prefix. Azure credentials must be available in the process or environment. + pub fn new( + account: String, + container: String, + prefix: String, + metrics: S3BlobMetrics, + url: Url, + knobs: Box, + cfg: Arc, + ) -> Result { + let client = if account == EMULATOR_ACCOUNT { + info!("Connecting to Azure emulator"); + ClientBuilder::with_location( + CloudLocation::Emulator { + address: url.domain().expect("domain for Azure emulator").to_string(), + port: url.port().expect("port for Azure emulator"), + }, + StorageCredentials::emulator(), + ) + .transport({ + // Azure uses reqwest / hyper internally, but we specify a client explicitly to + // plumb through our timeouts. + TransportOptions::new(Arc::new( + reqwest::ClientBuilder::new() + .timeout(knobs.operation_attempt_timeout()) + .read_timeout(knobs.read_timeout()) + .connect_timeout(knobs.connect_timeout()) + .build() + .expect("valid config for azure HTTP client"), + )) + }) + .retry(RetryOptions::exponential( + ExponentialRetryOptions::default().max_total_elapsed(knobs.operation_timeout()), + )) + .blob_service_client() + .container_client(container) + } else { + let sas_credentials = match url.query() { + Some(query) => Some(StorageCredentials::sas_token(query)), + None => None, + }; + + let credentials = match sas_credentials { + Some(Ok(credentials)) => credentials, + Some(Err(err)) => { + warn!("Failed to parse SAS token: {err}"); + // TODO: should we fallback here? Or can we fully rely on query params + // to determine whether a SAS token was provided? + StorageCredentials::token_credential( + create_default_credential().expect("Azure default credentials"), + ) + } + None => { + // Fall back to default credential stack to support auth modes like + // workload identity that are injected into the environment + StorageCredentials::token_credential( + create_default_credential().expect("Azure default credentials"), + ) + } + }; + + let service_client = BlobServiceClient::new(account, credentials); + service_client.container_client(container) + }; + + // TODO: some auth modes like user-delegated SAS tokens are time-limited + // and need to be refreshed. This can be done through `service_client.update_credentials` + // but there'll be a fair bit of plumbing needed to make each mode work + + Ok(AzureBlobConfig { + metrics, + client, + cfg, + prefix, + }) + } + + /// Returns a new [AzureBlobConfig] for use in unit tests. 
+ pub fn new_for_test() -> Result, Error> { + struct TestBlobKnobs; + impl Debug for TestBlobKnobs { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("TestBlobKnobs").finish_non_exhaustive() + } + } + impl BlobKnobs for TestBlobKnobs { + fn operation_timeout(&self) -> Duration { + Duration::from_secs(30) + } + + fn operation_attempt_timeout(&self) -> Duration { + Duration::from_secs(10) + } + + fn connect_timeout(&self) -> Duration { + Duration::from_secs(5) + } + + fn read_timeout(&self) -> Duration { + Duration::from_secs(5) + } + + fn is_cc_active(&self) -> bool { + false + } + } + + let container_name = match std::env::var(Self::EXTERNAL_TESTS_AZURE_CONTAINER) { + Ok(container) => container, + Err(_) => { + if mz_ore::env::is_var_truthy("CI") { + panic!("CI is supposed to run this test but something has gone wrong!"); + } + return Ok(None); + } + }; + + let prefix = Uuid::new_v4().to_string(); + let metrics = S3BlobMetrics::new(&MetricsRegistry::new()); + + let config = AzureBlobConfig::new( + EMULATOR_ACCOUNT.to_string(), + container_name.clone(), + prefix, + metrics, + Url::parse(&format!("http://localhost:40111/{}", container_name)).expect("valid url"), + Box::new(TestBlobKnobs), + Arc::new(ConfigSet::default()), + )?; + + Ok(Some(config)) + } +} + +/// Implementation of [Blob] backed by Azure Blob Storage. +#[derive(Debug)] +pub struct AzureBlob { + metrics: S3BlobMetrics, + client: ContainerClient, + prefix: String, + _cfg: Arc, +} + +impl AzureBlob { + /// Opens the given location for non-exclusive read-write access. + pub async fn open(config: AzureBlobConfig) -> Result { + if config.client.service_client().account() == EMULATOR_ACCOUNT { + // TODO: we could move this logic into the test harness. + // it's currently here because it's surprisingly annoying to + // create the container out-of-band + if let Err(e) = config.client.create().await { + warn!("Failed to create container: {e}"); + } + } + + let ret = AzureBlob { + metrics: config.metrics, + client: config.client, + prefix: config.prefix, + _cfg: config.cfg, + }; + + Ok(ret) + } + + fn get_path(&self, key: &str) -> String { + format!("{}/{}", self.prefix, key) + } +} + +#[async_trait] +impl Blob for AzureBlob { + async fn get(&self, key: &str) -> Result, ExternalError> { + let path = self.get_path(key); + let blob = self.client.blob_client(path); + + /// Fetch a the body of a single [`GetBlobResponse`]. + async fn fetch_chunk( + response: GetBlobResponse, + metrics: S3BlobMetrics, + ) -> Result { + let content_length = response.blob.properties.content_length; + + // Here we're being quite defensive. If `content_length` comes back + // as 0 it's most likely incorrect. In that case we'll copy bytes + // of the network into a growable buffer, then copy the entire + // buffer into lgalloc. + let mut buffer = match content_length { + 1.. => { + let region = metrics + .lgbytes + .persist_azure + .new_region(usize::cast_from(content_length)); + PreSizedBuffer::Sized(region) + } + 0 => PreSizedBuffer::Unknown(SegmentedBytes::new()), + }; + + let mut body = response.data; + while let Some(value) = body.next().await { + let value = value.map_err(|e| { + ExternalError::from(anyhow!("Azure blob get body error: {}", e)) + })?; + + match &mut buffer { + PreSizedBuffer::Sized(region) => region.extend_from_slice(&value), + PreSizedBuffer::Unknown(segments) => segments.push(value), + } + } + + // Spill our bytes to lgalloc, if they aren't already. 
+ let lgbytes = match buffer { + PreSizedBuffer::Sized(region) => LgBytes::from(Arc::new(region)), + // Now that we've collected all of the segments, we know the size of our region. + PreSizedBuffer::Unknown(segments) => { + let mut region = metrics.lgbytes.persist_azure.new_region(segments.len()); + for segment in segments.into_segments() { + region.extend_from_slice(segment.as_ref()); + } + LgBytes::from(Arc::new(region)) + } + }; + + // Report if the content-length header didn't match the number of + // bytes we read from the network. + if content_length != u64::cast_from(lgbytes.len()) { + metrics.get_invalid_resp.inc(); + } + + Ok(MaybeLgBytes::LgBytes(lgbytes)) + } + + let mut requests = FuturesOrdered::new(); + // TODO: the default chunk size is 1MB. We have not tried tuning it, + // but making this configurable / running some benchmarks could be + // valuable. + let mut stream = blob.get().into_stream(); + + while let Some(value) = stream.next().await { + // Return early if any of the individual fetch requests return an error. + let response = match value { + Ok(v) => v, + Err(e) => { + if let Some(e) = e.as_http_error() { + if e.status() == StatusCode::NotFound { + return Ok(None); + } + } + + return Err(ExternalError::from(anyhow!( + "Azure blob get error: {:?}", + e + ))); + } + }; + + // Drive all of the fetch requests concurrently. + let metrics = self.metrics.clone(); + requests.push_back(fetch_chunk(response, metrics)); + } + + // Await on all of our chunks. + let mut segments = SegmentedBytes::with_capacity(requests.len()); + while let Some(body) = requests.next().await { + let segment = body.context("azure get body err")?; + segments.push(segment); + } + + Ok(Some(segments)) + } + + async fn list_keys_and_metadata( + &self, + key_prefix: &str, + f: &mut (dyn FnMut(BlobMetadata) + Send + Sync), + ) -> Result<(), ExternalError> { + let blob_key_prefix = self.get_path(key_prefix); + let strippable_root_prefix = format!("{}/", self.prefix); + + let mut stream = self + .client + .list_blobs() + .prefix(blob_key_prefix.clone()) + .into_stream(); + + while let Some(response) = stream.next().await { + let response = response + .map_err(|e| ExternalError::from(anyhow!("Azure list blobs error: {}", e)))?; + + for blob in response.blobs.items { + let azure_storage_blobs::container::operations::list_blobs::BlobItem::Blob(blob) = + blob + else { + continue; + }; + + if let Some(key) = blob.name.strip_prefix(&strippable_root_prefix) { + let size_in_bytes = blob.properties.content_length; + f(BlobMetadata { key, size_in_bytes }); + } + } + } + + Ok(()) + } + + async fn set(&self, key: &str, value: Bytes) -> Result<(), ExternalError> { + let path = self.get_path(key); + let blob = self.client.blob_client(path); + + blob.put_block_blob(value) + .await + .map_err(|e| ExternalError::from(anyhow!("Azure blob put error: {}", e)))?; + + Ok(()) + } + + async fn delete(&self, key: &str) -> Result, ExternalError> { + let path = self.get_path(key); + let blob = self.client.blob_client(path); + + match blob.get_properties().await { + Ok(props) => { + let size = usize::cast_from(props.blob.properties.content_length); + blob.delete() + .await + .map_err(|e| ExternalError::from(anyhow!("Azure blob delete error: {}", e)))?; + Ok(Some(size)) + } + Err(e) => { + if let Some(e) = e.as_http_error() { + if e.status() == StatusCode::NotFound { + return Ok(None); + } + } + + Err(ExternalError::from(anyhow!("Azure blob error: {}", e))) + } + } + } + + async fn restore(&self, key: &str) -> Result<(), 
ExternalError> { + let path = self.get_path(key); + let blob = self.client.blob_client(&path); + + match blob.get_properties().await { + Ok(_) => Ok(()), + Err(e) => { + if let Some(e) = e.as_http_error() { + if e.status() == StatusCode::NotFound { + return Err(Determinate::new(anyhow!( + "unable to restore {key} in Azure Blob Storage: blob does not exist" + )) + .into()); + } + } + + Err(ExternalError::from(anyhow!("Azure blob error: {}", e))) + } + } + } +} + +/// If possible we'll pre-allocate a chunk of memory in lgalloc and write into +/// that as we read bytes off the network. +enum PreSizedBuffer { + Sized(MetricsRegion), + Unknown(SegmentedBytes), +} + +#[cfg(test)] +mod tests { + use tracing::info; + + use crate::location::tests::blob_impl_test; + + use super::*; + + #[cfg_attr(miri, ignore)] // error: unsupported operation: can't call foreign function `TLS_method` on OS `linux` + #[mz_ore::test(tokio::test(flavor = "multi_thread"))] + async fn azure_blob() -> Result<(), ExternalError> { + let config = match AzureBlobConfig::new_for_test()? { + Some(client) => client, + None => { + info!( + "{} env not set: skipping test that uses external service", + AzureBlobConfig::EXTERNAL_TESTS_AZURE_CONTAINER + ); + return Ok(()); + } + }; + + blob_impl_test(move |_path| { + let config = config.clone(); + async move { + let config = AzureBlobConfig { + metrics: config.metrics.clone(), + client: config.client.clone(), + cfg: Arc::new(ConfigSet::default()), + prefix: config.prefix.clone(), + }; + AzureBlob::open(config).await + } + }) + .await + } +} diff --git a/src/persist/src/cfg.rs b/src/persist/src/cfg.rs index a191d09fd8b84..8ef7cfe8d01d3 100644 --- a/src/persist/src/cfg.rs +++ b/src/persist/src/cfg.rs @@ -21,6 +21,7 @@ use tracing::warn; use mz_postgres_client::metrics::PostgresClientMetrics; use mz_postgres_client::PostgresClientKnobs; +use crate::azure::{AzureBlob, AzureBlobConfig}; use crate::file::{FileBlob, FileBlobConfig}; use crate::location::{Blob, Consensus, Determinate, ExternalError}; use crate::mem::{MemBlob, MemBlobConfig, MemConsensus}; @@ -35,7 +36,7 @@ pub fn all_dyn_configs(configs: ConfigSet) -> ConfigSet { .add(&crate::indexed::columnar::arrow::ENABLE_ARROW_LGALLOC_NONCC_SIZES) .add(&crate::s3::ENABLE_S3_LGALLOC_CC_SIZES) .add(&crate::s3::ENABLE_S3_LGALLOC_NONCC_SIZES) - .add(&crate::s3::ENABLE_ONE_ALLOC_PER_REQUEST) + .add(&crate::postgres::USE_POSTGRES_TUNED_QUERIES) } /// Config for an implementation of [Blob]. @@ -48,6 +49,8 @@ pub enum BlobConfig { /// Config for [MemBlob], only available in testing to prevent /// footguns. Mem(bool), + /// Config for [AzureBlob]. + Azure(AzureBlobConfig), } /// Configuration knobs for [Blob]. @@ -70,6 +73,7 @@ impl BlobConfig { match self { BlobConfig::File(config) => Ok(Arc::new(FileBlob::open(config).await?)), BlobConfig::S3(config) => Ok(Arc::new(S3Blob::open(config).await?)), + BlobConfig::Azure(config) => Ok(Arc::new(AzureBlob::open(config).await?)), BlobConfig::Mem(tombstone) => { Ok(Arc::new(MemBlob::open(MemBlobConfig::new(tombstone)))) } @@ -148,6 +152,40 @@ impl BlobConfig { query_params.clear(); Ok(BlobConfig::Mem(tombstone)) } + "http" | "https" => match url + .host() + .ok_or_else(|| anyhow!("missing protocol: {}", &url.as_str()))? 
+ .to_string() + .split_once('.') + { + // The Azurite emulator always uses the well-known account name devstoreaccount1 + Some((account, root)) + if account == "devstoreaccount1" || root == "blob.core.windows.net" => + { + if let Some(container) = url + .path_segments() + .expect("azure blob storage container") + .next() + { + query_params.clear(); + Ok(BlobConfig::Azure(AzureBlobConfig::new( + account.to_string(), + container.to_string(), + // Azure doesn't support prefixes in the way S3 does. + // This is always empty, but we leave the field for + // compatibility with our existing test suite. + "".to_string(), + metrics, + url.clone().into_redacted(), + knobs, + cfg, + )?)) + } else { + Err(anyhow!("unknown persist blob scheme: {}", url.as_str())) + } + } + _ => Err(anyhow!("unknown persist blob scheme: {}", url.as_str())), + }, p => Err(anyhow!( "unknown persist blob scheme {}: {}", p, @@ -196,10 +234,11 @@ impl ConsensusConfig { url: &SensitiveUrl, knobs: Box, metrics: PostgresClientMetrics, + dyncfg: Arc, ) -> Result { let config = match url.scheme() { "postgres" | "postgresql" => Ok(ConsensusConfig::Postgres( - PostgresConsensusConfig::new(url, knobs, metrics)?, + PostgresConsensusConfig::new(url, knobs, metrics, dyncfg)?, )), "mem" => { if !cfg!(debug_assertions) { diff --git a/src/persist/src/indexed/columnar/arrow.rs b/src/persist/src/indexed/columnar/arrow.rs index 5c0648578c7ba..a7ef8d2f8f239 100644 --- a/src/persist/src/indexed/columnar/arrow.rs +++ b/src/persist/src/indexed/columnar/arrow.rs @@ -11,18 +11,18 @@ use std::ptr::NonNull; use std::sync::Arc; -use std::sync::LazyLock; -use arrow::array::{make_array, Array, ArrayData, ArrayRef, AsArray}; +use anyhow::anyhow; +use arrow::array::{make_array, Array, ArrayData, ArrayRef, BinaryArray, Int64Array, RecordBatch}; use arrow::buffer::{BooleanBuffer, Buffer, NullBuffer}; -use arrow::datatypes::{DataType, Field, Schema, ToByteSlice}; +use arrow::datatypes::ToByteSlice; use mz_dyncfg::Config; -use mz_ore::iter::IteratorExt; use crate::indexed::columnar::{ColumnarRecords, ColumnarRecordsStructuredExt}; +use crate::indexed::encoding::BlobTraceUpdates; use crate::metrics::ColumnarMetrics; -/// The Arrow schema we use to encode ((K, V), T, D) tuples. +/// Converts a [`ColumnarRecords`] into [`arrow`] columns. /// /// Both Time and Diff are presented externally to persist users as a type /// parameter that implements [mz_persist_types::Codec64]. Our columnar format @@ -37,55 +37,28 @@ use crate::metrics::ColumnarMetrics; /// time after year 2200). Using a i64 might be a pessimization for a /// non-realtime mz source with u64 timestamps in the range `(i64::MAX, /// u64::MAX]`, but realtime sources are overwhelmingly the common case. -pub static SCHEMA_ARROW_RS_KVTD: LazyLock> = LazyLock::new(|| { - let schema = Schema::new(vec![ - Field::new("k", DataType::Binary, false), - Field::new("v", DataType::Binary, false), - Field::new("t", DataType::Int64, false), - Field::new("d", DataType::Int64, false), - ]); - Arc::new(schema) -}); - -/// Converts a [`ColumnarRecords`] into `(K, V, T, D)` [`arrow`] columns. 
-pub fn encode_arrow_batch_kvtd(x: &ColumnarRecords) -> Vec { - let key = x.key_data.clone(); - let val = x.val_data.clone(); - let ts = x.timestamps.clone(); - let diff = x.diffs.clone(); - - vec![Arc::new(key), Arc::new(val), Arc::new(ts), Arc::new(diff)] -} - -/// Converts a [`ColumnarRecords`] and [`ColumnarRecordsStructuredExt`] pair -/// (aka [`BlobTraceUpdates::Both`]) into [`arrow::array::Array`]s with columns -/// [(K, V, T, D, K_S, V_S)]. -/// -/// [`BlobTraceUpdates::Both`]: crate::indexed::encoding::BlobTraceUpdates::Both -pub fn encode_arrow_batch_kvtd_ks_vs( - records: &ColumnarRecords, - structured: &ColumnarRecordsStructuredExt, -) -> (Vec>, Vec>) { - let mut fields: Vec<_> = (*SCHEMA_ARROW_RS_KVTD).fields().iter().cloned().collect(); - let mut arrays = encode_arrow_batch_kvtd(records); - - { - let key_array = &structured.key; - let key_field = Field::new("k_s", key_array.data_type().clone(), false); - - fields.push(Arc::new(key_field)); - arrays.push(Arc::clone(key_array)); +pub fn encode_arrow_batch(updates: &BlobTraceUpdates) -> RecordBatch { + fn array_ref(a: &A) -> ArrayRef { + Arc::new(a.clone()) } - - { - let val_array = &structured.val; - let val_field = Field::new("v_s", val_array.data_type().clone(), false); - - fields.push(Arc::new(val_field)); - arrays.push(Arc::clone(val_array)); - } - - (fields, arrays) + // For historical reasons, the codec-encoded columns are placed before T/D, + // and the structured-encoding columns are placed after. + let kv = updates + .records() + .into_iter() + .flat_map(|x| [("k", array_ref(&x.key_data)), ("v", array_ref(&x.val_data))]); + let td = [ + ("t", array_ref(updates.timestamps())), + ("d", array_ref(updates.diffs())), + ]; + let ks_vs = updates + .structured() + .into_iter() + .flat_map(|x| [("k_s", Arc::clone(&x.key)), ("v_s", Arc::clone(&x.val))]); + + // We expect all the top-level fields to be fully defined. + let fields = kv.chain(td).chain(ks_vs).map(|(f, a)| (f, a, false)); + RecordBatch::try_from_iter_with_nullable(fields).expect("valid field definitions") } pub(crate) const ENABLE_ARROW_LGALLOC_CC_SIZES: Config = Config::new( @@ -194,84 +167,71 @@ fn realloc_buffer(buffer: &Buffer, metrics: &ColumnarMetrics) -> Buffer { unsafe { Buffer::from_custom_allocation(ptr, bytes.len(), Arc::new(region)) } } -/// Converts an [`arrow`] [(K, V, T, D)] [`RecordBatch`] into a [`ColumnarRecords`]. -/// -/// [`RecordBatch`]: `arrow::array::RecordBatch` -pub fn decode_arrow_batch_kvtd( - columns: &[Arc], - metrics: &ColumnarMetrics, -) -> Result { - let (key_col, val_col, ts_col, diff_col) = match &columns { - x @ &[k, v, t, d] => { - // The columns need to all have the same logical length. 
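The new `encode_arrow_batch` above emits a single `RecordBatch` whose columns are identified by name (`k`/`v` for codec-encoded bytes, `t`/`d` for times and diffs, `k_s`/`v_s` for structured data), and the matching `decode_arrow_batch` later in this hunk looks columns up by name rather than by position, so the optional columns can simply be absent. A small sketch of that name-keyed round trip using only arrow APIs referenced in this hunk; the literal keys, values, times, and diffs are made up for illustration:

```rust
use std::sync::Arc;

use arrow::array::{Array, ArrayRef, BinaryArray, Int64Array, RecordBatch};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Two updates with codec-encoded keys/values; times and diffs are i64,
    // matching the ("k", "v", "t", "d") column layout used above.
    let k: ArrayRef = Arc::new(BinaryArray::from_iter_values([b"k1".as_slice(), b"k2".as_slice()]));
    let v: ArrayRef = Arc::new(BinaryArray::from_iter_values([b"v1".as_slice(), b"v2".as_slice()]));
    let t: ArrayRef = Arc::new(Int64Array::from(vec![0i64, 1]));
    let d: ArrayRef = Arc::new(Int64Array::from(vec![1i64, 1]));

    // All top-level columns are declared non-nullable, as in encode_arrow_batch.
    let batch = RecordBatch::try_from_iter_with_nullable([
        ("k", k, false),
        ("v", v, false),
        ("t", t, false),
        ("d", d, false),
    ])?;

    // Decoding looks a column up by name and downcasts it to the expected
    // array type, so a batch without "k_s"/"v_s" is still perfectly readable.
    let ts = batch
        .column_by_name("t")
        .expect("t column present")
        .as_any()
        .downcast_ref::<Int64Array>()
        .expect("t is an Int64Array");
    assert_eq!(ts.value(0), 0);
    assert_eq!(ts.value(1), 1);
    Ok(())
}
```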
- if !x.iter().map(|col| col.len()).all_equal() { - return Err(format!( - "columns don't all have equal length {k_len}, {v_len}, {t_len}, {d_len}", - k_len = k.len(), - v_len = v.len(), - t_len = t.len(), - d_len = d.len() - )); - } - - (k, v, t, d) - } - _ => return Err(format!("expected 4 columns got {}", columns.len())), - }; - - let key = key_col - .as_binary_opt::() - .ok_or_else(|| "key column is wrong type".to_string())?; - - let val = val_col - .as_binary_opt::() - .ok_or_else(|| "val column is wrong type".to_string())?; - - let time = ts_col - .as_primitive_opt::() - .ok_or_else(|| "time column is wrong type".to_string())?; - - let diff = diff_col - .as_primitive_opt::() - .ok_or_else(|| "diff column is wrong type".to_string())?; - - let len = key.len(); - let ret = ColumnarRecords { - len, - key_data: realloc_array(key, metrics), - val_data: realloc_array(val, metrics), - timestamps: realloc_array(time, metrics), - diffs: realloc_array(diff, metrics), - }; - ret.validate()?; - - Ok(ret) -} - -/// Converts an arrow [(K, V, T, D)] Chunk into a ColumnarRecords. -pub fn decode_arrow_batch_kvtd_ks_vs( - cols: &[Arc], - key_col: Arc, - val_col: Arc, +/// Converts an [`arrow`] [RecordBatch] into a [BlobTraceUpdates] and reallocate the backing data. +pub fn decode_arrow_batch( + batch: &RecordBatch, metrics: &ColumnarMetrics, -) -> Result<(ColumnarRecords, ColumnarRecordsStructuredExt), String> { - let same_length = cols - .iter() - .map(|col| col.as_ref()) - .chain([&*key_col]) - .chain([&*val_col]) - .map(|col| col.len()) - .all_equal(); - if !same_length { - return Err("not all columns (included structured) have the same length".to_string()); +) -> anyhow::Result { + fn try_downcast + 'static>( + batch: &RecordBatch, + name: &'static str, + metrics: &ColumnarMetrics, + ) -> anyhow::Result> { + let Some(array_ref) = batch.column_by_name(name) else { + return Ok(None); + }; + let col_ref = array_ref + .as_any() + .downcast_ref::() + .ok_or_else(|| anyhow!("wrong datatype for column {}", name))?; + let col = realloc_array(col_ref, metrics); + Ok(Some(col)) } - // We always have (K, V, T, D) columns. - let primary_records = decode_arrow_batch_kvtd(cols, metrics)?; - let structured_ext = ColumnarRecordsStructuredExt { - key: key_col, - val: val_col, + let codec_key = try_downcast::(batch, "k", metrics)?; + let codec_val = try_downcast::(batch, "v", metrics)?; + let timestamps = try_downcast::(batch, "t", metrics)? + .ok_or_else(|| anyhow!("missing timestamp column"))?; + let diffs = try_downcast::(batch, "d", metrics)? 
+ .ok_or_else(|| anyhow!("missing diff column"))?; + let structured_key = batch + .column_by_name("k_s") + .map(|a| realloc_any(Arc::clone(a), metrics)); + let structured_val = batch + .column_by_name("v_s") + .map(|a| realloc_any(Arc::clone(a), metrics)); + + let updates = match (codec_key, codec_val, structured_key, structured_val) { + (Some(codec_key), Some(codec_val), Some(structured_key), Some(structured_val)) => { + BlobTraceUpdates::Both( + ColumnarRecords::new(codec_key, codec_val, timestamps, diffs), + ColumnarRecordsStructuredExt { + key: structured_key, + val: structured_val, + }, + ) + } + (Some(codec_key), Some(codec_val), None, None) => BlobTraceUpdates::Row( + ColumnarRecords::new(codec_key, codec_val, timestamps, diffs), + ), + (None, None, Some(structured_key), Some(structured_val)) => BlobTraceUpdates::Structured { + key_values: ColumnarRecordsStructuredExt { + key: structured_key, + val: structured_val, + }, + timestamps, + diffs, + }, + (k, v, ks, vs) => { + anyhow::bail!( + "unexpected mix of key/value columns: k={:?}, v={}, k_s={}, v_s={}", + k.is_some(), + v.is_some(), + ks.is_some(), + vs.is_some(), + ); + } }; - Ok((primary_records, structured_ext)) + Ok(updates) } diff --git a/src/persist/src/indexed/columnar/parquet.rs b/src/persist/src/indexed/columnar/parquet.rs index 9d54ab3c830a7..f6f45f715aba9 100644 --- a/src/persist/src/indexed/columnar/parquet.rs +++ b/src/persist/src/indexed/columnar/parquet.rs @@ -12,8 +12,6 @@ use std::io::Write; use std::sync::Arc; -use arrow::datatypes::Schema; -use arrow::record_batch::RecordBatch; use differential_dataflow::trace::Description; use mz_ore::bytes::SegmentedBytes; use mz_ore::cast::CastFrom; @@ -30,11 +28,7 @@ use tracing::warn; use crate::error::Error; use crate::gen::persist::proto_batch_part_inline::FormatMetadata as ProtoFormatMetadata; use crate::gen::persist::ProtoBatchFormat; -use crate::indexed::columnar::arrow::{ - decode_arrow_batch_kvtd, decode_arrow_batch_kvtd_ks_vs, encode_arrow_batch_kvtd, - encode_arrow_batch_kvtd_ks_vs, realloc_any, SCHEMA_ARROW_RS_KVTD, -}; -use crate::indexed::columnar::ColumnarRecords; +use crate::indexed::columnar::arrow::{decode_arrow_batch, encode_arrow_batch}; use crate::indexed::encoding::{ decode_trace_inline_meta, encode_trace_inline_meta, BlobTraceBatchPart, BlobTraceUpdates, }; @@ -128,24 +122,14 @@ pub fn encode_parquet_kvtd( .set_key_value_metadata(Some(vec![metadata])) .build(); - let (columns, schema, format) = match updates { - BlobTraceUpdates::Row(updates) => ( - encode_arrow_batch_kvtd(updates), - Arc::clone(&*SCHEMA_ARROW_RS_KVTD), - "k,v,t,d", - ), - BlobTraceUpdates::Both(codec_updates, structured_updates) => { - let (fields, arrays) = encode_arrow_batch_kvtd_ks_vs(codec_updates, structured_updates); - let schema = Schema::new(fields); - (arrays, Arc::new(schema), "k,v,t,d,k_s,v_s") - } - BlobTraceUpdates::Structured { .. } => { - unimplemented!("codec data should exist before reaching parquet encoding") - } + let batch = encode_arrow_batch(updates); + let format = match updates { + BlobTraceUpdates::Row(_) => "k,v,t,d", + BlobTraceUpdates::Both(_, _) => "k,v,t,d,k_s,v_s", + BlobTraceUpdates::Structured { .. 
} => "t,d,k_s,v_s", }; - let mut writer = ArrowWriter::try_new(w, Arc::clone(&schema), Some(properties))?; - let batch = RecordBatch::try_new(Arc::clone(&schema), columns)?; + let mut writer = ArrowWriter::try_new(w, batch.schema(), Some(properties))?; writer.write(&batch)?; writer.flush()?; @@ -179,31 +163,20 @@ pub fn decode_parquet_file_kvtd( match format_metadata { None => { - // Make sure we have all of the expected columns. - if SCHEMA_ARROW_RS_KVTD.fields() != schema.fields() { - return Err(format!("found invalid schema {:?}", schema).into()); - } - let mut ret = Vec::new(); for batch in reader { let batch = batch.map_err(|e| Error::String(e.to_string()))?; - ret.push(decode_arrow_batch_kvtd(batch.columns(), metrics)?); + ret.push(batch); } if ret.len() != 1 { warn!("unexpected number of row groups: {}", ret.len()); } - Ok(BlobTraceUpdates::Row(ColumnarRecords::concat( - &ret, metrics, - ))) + let batch = ::arrow::compute::concat_batches(&schema, &ret)?; + let updates = decode_arrow_batch(&batch, metrics).map_err(|e| e.to_string())?; + Ok(updates) } - Some(ProtoFormatMetadata::StructuredMigration(v @ 1 | v @ 2)) => { - if schema.fields().len() > 6 { - return Err( - format!("expected at most 6 columns, got {}", schema.fields().len()).into(), - ); - } - - let batch = reader + Some(ProtoFormatMetadata::StructuredMigration(v @ 1..=3)) => { + let mut batch = reader .next() .ok_or_else(|| Error::String("found empty batch".to_string()))??; @@ -211,45 +184,14 @@ pub fn decode_parquet_file_kvtd( if reader.next().is_some() { return Err(Error::String("found more than one RowGroup".to_string())); } - let columns = batch.columns(); - - // The first 4 columns are our primary (K, V, T, D) and optionally - // we also have K_S and/or V_S if we wrote structured data. - let primary_columns = &columns[..4]; // Version 1 is a deprecated format so we just ignored the k_s and v_s columns. - if *v == 1 { - let records = decode_arrow_batch_kvtd(primary_columns, metrics)?; - return Ok(BlobTraceUpdates::Row(records)); + if *v == 1 && batch.num_columns() > 4 { + batch = batch.project(&[0, 1, 2, 3])?; } - let k_s_column = schema - .fields() - .iter() - .position(|field| field.name() == "k_s") - .map(|idx| realloc_any(Arc::clone(&columns[idx]), metrics)); - let v_s_column = schema - .fields() - .iter() - .position(|field| field.name() == "v_s") - .map(|idx| realloc_any(Arc::clone(&columns[idx]), metrics)); - - match (k_s_column, v_s_column) { - (Some(ks), Some(vs)) => { - let (records, structured_ext) = - decode_arrow_batch_kvtd_ks_vs(primary_columns, ks, vs, metrics)?; - Ok(BlobTraceUpdates::Both(records, structured_ext)) - } - (ks, vs) => { - warn!( - "unable to read back structured data! version={v} ks={} vs={}", - ks.is_some(), - vs.is_some() - ); - let records = decode_arrow_batch_kvtd(primary_columns, metrics)?; - Ok(BlobTraceUpdates::Row(records)) - } - } + let updates = decode_arrow_batch(&batch, metrics).map_err(|e| e.to_string())?; + Ok(updates) } unknown => Err(format!("unkown ProtoFormatMetadata, {unknown:?}"))?, } diff --git a/src/persist/src/indexed/encoding.rs b/src/persist/src/indexed/encoding.rs index 44ecb7ccc0de1..9f43d2f10e910 100644 --- a/src/persist/src/indexed/encoding.rs +++ b/src/persist/src/indexed/encoding.rs @@ -60,6 +60,9 @@ pub enum BatchColumnarFormat { /// with a schema of `(k, k_c, v, v_c, t, d)`, where `k` are the serialized bytes and `k_c` is /// nested columnar data. Both(usize), + /// Rows are encoded to a columnar struct. 
The batch is written down as Parquet + /// with a schema of `(t, d, k_s, v_s)`, where `k_s` is nested columnar data. + Structured, } impl BatchColumnarFormat { @@ -75,6 +78,7 @@ impl BatchColumnarFormat { "row" => BatchColumnarFormat::Row, "both" => BatchColumnarFormat::Both(0), "both_v2" => BatchColumnarFormat::Both(2), + "structured" => BatchColumnarFormat::Structured, x => { let default = BatchColumnarFormat::default(); soft_panic_or_log!("Invalid batch columnar type: {x}, falling back to {default}"); @@ -100,6 +104,7 @@ impl BatchColumnarFormat { // The V0 format has been deprecated and we ignore its structured columns. BatchColumnarFormat::Both(0 | 1) => false, BatchColumnarFormat::Both(_) => true, + BatchColumnarFormat::Structured => true, } } } @@ -406,29 +411,51 @@ impl BlobTraceUpdates { lgbytes: &ColumnarMetrics, proto: ProtoColumnarRecords, ) -> Result { - let binary_array = |data: Bytes, offsets: Vec| match BinaryArray::try_new( - OffsetBuffer::new(offsets.into()), - ::arrow::buffer::Buffer::from_bytes(data.into()), - None, - ) { - Ok(data) => Ok(realloc_array(&data, lgbytes)), - Err(e) => Err(TryFromProtoError::InvalidFieldError(format!( - "Unable to decode binary array from repeated proto fields: {e:?}" - ))), + let binary_array = |data: Bytes, offsets: Vec| { + if offsets.is_empty() && proto.len > 0 { + return Ok(None); + }; + match BinaryArray::try_new( + OffsetBuffer::new(offsets.into()), + ::arrow::buffer::Buffer::from_bytes(data.into()), + None, + ) { + Ok(data) => Ok(Some(realloc_array(&data, lgbytes))), + Err(e) => Err(TryFromProtoError::InvalidFieldError(format!( + "Unable to decode binary array from repeated proto fields: {e:?}" + ))), + } }; - let ret = ColumnarRecords::new( - binary_array(proto.key_data, proto.key_offsets)?, - binary_array(proto.val_data, proto.val_offsets)?, - realloc_array(&proto.timestamps.into(), lgbytes), - realloc_array(&proto.diffs.into(), lgbytes), - ); + let codec_key = binary_array(proto.key_data, proto.key_offsets)?; + let codec_val = binary_array(proto.val_data, proto.val_offsets)?; + + let timestamps = realloc_array(&proto.timestamps.into(), lgbytes); + let diffs = realloc_array(&proto.diffs.into(), lgbytes); let ext = ColumnarRecordsStructuredExt::from_proto(proto.key_structured, proto.val_structured)?; - let updates = match ext { - None => Self::Row(ret), - Some(ext) => Self::Both(ret, ext), + let updates = match (codec_key, codec_val, ext) { + (Some(codec_key), Some(codec_val), Some(ext)) => BlobTraceUpdates::Both( + ColumnarRecords::new(codec_key, codec_val, timestamps, diffs), + ext, + ), + (Some(codec_key), Some(codec_val), None) => BlobTraceUpdates::Row( + ColumnarRecords::new(codec_key, codec_val, timestamps, diffs), + ), + (None, None, Some(ext)) => BlobTraceUpdates::Structured { + key_values: ext, + timestamps, + diffs, + }, + (k, v, ext) => { + return Err(TryFromProtoError::InvalidPersistState(format!( + "unexpected mix of key/value columns: k={:?}, v={}, ext={}", + k.is_some(), + v.is_some(), + ext.is_some(), + ))) + } }; Ok(updates) @@ -488,6 +515,16 @@ impl BlobTraceUpdates { .clone(), ) } + BatchColumnarFormat::Structured => { + let mut this = self.clone(); + Self::Structured { + key_values: this + .get_or_make_structured::(key_schema, val_schema) + .clone(), + timestamps: this.timestamps().clone(), + diffs: this.diffs().clone(), + } + } } } } @@ -686,9 +723,9 @@ pub fn encode_trace_inline_meta(batch: &BlobTraceBatchPa let metadata = ProtoFormatMetadata::StructuredMigration(2); (ProtoBatchFormat::ParquetStructured, 
Some(metadata))
             }
             BlobTraceUpdates::Structured { .. } => {
-                unimplemented!("codec data should exist before reaching parquet encoding")
+                let metadata = ProtoFormatMetadata::StructuredMigration(3);
+                (ProtoBatchFormat::ParquetStructured, Some(metadata))
             }
         };
diff --git a/src/persist/src/lib.rs b/src/persist/src/lib.rs
index 3d9c94fc49eeb..3a5ce218d39cd 100644
--- a/src/persist/src/lib.rs
+++ b/src/persist/src/lib.rs
@@ -17,6 +17,7 @@
     clippy::clone_on_ref_ptr
 )]
 
+pub mod azure;
 pub mod cfg;
 pub mod error;
 pub mod file;
diff --git a/src/persist/src/postgres.rs b/src/persist/src/postgres.rs
index c15a461189a26..ec8f9b9db94c7 100644
--- a/src/persist/src/postgres.rs
+++ b/src/persist/src/postgres.rs
@@ -22,6 +22,7 @@ use deadpool_postgres::tokio_postgres::types::{to_sql_checked, FromSql, IsNull,
 use deadpool_postgres::tokio_postgres::Config;
 use deadpool_postgres::{Object, PoolError};
 use futures_util::StreamExt;
+use mz_dyncfg::ConfigSet;
 use mz_ore::cast::CastFrom;
 use mz_ore::metrics::MetricsRegistry;
 use mz_ore::url::SensitiveUrl;
@@ -34,6 +35,14 @@ use tracing::{info, warn};
 use crate::error::Error;
 use crate::location::{CaSResult, Consensus, ExternalError, ResultStream, SeqNo, VersionedData};
 
+/// Flag to use consensus queries that are tuned for vanilla Postgres.
+pub const USE_POSTGRES_TUNED_QUERIES: mz_dyncfg::Config<bool> = mz_dyncfg::Config::new(
+    "persist_use_postgres_tuned_queries",
+    false,
+    "Use a set of queries for consensus that have specifically been tuned against
+    Postgres to ensure we acquire a minimal number of locks.",
+);
+
 const SCHEMA: &str = "
 CREATE TABLE IF NOT EXISTS consensus (
     shard text NOT NULL,
@@ -101,6 +110,7 @@ pub struct PostgresConsensusConfig {
     url: SensitiveUrl,
     knobs: Arc,
     metrics: PostgresClientMetrics,
+    dyncfg: Arc<ConfigSet>,
 }
 
 impl From for PostgresClientConfig {
@@ -118,11 +128,13 @@ impl PostgresConsensusConfig {
         url: &SensitiveUrl,
         knobs: Box,
         metrics: PostgresClientMetrics,
+        dyncfg: Arc<ConfigSet>,
     ) -> Result {
         Ok(PostgresConsensusConfig {
             url: url.clone(),
             knobs: Arc::from(knobs),
             metrics,
+            dyncfg,
         })
     }
 
@@ -175,18 +187,31 @@ impl PostgresConsensusConfig {
             }
         }
 
+        let dyncfg = ConfigSet::default().add(&USE_POSTGRES_TUNED_QUERIES);
         let config = PostgresConsensusConfig::new(
             &url,
             Box::new(TestConsensusKnobs),
             PostgresClientMetrics::new(&MetricsRegistry::new(), "mz_persist"),
+            Arc::new(dyncfg),
         )?;
         Ok(Some(config))
     }
 }
 
+/// What flavor of Postgres are we connected to for consensus.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum PostgresMode {
+    /// CockroachDB, used in our cloud offering.
+    CockroachDB,
+    /// Vanilla Postgres, the default for our self-hosted offering.
+    Postgres,
+}
+
 /// Implementation of [Consensus] over a Postgres database.
pub struct PostgresConsensus { postgres_client: PostgresClient, + dyncfg: Arc, + mode: PostgresMode, } impl std::fmt::Debug for PostgresConsensus { @@ -207,39 +232,51 @@ impl PostgresConsensus { escape_identifier(role), ); + let dyncfg = Arc::clone(&config.dyncfg); let postgres_client = PostgresClient::open(config.into())?; let client = postgres_client.get_connection().await?; - let crdb_mode = match client + let mode = match client .batch_execute(&format!( "{}; {}{}; {};", create_schema, SCHEMA, CRDB_SCHEMA_OPTIONS, CRDB_CONFIGURE_ZONE, )) .await { - Ok(()) => true, + Ok(()) => PostgresMode::CockroachDB, Err(e) if e.code() == Some(&SqlState::INSUFFICIENT_PRIVILEGE) => { - warn!("unable to ALTER TABLE consensus, this is expected and OK when connecting with a read-only user"); - true + warn!( + "unable to ALTER TABLE consensus, this is expected and OK when connecting with a read-only user" + ); + PostgresMode::CockroachDB } + // Vanilla Postgres doesn't support the Cockroach zone configuration + // that we attempted, so we use that to determine what mode we're in. Err(e) if e.code() == Some(&SqlState::INVALID_PARAMETER_VALUE) || e.code() == Some(&SqlState::SYNTAX_ERROR) => { - info!("unable to initiate consensus with CRDB params, this is expected and OK when running against Postgres: {:?}", e); - false + info!( + "unable to initiate consensus with CRDB params, this is expected and OK when running against Postgres: {:?}", + e + ); + PostgresMode::Postgres } Err(e) => return Err(e.into()), }; - if !crdb_mode { + if mode != PostgresMode::CockroachDB { client .batch_execute(&format!("{}; {};", create_schema, SCHEMA)) .await?; } - Ok(PostgresConsensus { postgres_client }) + Ok(PostgresConsensus { + postgres_client, + dyncfg, + mode, + }) } /// Drops and recreates the `consensus` table in Postgres @@ -338,19 +375,45 @@ impl Consensus for PostgresConsensus { } let result = if let Some(expected) = expected { - // This query has been written to execute within a single - // network round-trip. The insert performance has been tuned - // against CockroachDB, ensuring it goes through the fast-path - // 1-phase commit of CRDB. Any changes to this query should - // confirm an EXPLAIN ANALYZE (VERBOSE) query plan contains - // `auto commit` - let q = r#" + /// This query has been written to execute within a single + /// network round-trip. The insert performance has been tuned + /// against CockroachDB, ensuring it goes through the fast-path + /// 1-phase commit of CRDB. Any changes to this query should + /// confirm an EXPLAIN ANALYZE (VERBOSE) query plan contains + /// `auto commit` + static CRDB_CAS_QUERY: &str = " INSERT INTO consensus (shard, sequence_number, data) SELECT $1, $2, $3 WHERE (SELECT sequence_number FROM consensus WHERE shard = $1 ORDER BY sequence_number DESC LIMIT 1) = $4; - "#; + "; + + /// This query has been written to ensure we only get row level + /// locks on the `(shard, seq_no)` we're trying to update. The insert + /// performance has been tuned against Postgres 15 to ensure it + /// minimizes possible serialization conflicts. 
+ static POSTGRES_CAS_QUERY: &str = " + WITH last_seq AS ( + SELECT sequence_number FROM consensus + WHERE shard = $1 + ORDER BY sequence_number DESC + LIMIT 1 + FOR UPDATE + ) + INSERT INTO consensus (shard, sequence_number, data) + SELECT $1, $2, $3 + FROM last_seq + WHERE last_seq.sequence_number = $4; + "; + + let q = if USE_POSTGRES_TUNED_QUERIES.get(&self.dyncfg) + && self.mode == PostgresMode::Postgres + { + POSTGRES_CAS_QUERY + } else { + CRDB_CAS_QUERY + }; let client = self.get_connection().await?; let statement = client.prepare_cached(q).await?; client @@ -414,12 +477,54 @@ impl Consensus for PostgresConsensus { } async fn truncate(&self, key: &str, seqno: SeqNo) -> Result { - let q = "DELETE FROM consensus - WHERE shard = $1 AND sequence_number < $2 AND - EXISTS( - SELECT * FROM consensus WHERE shard = $1 AND sequence_number >= $2 - )"; - + static CRDB_TRUNCATE_QUERY: &str = " + DELETE FROM consensus + WHERE shard = $1 AND sequence_number < $2 AND + EXISTS ( + SELECT * FROM consensus WHERE shard = $1 AND sequence_number >= $2 + ) + "; + + /// This query has been specifically tuned to ensure we get the minimal + /// number of __row__ locks possible, and that it doesn't conflict with + /// concurrently running compare and swap operations that are trying to + /// evolve the shard. + /// + /// It's performance has been benchmarked against Postgres 15. + /// + /// Note: The `ORDER BY` in the newer_exists CTE exists so we obtain a + /// row lock on the lowest possible sequence number. This ensures + /// minimal conflict between concurrently running truncate and append + /// operations. + static POSTGRES_TRUNCATE_QUERY: &str = " + WITH newer_exists AS ( + SELECT * FROM consensus + WHERE shard = $1 + AND sequence_number >= $2 + ORDER BY sequence_number ASC + LIMIT 1 + FOR UPDATE + ), + to_lock AS ( + SELECT ctid FROM consensus + WHERE shard = $1 + AND sequence_number < $2 + AND EXISTS (SELECT * FROM newer_exists) + ORDER BY sequence_number DESC + FOR UPDATE + ) + DELETE FROM consensus + USING to_lock + WHERE consensus.ctid = to_lock.ctid; + "; + + let q = if USE_POSTGRES_TUNED_QUERIES.get(&self.dyncfg) + && self.mode == PostgresMode::Postgres + { + POSTGRES_TRUNCATE_QUERY + } else { + CRDB_TRUNCATE_QUERY + }; let result = { let client = self.get_connection().await?; let statement = client.prepare_cached(q).await?; diff --git a/src/persist/src/s3.rs b/src/persist/src/s3.rs index 52199c18223e8..9c63eefd5bf38 100644 --- a/src/persist/src/s3.rs +++ b/src/persist/src/s3.rs @@ -276,8 +276,7 @@ impl S3BlobConfig { Arc::new( ConfigSet::default() .add(&ENABLE_S3_LGALLOC_CC_SIZES) - .add(&ENABLE_S3_LGALLOC_NONCC_SIZES) - .add(&ENABLE_ONE_ALLOC_PER_REQUEST), + .add(&ENABLE_S3_LGALLOC_NONCC_SIZES), ), ) .await?; @@ -345,12 +344,6 @@ pub(crate) const ENABLE_S3_LGALLOC_NONCC_SIZES: Config = Config::new( "A feature flag to enable copying fetched s3 data into lgalloc on non-cc sized clusters.", ); -pub(crate) const ENABLE_ONE_ALLOC_PER_REQUEST: Config = Config::new( - "persist_enable_one_alloc_per_request", - true, - "An incident flag to disable making only one lgalloc allocation per multi-part request.", -); - #[async_trait] impl Blob for S3Blob { async fn get(&self, key: &str) -> Result, ExternalError> { @@ -485,25 +478,18 @@ impl Blob for S3Blob { ENABLE_S3_LGALLOC_NONCC_SIZES.get(&self.cfg) }; - // Ideally we write all of the copy all of the bytes into a - // single allocation, but we retain a CYA fallback case. 
-        let enable_one_allocation =
-            ENABLE_ONE_ALLOC_PER_REQUEST.get(&self.cfg) && enable_s3_lgalloc;
+        // Copy all of the bytes off the network and into a single allocation.
         let mut buffer = match object.content_length() {
             Some(len @ 1..) => {
-                if enable_one_allocation {
-                    let len: u64 = len.try_into().expect("positive integer");
-                    // N.B. `lgalloc` cannot reallocate so we need to make sure the initial
-                    // allocation is large enough to fit then entire blob.
-                    let buf: MetricsRegion = self
-                        .metrics
-                        .lgbytes
-                        .persist_s3
-                        .new_region(usize::cast_from(len));
-                    Some(buf)
-                } else {
-                    None
-                }
+                let len: u64 = len.try_into().expect("positive integer");
+                // N.B. `lgalloc` cannot reallocate so we need to make sure the initial
+                // allocation is large enough to fit the entire blob.
+                let buf: MetricsRegion = self
+                    .metrics
+                    .lgbytes
+                    .persist_s3
+                    .new_region(usize::cast_from(len));
+                Some(buf)
             }
             // content-length of 0 isn't necessarily invalid.
             Some(len @ ..=-1) => {
@@ -1119,8 +1105,7 @@ mod tests {
             cfg: Arc::new(
                 ConfigSet::default()
                     .add(&ENABLE_S3_LGALLOC_CC_SIZES)
-                    .add(&ENABLE_S3_LGALLOC_NONCC_SIZES)
-                    .add(&ENABLE_ONE_ALLOC_PER_REQUEST),
+                    .add(&ENABLE_S3_LGALLOC_NONCC_SIZES),
             ),
             is_cc_active: true,
         };
diff --git a/src/repr/src/optimize.rs b/src/repr/src/optimize.rs
index 364cfdd502e1c..26a9c5129efcb 100644
--- a/src/repr/src/optimize.rs
+++ b/src/repr/src/optimize.rs
@@ -118,6 +118,10 @@ optimizer_feature_flags!({
     reoptimize_imported_views: bool,
     // See the feature flag of the same name.
     enable_reduce_reduction: bool,
+    // See the feature flag of the same name.
+    enable_let_prefix_extraction: bool,
+    // See the feature flag of the same name.
+    enable_projection_pushdown_after_relation_cse: bool,
 });
 
 /// A trait used to implement layered config construction.
diff --git a/src/service/src/local.rs b/src/service/src/local.rs
index fa839a3e9db01..1a44d31a133ab 100644
--- a/src/service/src/local.rs
+++ b/src/service/src/local.rs
@@ -20,7 +20,8 @@ use tokio::sync::mpsc::UnboundedReceiver;
 
 use crate::client::{GenericClient, Partitionable, Partitioned};
 
-pub trait Activatable {
+/// A trait for types that can be used to activate threads.
+pub trait Activatable: fmt::Debug + Send {
     fn activate(&self);
 }
 
@@ -36,23 +37,52 @@ impl Activatable for Thread {
     }
 }
 
+/// An activator for a thread.
+///
+/// This wraps any `Activatable` and has a `Drop` impl to ensure the thread is always activated
+/// when the activator is dropped. This is important to ensure workers have a chance to observe
+/// that their command channel has closed and prepare for reconnection.
+#[derive(Debug)]
+pub struct LocalActivator {
+    inner: Box<dyn Activatable>,
+}
+
+impl LocalActivator {
+    pub fn new<A: Activatable + 'static>(inner: A) -> Self {
+        Self {
+            inner: Box::new(inner),
+        }
+    }
+
+    fn activate(&self) {
+        self.inner.activate();
+    }
+}
+
+impl Drop for LocalActivator {
+    fn drop(&mut self) {
+        self.inner.activate();
+    }
+}
+
 /// A client to a thread in the same process.
 ///
 /// The thread is unparked on every call to [`send`](LocalClient::send) and on
 /// `Drop`.
 #[derive(Debug)]
-pub struct LocalClient {
+pub struct LocalClient<C, R> {
+    // Order is important here: we need to drop the `tx` before the activator so when the thread is
+    // unparked by the dropping of the activator it can observe that the sender has disconnected.
rx: UnboundedReceiver, tx: Sender, - tx_activator: A, + tx_activator: LocalActivator, } #[async_trait] -impl GenericClient for LocalClient +impl GenericClient for LocalClient where C: fmt::Debug + Send, R: fmt::Debug + Send, - A: fmt::Debug + Activatable + Send, { async fn send(&mut self, cmd: C) -> Result<(), anyhow::Error> { self.tx @@ -75,9 +105,9 @@ where } } -impl LocalClient { +impl LocalClient { /// Create a new instance of [`LocalClient`] from its parts. - pub fn new(rx: UnboundedReceiver, tx: Sender, tx_activator: A) -> Self { + pub fn new(rx: UnboundedReceiver, tx: Sender, tx_activator: LocalActivator) -> Self { Self { rx, tx, @@ -89,7 +119,7 @@ impl LocalClient { pub fn new_partitioned( rxs: Vec>, txs: Vec>, - tx_activators: Vec, + tx_activators: Vec, ) -> Partitioned where (C, R): Partitionable, @@ -103,16 +133,3 @@ impl LocalClient { Partitioned::new(clients) } } - -// We implement `Drop` so that we can wake each of the threads and have them -// notice the drop. -impl Drop for LocalClient { - fn drop(&mut self) { - // Drop the thread handle. - let (tx, _rx) = crossbeam_channel::unbounded(); - self.tx = tx; - // Unpark the thread once the handle is dropped, so that it can observe - // the emptiness. - self.tx_activator.activate(); - } -} diff --git a/src/sql-lexer/src/keywords.txt b/src/sql-lexer/src/keywords.txt index d58bf1815a484..76c7db5ccee74 100644 --- a/src/sql-lexer/src/keywords.txt +++ b/src/sql-lexer/src/keywords.txt @@ -29,6 +29,7 @@ Add Added Address Addresses +After Aggregate Aggregation Aligned @@ -110,6 +111,7 @@ Createnetworkpolicy Createrole Creation Cross +Cse Csv Current Cursor @@ -336,6 +338,7 @@ Primary Privatelink Privileges Progress +Projection Protobuf Protocol Public @@ -362,6 +365,7 @@ Refresh Regex Region Registry +Relation Rename Reoptimize Repeatable diff --git a/src/sql-parser/src/ast/defs/expr.rs b/src/sql-parser/src/ast/defs/expr.rs index ecf6dfb7064fd..432a07813f17f 100644 --- a/src/sql-parser/src/ast/defs/expr.rs +++ b/src/sql-parser/src/ast/defs/expr.rs @@ -321,34 +321,7 @@ impl AstDisplay for Expr { } } Expr::Cast { expr, data_type } => { - // We are potentially rewriting an expression like - // CAST( OP AS ) - // to - // OP :: - // which could incorrectly change the meaning of the expression - // as the `::` binds tightly. To be safe, we wrap the inner - // expression in parentheses - // ( OP ):: - // unless the inner expression is of a type that we know is - // safe to follow with a `::` to without wrapping. - let needs_wrap = !matches!( - **expr, - Expr::Nested(_) - | Expr::Value(_) - | Expr::Cast { .. } - | Expr::Function { .. } - | Expr::Identifier { .. } - | Expr::Collate { .. } - | Expr::HomogenizingFunction { .. } - | Expr::NullIf { .. 
} - ); - if needs_wrap { - f.write_str('('); - } f.write_node(&expr); - if needs_wrap { - f.write_str(')'); - } f.write_str("::"); f.write_node(data_type); } diff --git a/src/sql-parser/src/ast/defs/statement.rs b/src/sql-parser/src/ast/defs/statement.rs index 8891a63dd9494..0e09fb978f843 100644 --- a/src/sql-parser/src/ast/defs/statement.rs +++ b/src/sql-parser/src/ast/defs/statement.rs @@ -2331,6 +2331,7 @@ pub enum ClusterFeatureName { EnableEagerDeltaJoins, EnableVariadicLeftJoinLowering, EnableLetrecFixpointAnalysis, + EnableProjectionPushdownAfterRelationCse, } impl WithOptionName for ClusterFeatureName { @@ -2345,7 +2346,8 @@ impl WithOptionName for ClusterFeatureName { | Self::EnableNewOuterJoinLowering | Self::EnableEagerDeltaJoins | Self::EnableVariadicLeftJoinLowering - | Self::EnableLetrecFixpointAnalysis => false, + | Self::EnableLetrecFixpointAnalysis + | Self::EnableProjectionPushdownAfterRelationCse => false, } } } @@ -3552,98 +3554,134 @@ impl AstDisplay for ShowColumnsStatement { } impl_display_t!(ShowColumnsStatement); -/// `SHOW CREATE VIEW ` +/// `SHOW [REDACTED] CREATE VIEW ` #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct ShowCreateViewStatement { pub view_name: T::ItemName, + pub redacted: bool, } impl AstDisplay for ShowCreateViewStatement { fn fmt(&self, f: &mut AstFormatter) { - f.write_str("SHOW CREATE VIEW "); + f.write_str("SHOW "); + if self.redacted { + f.write_str("REDACTED "); + } + f.write_str("CREATE VIEW "); f.write_node(&self.view_name); } } impl_display_t!(ShowCreateViewStatement); -/// `SHOW CREATE MATERIALIZED VIEW ` +/// `SHOW [REDACTED] CREATE MATERIALIZED VIEW ` #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct ShowCreateMaterializedViewStatement { pub materialized_view_name: T::ItemName, + pub redacted: bool, } impl AstDisplay for ShowCreateMaterializedViewStatement { fn fmt(&self, f: &mut AstFormatter) { - f.write_str("SHOW CREATE MATERIALIZED VIEW "); + f.write_str("SHOW "); + if self.redacted { + f.write_str("REDACTED "); + } + f.write_str("CREATE MATERIALIZED VIEW "); f.write_node(&self.materialized_view_name); } } impl_display_t!(ShowCreateMaterializedViewStatement); -/// `SHOW CREATE SOURCE ` +/// `SHOW [REDACTED] CREATE SOURCE ` #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct ShowCreateSourceStatement { pub source_name: T::ItemName, + pub redacted: bool, } impl AstDisplay for ShowCreateSourceStatement { fn fmt(&self, f: &mut AstFormatter) { - f.write_str("SHOW CREATE SOURCE "); + f.write_str("SHOW "); + if self.redacted { + f.write_str("REDACTED "); + } + f.write_str("CREATE SOURCE "); f.write_node(&self.source_name); } } impl_display_t!(ShowCreateSourceStatement); -/// `SHOW CREATE TABLE ` +/// `SHOW [REDACTED] CREATE TABLE
` #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct ShowCreateTableStatement { pub table_name: T::ItemName, + pub redacted: bool, } impl AstDisplay for ShowCreateTableStatement { fn fmt(&self, f: &mut AstFormatter) { - f.write_str("SHOW CREATE TABLE "); + f.write_str("SHOW "); + if self.redacted { + f.write_str("REDACTED "); + } + f.write_str("CREATE TABLE "); f.write_node(&self.table_name); } } impl_display_t!(ShowCreateTableStatement); -/// `SHOW CREATE SINK ` +/// `SHOW [REDACTED] CREATE SINK ` #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct ShowCreateSinkStatement { pub sink_name: T::ItemName, + pub redacted: bool, } impl AstDisplay for ShowCreateSinkStatement { fn fmt(&self, f: &mut AstFormatter) { - f.write_str("SHOW CREATE SINK "); + f.write_str("SHOW "); + if self.redacted { + f.write_str("REDACTED "); + } + f.write_str("CREATE SINK "); f.write_node(&self.sink_name); } } impl_display_t!(ShowCreateSinkStatement); -/// `SHOW CREATE INDEX ` +/// `SHOW [REDACTED] CREATE INDEX ` #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct ShowCreateIndexStatement { pub index_name: T::ItemName, + pub redacted: bool, } impl AstDisplay for ShowCreateIndexStatement { fn fmt(&self, f: &mut AstFormatter) { - f.write_str("SHOW CREATE INDEX "); + f.write_str("SHOW "); + if self.redacted { + f.write_str("REDACTED "); + } + f.write_str("CREATE INDEX "); f.write_node(&self.index_name); } } impl_display_t!(ShowCreateIndexStatement); +/// `SHOW [REDACTED] CREATE CONNECTION ` #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct ShowCreateConnectionStatement { pub connection_name: T::ItemName, + pub redacted: bool, } impl AstDisplay for ShowCreateConnectionStatement { fn fmt(&self, f: &mut AstFormatter) { - f.write_str("SHOW CREATE CONNECTION "); + f.write_str("SHOW "); + if self.redacted { + f.write_str("REDACTED "); + } + f.write_str("CREATE CONNECTION "); f.write_node(&self.connection_name); } } @@ -3892,6 +3930,7 @@ pub enum ExplainPlanOptionName { EnableEagerDeltaJoins, EnableVariadicLeftJoinLowering, EnableLetrecFixpointAnalysis, + EnableProjectionPushdownAfterRelationCse, } impl WithOptionName for ExplainPlanOptionName { @@ -3926,7 +3965,8 @@ impl WithOptionName for ExplainPlanOptionName { | Self::EnableNewOuterJoinLowering | Self::EnableEagerDeltaJoins | Self::EnableVariadicLeftJoinLowering - | Self::EnableLetrecFixpointAnalysis => false, + | Self::EnableLetrecFixpointAnalysis + | Self::EnableProjectionPushdownAfterRelationCse => false, } } } diff --git a/src/sql-parser/src/ast/display.rs b/src/sql-parser/src/ast/display.rs index 4cce93217e4e4..d205588246b75 100644 --- a/src/sql-parser/src/ast/display.rs +++ b/src/sql-parser/src/ast/display.rs @@ -67,12 +67,15 @@ where } /// Describes the context in which to print an AST. +/// +/// TODO: Currently, only the simple format can be redacted, but, ideally, whether it's redacted and +/// whether it's stable would be orthogonal settings. #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum FormatMode { /// Simple is the normal way of printing for human consumption. Identifiers are quoted only if - /// necessary and sensative information is redacted. + /// necessary and sensitive information is not redacted. Simple, - /// SimpleRedacted is like Simple, but strips out string and number literals. + /// SimpleRedacted is like Simple, but strips out literals, e.g. strings and numbers. 
/// This makes SQL queries be "usage data", rather than "customer data" according to our /// data management policy, allowing us to introspect it. SimpleRedacted, diff --git a/src/sql-parser/src/lib.rs b/src/sql-parser/src/lib.rs index 7cde030f3f814..dc90d4ef154d3 100644 --- a/src/sql-parser/src/lib.rs +++ b/src/sql-parser/src/lib.rs @@ -94,6 +94,17 @@ pub fn datadriven_testcase(tc: &datadriven::TestCase) -> String { ); } } + + // Also check that the redacted version of the statement can be reparsed. This is + // important so that we are still able to pretty-print redacted statements, which + // helps during debugging. + let redacted = stmt.to_ast_string_redacted(); + let res = parser::parse_statements(&redacted); + assert!( + res.is_ok(), + "redacted statement could not be reparsed: {res:?}\noriginal:\n{stmt}\nredacted:\n{redacted}" + ); + if tc.args.contains_key("roundtrip") { format!("{}\n", stmt) } else { diff --git a/src/sql-parser/src/parser.rs b/src/sql-parser/src/parser.rs index 86cfb96f3fe3c..4f7757df02f91 100644 --- a/src/sql-parser/src/parser.rs +++ b/src/sql-parser/src/parser.rs @@ -888,10 +888,40 @@ impl<'a> Parser<'a> { self.expect_keyword(AS)?; let data_type = self.parse_data_type()?; self.expect_token(&Token::RParen)?; - Ok(Expr::Cast { - expr: Box::new(expr), - data_type, - }) + // We are potentially rewriting an expression like + // CAST( OP AS ) + // to + // OP :: + // (because we print Expr::Cast always as a Postgres-style cast, i.e. `::`) + // which could incorrectly change the meaning of the expression + // as the `::` binds tightly. To be safe, we wrap the inner + // expression in parentheses + // ( OP ):: + // unless the inner expression is of a kind that we know is + // safe to follow with a `::` without wrapping. + if !matches!( + expr, + Expr::Nested(_) + | Expr::Value(_) + | Expr::Cast { .. } + | Expr::Function { .. } + | Expr::Identifier { .. } + | Expr::Collate { .. } + | Expr::HomogenizingFunction { .. } + | Expr::NullIf { .. } + | Expr::Subquery { .. } + | Expr::Parameter(..) + ) { + Ok(Expr::Cast { + expr: Box::new(Expr::Nested(Box::new(expr))), + data_type, + }) + } else { + Ok(Expr::Cast { + expr: Box::new(expr), + data_type, + }) + } } /// Parse a SQL EXISTS expression e.g. `WHERE EXISTS(SELECT ...)`. @@ -1299,10 +1329,21 @@ impl<'a> Parser<'a> { self.expect_token(&Token::RBracket)?; } - Ok(Expr::Subscript { - expr: Box::new(expr), - positions, - }) + // If the expression that is being cast can end with a type name, then let's parenthesize + // it. Otherwise, the `[...]` would melt into the type name (making it an array type). + // Specifically, the only expressions whose printing can end with a type name are casts, so + // check for that. + if matches!(expr, Expr::Cast { .. }) { + Ok(Expr::Subscript { + expr: Box::new(Expr::Nested(Box::new(expr))), + positions, + }) + } else { + Ok(Expr::Subscript { + expr: Box::new(expr), + positions, + }) + } } // Parse calls to substring(), which can take the form: @@ -7635,6 +7676,14 @@ impl<'a> Parser<'a> { } fn parse_show(&mut self) -> Result, ParserError> { + let redacted = self.parse_keyword(REDACTED); + if redacted && !self.peek_keyword(CREATE) { + return parser_err!( + self, + self.peek_pos(), + "SHOW REDACTED is only supported for SHOW REDACTED CREATE ..." 
+ ); + } if self.parse_one_of_keywords(&[COLUMNS, FIELDS]).is_some() { self.parse_show_columns() } else if self.parse_keyword(OBJECTS) { @@ -7775,36 +7824,50 @@ impl<'a> Parser<'a> { } else if self.parse_keywords(&[CREATE, VIEW]) { Ok(ShowStatement::ShowCreateView(ShowCreateViewStatement { view_name: self.parse_raw_name()?, + redacted, })) } else if self.parse_keywords(&[CREATE, MATERIALIZED, VIEW]) { Ok(ShowStatement::ShowCreateMaterializedView( ShowCreateMaterializedViewStatement { materialized_view_name: self.parse_raw_name()?, + redacted, }, )) } else if self.parse_keywords(&[CREATE, SOURCE]) { Ok(ShowStatement::ShowCreateSource(ShowCreateSourceStatement { source_name: self.parse_raw_name()?, + redacted, })) } else if self.parse_keywords(&[CREATE, TABLE]) { Ok(ShowStatement::ShowCreateTable(ShowCreateTableStatement { table_name: self.parse_raw_name()?, + redacted, })) } else if self.parse_keywords(&[CREATE, SINK]) { Ok(ShowStatement::ShowCreateSink(ShowCreateSinkStatement { sink_name: self.parse_raw_name()?, + redacted, })) } else if self.parse_keywords(&[CREATE, INDEX]) { Ok(ShowStatement::ShowCreateIndex(ShowCreateIndexStatement { index_name: self.parse_raw_name()?, + redacted, })) } else if self.parse_keywords(&[CREATE, CONNECTION]) { Ok(ShowStatement::ShowCreateConnection( ShowCreateConnectionStatement { connection_name: self.parse_raw_name()?, + redacted, }, )) } else if self.parse_keywords(&[CREATE, CLUSTER]) { + if redacted { + return parser_err!( + self, + self.peek_prev_pos(), + "SHOW REDACTED CREATE CLUSTER is not supported" + ); + } Ok(ShowStatement::ShowCreateCluster( ShowCreateClusterStatement { cluster_name: RawClusterName::Unresolved(self.parse_identifier()?), diff --git a/src/sql-parser/tests/sqlparser_common.rs b/src/sql-parser/tests/sqlparser_common.rs index 21fd6752da6d0..367ba4ae0e30b 100644 --- a/src/sql-parser/tests/sqlparser_common.rs +++ b/src/sql-parser/tests/sqlparser_common.rs @@ -37,33 +37,10 @@ use mz_sql_parser::parser::{ #[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function `rust_psm_stack_pointer` on OS `linux` fn datadriven() { walk("tests/testdata", |f| { - f.run(|tc| -> String { - if tc.directive == "parse-statement" { - // Verify that redacted statements can be parsed. This is important so that we are - // still able to pretty-print redacted statements which helps out during debugging. 
- verify_parse_redacted(&tc.input); - } - datadriven_testcase(tc) - }) + f.run(|tc| -> String { datadriven_testcase(tc) }) }); } -fn verify_parse_redacted(stmt: &str) { - let stmt = match parse_statements(stmt) { - Ok(stmt) => match stmt.into_iter().next() { - Some(stmt) => stmt.ast, - None => return, - }, - Err(_) => return, - }; - let redacted = stmt.to_ast_string_redacted(); - let res = parse_statements(&redacted); - assert!( - res.is_ok(), - "redacted statement could not be parsed: {res:?}\noriginal:\n{stmt}\nredacted:\n{redacted}" - ); -} - #[mz_ore::test] #[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function `rust_psm_stack_pointer` on OS `linux` fn op_precedence() -> Result<(), Box> { diff --git a/src/sql-parser/tests/testdata/select b/src/sql-parser/tests/testdata/select index 55a58bcbf91e4..cf7398b0505da 100644 --- a/src/sql-parser/tests/testdata/select +++ b/src/sql-parser/tests/testdata/select @@ -1453,6 +1453,137 @@ SELECT MAP[1 => a, b => 1 + 2, a || 'b' => (SELECT 1), c => LIST[4, 5]] => Select(SelectStatement { query: Query { ctes: Simple([]), body: Select(Select { distinct: None, projection: [Expr { expr: Map([MapEntry { key: Value(Number("1")), value: Identifier([Ident("a")]) }, MapEntry { key: Identifier([Ident("b")]), value: Op { op: Op { namespace: None, op: "+" }, expr1: Value(Number("1")), expr2: Some(Value(Number("2"))) } }, MapEntry { key: Op { op: Op { namespace: None, op: "||" }, expr1: Identifier([Ident("a")]), expr2: Some(Value(String("b"))) }, value: Subquery(Query { ctes: Simple([]), body: Select(Select { distinct: None, projection: [Expr { expr: Value(Number("1")), alias: None }], from: [], selection: None, group_by: [], having: None, qualify: None, options: [] }), order_by: [], limit: None, offset: None }) }, MapEntry { key: Identifier([Ident("c")]), value: List([Value(Number("4")), Value(Number("5"))]) }]), alias: None }], from: [], selection: None, group_by: [], having: None, qualify: None, options: [] }), order_by: [], limit: None, offset: None }, as_of: None }) +# While maybe unexpected, list[] gets parsed as an array type. + +parse-statement +SELECT foo::uuid list[2] FROM fake_table; +---- +SELECT foo::uuid list[] FROM fake_table +=> +Select(SelectStatement { query: Query { ctes: Simple([]), body: Select(Select { distinct: None, projection: [Expr { expr: Cast { expr: Identifier([Ident("foo")]), data_type: Array(List(Other { name: Name(UnresolvedItemName([Ident("uuid")])), typ_mod: [] })) }, alias: None }], from: [TableWithJoins { relation: Table { name: Name(UnresolvedItemName([Ident("fake_table")])), alias: None }, joins: [] }], selection: None, group_by: [], having: None, qualify: None, options: [] }), order_by: [], limit: None, offset: None }, as_of: None }) + +# Subscripting after a cast. The danger here is that if the cast is to list or array, then the subscript could melt into +# the type as if it were creating an array type if the parser and/or AST printer is not careful. 
+ +parse-statement +SELECT CAST(my_json AS uuid list)[my_index] FROM fake_table; +---- +SELECT (my_json::uuid list)[my_index] FROM fake_table +=> +Select(SelectStatement { query: Query { ctes: Simple([]), body: Select(Select { distinct: None, projection: [Expr { expr: Subscript { expr: Nested(Cast { expr: Identifier([Ident("my_json")]), data_type: List(Other { name: Name(UnresolvedItemName([Ident("uuid")])), typ_mod: [] }) }), positions: [SubscriptPosition { start: Some(Identifier([Ident("my_index")])), end: None, explicit_slice: false }] }, alias: None }], from: [TableWithJoins { relation: Table { name: Name(UnresolvedItemName([Ident("fake_table")])), alias: None }, joins: [] }], selection: None, group_by: [], having: None, qualify: None, options: [] }), order_by: [], limit: None, offset: None }, as_of: None }) + +parse-statement +SELECT CAST(my_json AS uuid[])[my_index] FROM fake_table; +---- +SELECT (my_json::uuid[])[my_index] FROM fake_table +=> +Select(SelectStatement { query: Query { ctes: Simple([]), body: Select(Select { distinct: None, projection: [Expr { expr: Subscript { expr: Nested(Cast { expr: Identifier([Ident("my_json")]), data_type: Array(Other { name: Name(UnresolvedItemName([Ident("uuid")])), typ_mod: [] }) }), positions: [SubscriptPosition { start: Some(Identifier([Ident("my_index")])), end: None, explicit_slice: false }] }, alias: None }], from: [TableWithJoins { relation: Table { name: Name(UnresolvedItemName([Ident("fake_table")])), alias: None }, joins: [] }], selection: None, group_by: [], having: None, qualify: None, options: [] }), order_by: [], limit: None, offset: None }, as_of: None }) + +# This would not plan, because we are subscripting into a uuid, but it's still nice if it roundtrips through the parser. +parse-statement +SELECT CAST(my_json AS uuid)[my_index] FROM fake_table; +---- +SELECT (my_json::uuid)[my_index] FROM fake_table +=> +Select(SelectStatement { query: Query { ctes: Simple([]), body: Select(Select { distinct: None, projection: [Expr { expr: Subscript { expr: Nested(Cast { expr: Identifier([Ident("my_json")]), data_type: Other { name: Name(UnresolvedItemName([Ident("uuid")])), typ_mod: [] } }), positions: [SubscriptPosition { start: Some(Identifier([Ident("my_index")])), end: None, explicit_slice: false }] }, alias: None }], from: [TableWithJoins { relation: Table { name: Name(UnresolvedItemName([Ident("fake_table")])), alias: None }, joins: [] }], selection: None, group_by: [], having: None, qualify: None, options: [] }), order_by: [], limit: None, offset: None }, as_of: None }) + +# Same as above, but with Postgres-style casts. 
+ +parse-statement +SELECT (my_json::uuid list)[my_index] FROM fake_table; +---- +SELECT (my_json::uuid list)[my_index] FROM fake_table +=> +Select(SelectStatement { query: Query { ctes: Simple([]), body: Select(Select { distinct: None, projection: [Expr { expr: Subscript { expr: Nested(Cast { expr: Identifier([Ident("my_json")]), data_type: List(Other { name: Name(UnresolvedItemName([Ident("uuid")])), typ_mod: [] }) }), positions: [SubscriptPosition { start: Some(Identifier([Ident("my_index")])), end: None, explicit_slice: false }] }, alias: None }], from: [TableWithJoins { relation: Table { name: Name(UnresolvedItemName([Ident("fake_table")])), alias: None }, joins: [] }], selection: None, group_by: [], having: None, qualify: None, options: [] }), order_by: [], limit: None, offset: None }, as_of: None }) + +parse-statement +SELECT (my_json::uuid[])[my_index] FROM fake_table; +---- +SELECT (my_json::uuid[])[my_index] FROM fake_table +=> +Select(SelectStatement { query: Query { ctes: Simple([]), body: Select(Select { distinct: None, projection: [Expr { expr: Subscript { expr: Nested(Cast { expr: Identifier([Ident("my_json")]), data_type: Array(Other { name: Name(UnresolvedItemName([Ident("uuid")])), typ_mod: [] }) }), positions: [SubscriptPosition { start: Some(Identifier([Ident("my_index")])), end: None, explicit_slice: false }] }, alias: None }], from: [TableWithJoins { relation: Table { name: Name(UnresolvedItemName([Ident("fake_table")])), alias: None }, joins: [] }], selection: None, group_by: [], having: None, qualify: None, options: [] }), order_by: [], limit: None, offset: None }, as_of: None }) + +# This would not plan, because we are subscripting into a uuid, but it's still nice if it roundtrips through the parser. +parse-statement +SELECT (my_json::uuid)[my_index] FROM fake_table; +---- +SELECT (my_json::uuid)[my_index] FROM fake_table +=> +Select(SelectStatement { query: Query { ctes: Simple([]), body: Select(Select { distinct: None, projection: [Expr { expr: Subscript { expr: Nested(Cast { expr: Identifier([Ident("my_json")]), data_type: Other { name: Name(UnresolvedItemName([Ident("uuid")])), typ_mod: [] } }), positions: [SubscriptPosition { start: Some(Identifier([Ident("my_index")])), end: None, explicit_slice: false }] }, alias: None }], from: [TableWithJoins { relation: Table { name: Name(UnresolvedItemName([Ident("fake_table")])), alias: None }, joins: [] }], selection: None, group_by: [], having: None, qualify: None, options: [] }), order_by: [], limit: None, offset: None }, as_of: None }) + +# Casts are always printed as Postgres-style casts, that is, with `::`. When going from `CAST` to `::`, we have to avoid +# the daner of `::` binding to just part of the expression that was originally being cast. E.g. rewriting +# CAST(a + b AS integer) +# to +# a + b::integer +# without adding a parenthesis would be bad. 
+parse-statement +SELECT CAST(a + b AS integer) FROM fake_table; +---- +SELECT (a + b)::int4 FROM fake_table +=> +Select(SelectStatement { query: Query { ctes: Simple([]), body: Select(Select { distinct: None, projection: [Expr { expr: Cast { expr: Nested(Op { op: Op { namespace: None, op: "+" }, expr1: Identifier([Ident("a")]), expr2: Some(Identifier([Ident("b")])) }), data_type: Other { name: Name(UnresolvedItemName([Ident("int4")])), typ_mod: [] } }, alias: None }], from: [TableWithJoins { relation: Table { name: Name(UnresolvedItemName([Ident("fake_table")])), alias: None }, joins: [] }], selection: None, group_by: [], having: None, qualify: None, options: [] }), order_by: [], limit: None, offset: None }, as_of: None }) + +# We should avoid inserting superfluous parenthesis, or it won't roundtrip after printing the cast in the Postgres +# style, because some parser code paths eat superfluous parenthesis, including the code path that handles +# Postgres-style casts. +parse-statement +SELECT round(1.5678, CAST((SELECT n FROM nums) AS integer)); +---- +SELECT round(1.5678, (SELECT n FROM nums)::int4) +=> +Select(SelectStatement { query: Query { ctes: Simple([]), body: Select(Select { distinct: None, projection: [Expr { expr: Function(Function { name: Name(UnresolvedItemName([Ident("round")])), args: Args { args: [Value(Number("1.5678")), Cast { expr: Subquery(Query { ctes: Simple([]), body: Select(Select { distinct: None, projection: [Expr { expr: Identifier([Ident("n")]), alias: None }], from: [TableWithJoins { relation: Table { name: Name(UnresolvedItemName([Ident("nums")])), alias: None }, joins: [] }], selection: None, group_by: [], having: None, qualify: None, options: [] }), order_by: [], limit: None, offset: None }), data_type: Other { name: Name(UnresolvedItemName([Ident("int4")])), typ_mod: [] } }], order_by: [] }, filter: None, over: None, distinct: false }), alias: None }], from: [], selection: None, group_by: [], having: None, qualify: None, options: [] }), order_by: [], limit: None, offset: None }, as_of: None }) + +# Prepared statement parameter handling in casts. (Note: some extra wrapping parens here are currently not removed.) 
+parse-statement +select $2::int as col +---- +SELECT $2::int4 AS col +=> +Select(SelectStatement { query: Query { ctes: Simple([]), body: Select(Select { distinct: None, projection: [Expr { expr: Cast { expr: Parameter(2), data_type: Other { name: Name(UnresolvedItemName([Ident("int4")])), typ_mod: [] } }, alias: Some(Ident("col")) }], from: [], selection: None, group_by: [], having: None, qualify: None, options: [] }), order_by: [], limit: None, offset: None }, as_of: None }) + +parse-statement +select CAST($2 AS int) as col +---- +SELECT $2::int4 AS col +=> +Select(SelectStatement { query: Query { ctes: Simple([]), body: Select(Select { distinct: None, projection: [Expr { expr: Cast { expr: Parameter(2), data_type: Other { name: Name(UnresolvedItemName([Ident("int4")])), typ_mod: [] } }, alias: Some(Ident("col")) }], from: [], selection: None, group_by: [], having: None, qualify: None, options: [] }), order_by: [], limit: None, offset: None }, as_of: None }) + +parse-statement +select $1 + ($2)::int as col +---- +SELECT $1 + ($2)::int4 AS col +=> +Select(SelectStatement { query: Query { ctes: Simple([]), body: Select(Select { distinct: None, projection: [Expr { expr: Op { op: Op { namespace: None, op: "+" }, expr1: Parameter(1), expr2: Some(Cast { expr: Nested(Parameter(2)), data_type: Other { name: Name(UnresolvedItemName([Ident("int4")])), typ_mod: [] } }) }, alias: Some(Ident("col")) }], from: [], selection: None, group_by: [], having: None, qualify: None, options: [] }), order_by: [], limit: None, offset: None }, as_of: None }) + +parse-statement +select $1 + $2::int as col +---- +SELECT $1 + $2::int4 AS col +=> +Select(SelectStatement { query: Query { ctes: Simple([]), body: Select(Select { distinct: None, projection: [Expr { expr: Op { op: Op { namespace: None, op: "+" }, expr1: Parameter(1), expr2: Some(Cast { expr: Parameter(2), data_type: Other { name: Name(UnresolvedItemName([Ident("int4")])), typ_mod: [] } }) }, alias: Some(Ident("col")) }], from: [], selection: None, group_by: [], having: None, qualify: None, options: [] }), order_by: [], limit: None, offset: None }, as_of: None }) + +parse-statement +select ($1 + $2)::int as col +---- +SELECT ($1 + $2)::int4 AS col +=> +Select(SelectStatement { query: Query { ctes: Simple([]), body: Select(Select { distinct: None, projection: [Expr { expr: Cast { expr: Nested(Op { op: Op { namespace: None, op: "+" }, expr1: Parameter(1), expr2: Some(Parameter(2)) }), data_type: Other { name: Name(UnresolvedItemName([Ident("int4")])), typ_mod: [] } }, alias: Some(Ident("col")) }], from: [], selection: None, group_by: [], having: None, qualify: None, options: [] }), order_by: [], limit: None, offset: None }, as_of: None }) + +parse-statement +select ((($1)))::int as col +---- +SELECT ((($1)))::int4 AS col +=> +Select(SelectStatement { query: Query { ctes: Simple([]), body: Select(Select { distinct: None, projection: [Expr { expr: Cast { expr: Nested(Nested(Nested(Parameter(1)))), data_type: Other { name: Name(UnresolvedItemName([Ident("int4")])), typ_mod: [] } }, alias: Some(Ident("col")) }], from: [], selection: None, group_by: [], having: None, qualify: None, options: [] }), order_by: [], limit: None, offset: None }, as_of: None }) + +parse-statement +select ((($1 + $2)))::int as col +---- +SELECT ((($1 + $2)))::int4 AS col +=> +Select(SelectStatement { query: Query { ctes: Simple([]), body: Select(Select { distinct: None, projection: [Expr { expr: Cast { expr: Nested(Nested(Nested(Op { op: Op { namespace: None, op: "+" }, expr1: 
Parameter(1), expr2: Some(Parameter(2)) }))), data_type: Other { name: Name(UnresolvedItemName([Ident("int4")])), typ_mod: [] } }, alias: Some(Ident("col")) }], from: [], selection: None, group_by: [], having: None, qualify: None, options: [] }), order_by: [], limit: None, offset: None }, as_of: None }) + # Map subqueries parse-statement SELECT MAP[] diff --git a/src/sql-parser/tests/testdata/show b/src/sql-parser/tests/testdata/show index a90984fcec100..6564dfc7530c6 100644 --- a/src/sql-parser/tests/testdata/show +++ b/src/sql-parser/tests/testdata/show @@ -340,49 +340,49 @@ SHOW CREATE CONNECTION "FOO" ---- SHOW CREATE CONNECTION "FOO" => -Show(ShowCreateConnection(ShowCreateConnectionStatement { connection_name: Name(UnresolvedItemName([Ident("FOO")])) })) +Show(ShowCreateConnection(ShowCreateConnectionStatement { connection_name: Name(UnresolvedItemName([Ident("FOO")])), redacted: false })) parse-statement SHOW CREATE TABLE "FOO" ---- SHOW CREATE TABLE "FOO" => -Show(ShowCreateTable(ShowCreateTableStatement { table_name: Name(UnresolvedItemName([Ident("FOO")])) })) +Show(ShowCreateTable(ShowCreateTableStatement { table_name: Name(UnresolvedItemName([Ident("FOO")])), redacted: false })) parse-statement SHOW CREATE VIEW foo ---- SHOW CREATE VIEW foo => -Show(ShowCreateView(ShowCreateViewStatement { view_name: Name(UnresolvedItemName([Ident("foo")])) })) +Show(ShowCreateView(ShowCreateViewStatement { view_name: Name(UnresolvedItemName([Ident("foo")])), redacted: false })) parse-statement SHOW CREATE MATERIALIZED VIEW foo ---- SHOW CREATE MATERIALIZED VIEW foo => -Show(ShowCreateMaterializedView(ShowCreateMaterializedViewStatement { materialized_view_name: Name(UnresolvedItemName([Ident("foo")])) })) +Show(ShowCreateMaterializedView(ShowCreateMaterializedViewStatement { materialized_view_name: Name(UnresolvedItemName([Ident("foo")])), redacted: false })) parse-statement SHOW CREATE SINK foo ---- SHOW CREATE SINK foo => -Show(ShowCreateSink(ShowCreateSinkStatement { sink_name: Name(UnresolvedItemName([Ident("foo")])) })) +Show(ShowCreateSink(ShowCreateSinkStatement { sink_name: Name(UnresolvedItemName([Ident("foo")])), redacted: false })) parse-statement SHOW CREATE INDEX foo ---- SHOW CREATE INDEX foo => -Show(ShowCreateIndex(ShowCreateIndexStatement { index_name: Name(UnresolvedItemName([Ident("foo")])) })) +Show(ShowCreateIndex(ShowCreateIndexStatement { index_name: Name(UnresolvedItemName([Ident("foo")])), redacted: false })) parse-statement SHOW CREATE SOURCE foo ---- SHOW CREATE SOURCE foo => -Show(ShowCreateSource(ShowCreateSourceStatement { source_name: Name(UnresolvedItemName([Ident("foo")])) })) +Show(ShowCreateSource(ShowCreateSourceStatement { source_name: Name(UnresolvedItemName([Ident("foo")])), redacted: false })) parse-statement SHOW CREATE CLUSTER foo @@ -805,3 +805,178 @@ SHOW ROLE MEMBERSHIP FOR joe SHOW ROLE MEMBERSHIP FOR joe => Show(ShowObjects(ShowObjectsStatement { object_type: RoleMembership { role: Some(Ident("joe")) }, from: None, filter: None })) + +parse-statement +SHOW REDACTED CREATE VIEW foo +---- +SHOW REDACTED CREATE VIEW foo +=> +Show(ShowCreateView(ShowCreateViewStatement { view_name: Name(UnresolvedItemName([Ident("foo")])), redacted: true })) + +parse-statement +SHOW REDACTED CREATE MATERIALIZED VIEW foo +---- +SHOW REDACTED CREATE MATERIALIZED VIEW foo +=> +Show(ShowCreateMaterializedView(ShowCreateMaterializedViewStatement { materialized_view_name: Name(UnresolvedItemName([Ident("foo")])), redacted: true })) + +parse-statement +SHOW REDACTED CREATE 
SOURCE foo +---- +SHOW REDACTED CREATE SOURCE foo +=> +Show(ShowCreateSource(ShowCreateSourceStatement { source_name: Name(UnresolvedItemName([Ident("foo")])), redacted: true })) + +parse-statement +SHOW REDACTED CREATE TABLE foo +---- +SHOW REDACTED CREATE TABLE foo +=> +Show(ShowCreateTable(ShowCreateTableStatement { table_name: Name(UnresolvedItemName([Ident("foo")])), redacted: true })) + +parse-statement +SHOW REDACTED CREATE SINK foo +---- +SHOW REDACTED CREATE SINK foo +=> +Show(ShowCreateSink(ShowCreateSinkStatement { sink_name: Name(UnresolvedItemName([Ident("foo")])), redacted: true })) + +parse-statement +SHOW REDACTED CREATE INDEX foo +---- +SHOW REDACTED CREATE INDEX foo +=> +Show(ShowCreateIndex(ShowCreateIndexStatement { index_name: Name(UnresolvedItemName([Ident("foo")])), redacted: true })) + +parse-statement +SHOW REDACTED CREATE CONNECTION foo +---- +SHOW REDACTED CREATE CONNECTION foo +=> +Show(ShowCreateConnection(ShowCreateConnectionStatement { connection_name: Name(UnresolvedItemName([Ident("foo")])), redacted: true })) + +parse-statement +SHOW REDACTED COLUMNS foo +---- +error: SHOW REDACTED is only supported for SHOW REDACTED CREATE ... +SHOW REDACTED COLUMNS foo + ^ + +parse-statement +SHOW REDACTED OBJECTS +---- +error: SHOW REDACTED is only supported for SHOW REDACTED CREATE ... +SHOW REDACTED OBJECTS + ^ + +parse-statement +SHOW REDACTED VIEWS +---- +error: SHOW REDACTED is only supported for SHOW REDACTED CREATE ... +SHOW REDACTED VIEWS + ^ + +parse-statement +SHOW REDACTED MATERIALIZED VIEWS +---- +error: SHOW REDACTED is only supported for SHOW REDACTED CREATE ... +SHOW REDACTED MATERIALIZED VIEWS + ^ + +parse-statement +SHOW REDACTED CLUSTER +---- +error: SHOW REDACTED is only supported for SHOW REDACTED CREATE ... +SHOW REDACTED CLUSTER + ^ + +parse-statement +SHOW REDACTED PRIVILEGES +---- +error: SHOW REDACTED is only supported for SHOW REDACTED CREATE ... +SHOW REDACTED PRIVILEGES + ^ + +parse-statement +SHOW REDACTED DEFAULT PRIVILEGES +---- +error: SHOW REDACTED is only supported for SHOW REDACTED CREATE ... +SHOW REDACTED DEFAULT PRIVILEGES + ^ + +parse-statement +SHOW REDACTED ROLE MEMBERSHIP +---- +error: SHOW REDACTED is only supported for SHOW REDACTED CREATE ... +SHOW REDACTED ROLE MEMBERSHIP + ^ + +parse-statement +SHOW REDACTED CREATE CLUSTER +---- +error: SHOW REDACTED CREATE CLUSTER is not supported +SHOW REDACTED CREATE CLUSTER + ^ + +parse-statement +SHOW REDACTED TRANSACTION ISOLATION LEVEL +---- +error: SHOW REDACTED is only supported for SHOW REDACTED CREATE ... +SHOW REDACTED TRANSACTION ISOLATION LEVEL + ^ + +parse-statement +SHOW REDACTED TIME ZONE +---- +error: SHOW REDACTED is only supported for SHOW REDACTED CREATE ... +SHOW REDACTED TIME ZONE + ^ + +parse-statement +SHOW REDACTED foo +---- +error: SHOW REDACTED is only supported for SHOW REDACTED CREATE ... +SHOW REDACTED foo + ^ + +parse-statement +SHOW +---- +error: Expected identifier, found EOF +SHOW + ^ + +parse-statement +SHOW REDACTED +---- +error: SHOW REDACTED is only supported for SHOW REDACTED CREATE ... +SHOW REDACTED + ^ + +parse-statement +SHOW CREATE VIEW REDACTED v +---- +error: Expected end of statement, found identifier "v" +SHOW CREATE VIEW REDACTED v + ^ + +parse-statement +SHOW VIEW +---- +SHOW view +=> +Show(ShowVariable(ShowVariableStatement { variable: Ident("view") })) + +parse-statement +SHOW REDACTED MATERIALIZED mv1 +---- +error: SHOW REDACTED is only supported for SHOW REDACTED CREATE ... 
+SHOW REDACTED MATERIALIZED mv1 + ^ + +parse-statement +SHOW REDACTED MATERIALIZED +---- +error: SHOW REDACTED is only supported for SHOW REDACTED CREATE ... +SHOW REDACTED MATERIALIZED + ^ diff --git a/src/sql-pretty/src/doc.rs b/src/sql-pretty/src/doc.rs index fa32d1934abf3..1d6308b799aa7 100644 --- a/src/sql-pretty/src/doc.rs +++ b/src/sql-pretty/src/doc.rs @@ -596,22 +596,7 @@ pub fn doc_expr(v: &Expr) -> RcDoc { bracket_doc(RcDoc::text("CASE"), doc, RcDoc::text("END"), RcDoc::line()) } Expr::Cast { expr, data_type } => { - // See AstDisplay for Expr for an explanation of this. - let needs_wrap = !matches!( - **expr, - Expr::Nested(_) - | Expr::Value(_) - | Expr::Cast { .. } - | Expr::Function { .. } - | Expr::Identifier { .. } - | Expr::Collate { .. } - | Expr::HomogenizingFunction { .. } - | Expr::NullIf { .. } - ); - let mut doc = doc_expr(expr); - if needs_wrap { - doc = bracket("(", doc, ")"); - } + let doc = doc_expr(expr); RcDoc::concat([doc, RcDoc::text(format!("::{}", data_type.to_ast_string()))]) } Expr::Nested(ast) => bracket("(", doc_expr(ast), ")"), diff --git a/src/sql/src/func.rs b/src/sql/src/func.rs index 8719c8673e483..bbb932c0bfbf6 100644 --- a/src/sql/src/func.rs +++ b/src/sql/src/func.rs @@ -1866,6 +1866,9 @@ pub static PG_CATALOG_BUILTINS: LazyLock> = LazyLoc params!(Float64) => Operation::nullary(|_ecx| catalog_name_only!("avg")) => Float64, 2105; params!(Interval) => Operation::nullary(|_ecx| catalog_name_only!("avg")) => Interval, 2106; }, + "bit_count" => Scalar { + params!(Bytes) => UnaryFunc::BitCountBytes(func::BitCountBytes) => Int64, 6163; + }, "bit_length" => Scalar { params!(Bytes) => UnaryFunc::BitLengthBytes(func::BitLengthBytes) => Int32, 1810; params!(String) => UnaryFunc::BitLengthString(func::BitLengthString) => Int32, 1811; @@ -2085,6 +2088,9 @@ pub static PG_CATALOG_BUILTINS: LazyLock> = LazyLoc END" ) => String, 1081; }, + "get_bit" => Scalar { + params!(Bytes, Int32) => BinaryFunc::GetBit => Int32, 723; + }, "get_byte" => Scalar { params!(Bytes, Int32) => BinaryFunc::GetByte => Int32, 721; }, @@ -3040,6 +3046,8 @@ pub static PG_CATALOG_BUILTINS: LazyLock> = LazyLoc "count" => Aggregate { params!() => Operation::nullary(|_ecx| { // COUNT(*) is equivalent to COUNT(true). + // This is mirrored in `AggregateExpr::is_count_asterisk`, so if you modify this, + // then attend to that code also. Ok((HirScalarExpr::literal_true(), AggregateFunc::Count)) }) => Int64, 2803; params!(Any) => AggregateFunc::Count => Int64, 2147; diff --git a/src/sql/src/plan/statement/ddl.rs b/src/sql/src/plan/statement/ddl.rs index a4ad1b7bf9c7f..c1da64ce2e3a3 100644 --- a/src/sql/src/plan/statement/ddl.rs +++ b/src/sql/src/plan/statement/ddl.rs @@ -4359,7 +4359,12 @@ generate_extracted_config!( (EnableEagerDeltaJoins, Option, Default(None)), (EnableNewOuterJoinLowering, Option, Default(None)), (EnableVariadicLeftJoinLowering, Option, Default(None)), - (EnableLetrecFixpointAnalysis, Option, Default(None)) + (EnableLetrecFixpointAnalysis, Option, Default(None)), + ( + EnableProjectionPushdownAfterRelationCse, + Option, + Default(None) + ) ); /// Convert a [`CreateClusterStatement`] into a [`Plan`]. 
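The `show` testdata above pins down the intended grammar for the new keyword: `REDACTED` is accepted only when it is immediately followed by `CREATE <object>`, `SHOW REDACTED CREATE CLUSTER` is rejected with its own message, and a bare `SHOW VIEW` still falls through to a variable read. A minimal sketch of that gating, using hypothetical token and statement types rather than the real `mz-sql-parser` API:

```rust
// Hypothetical, simplified token and statement types; the real parser in
// `mz-sql-parser` uses its own lexer, keywords, and error machinery.
#[derive(Debug, PartialEq)]
enum ShowStmt {
    CreateView { name: String, redacted: bool },
    Variable(String),
}

fn parse_show(tokens: &[&str]) -> Result<ShowStmt, String> {
    match tokens {
        // REDACTED is only meaningful in front of CREATE <object>, and
        // CREATE CLUSTER is carved out explicitly.
        ["REDACTED", "CREATE", "CLUSTER", ..] => {
            Err("SHOW REDACTED CREATE CLUSTER is not supported".into())
        }
        ["REDACTED", "CREATE", "VIEW", name] => Ok(ShowStmt::CreateView {
            name: (*name).into(),
            redacted: true,
        }),
        ["REDACTED", ..] => {
            Err("SHOW REDACTED is only supported for SHOW REDACTED CREATE ...".into())
        }
        ["CREATE", "VIEW", name] => Ok(ShowStmt::CreateView {
            name: (*name).into(),
            redacted: false,
        }),
        // Anything else falls through to SHOW <variable>, which is why plain
        // `SHOW VIEW` parses as a variable rather than an error.
        [var] => Ok(ShowStmt::Variable(var.to_lowercase())),
        _ => Err("unsupported SHOW statement in this sketch".into()),
    }
}

fn main() {
    assert!(matches!(
        parse_show(&["REDACTED", "CREATE", "VIEW", "foo"]),
        Ok(ShowStmt::CreateView { redacted: true, .. })
    ));
    assert!(parse_show(&["REDACTED", "COLUMNS", "foo"]).is_err());
    assert_eq!(parse_show(&["VIEW"]), Ok(ShowStmt::Variable("view".into())));
}
```

The shape only illustrates the branching the test cases exercise; the actual implementation threads the resulting `redacted: bool` through each `ShowCreate*Statement`.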
@@ -4496,6 +4501,7 @@ pub fn plan_create_cluster_inner( enable_new_outer_join_lowering, enable_variadic_left_join_lowering, enable_letrec_fixpoint_analysis, + enable_projection_pushdown_after_relation_cse, seen: _, } = ClusterFeatureExtracted::try_from(features)?; let optimizer_feature_overrides = OptimizerFeatureOverrides { @@ -4504,6 +4510,7 @@ pub fn plan_create_cluster_inner( enable_new_outer_join_lowering, enable_variadic_left_join_lowering, enable_letrec_fixpoint_analysis, + enable_projection_pushdown_after_relation_cse, ..Default::default() }; @@ -4599,6 +4606,8 @@ pub fn unplan_create_cluster( enable_variadic_left_join_lowering, enable_letrec_fixpoint_analysis, enable_reduce_reduction: _, + enable_let_prefix_extraction: _, + enable_projection_pushdown_after_relation_cse, } = optimizer_feature_overrides; let features_extracted = ClusterFeatureExtracted { // Seen is ignored when unplanning. @@ -4608,6 +4617,7 @@ pub fn unplan_create_cluster( enable_new_outer_join_lowering, enable_variadic_left_join_lowering, enable_letrec_fixpoint_analysis, + enable_projection_pushdown_after_relation_cse, }; let features = features_extracted.into_values(scx.catalog); let availability_zones = if availability_zones.is_empty() { diff --git a/src/sql/src/plan/statement/dml.rs b/src/sql/src/plan/statement/dml.rs index b5b25f41fbbc3..c2293dc504d06 100644 --- a/src/sql/src/plan/statement/dml.rs +++ b/src/sql/src/plan/statement/dml.rs @@ -377,7 +377,12 @@ generate_extracted_config!( (EnableNewOuterJoinLowering, Option, Default(None)), (EnableEagerDeltaJoins, Option, Default(None)), (EnableVariadicLeftJoinLowering, Option, Default(None)), - (EnableLetrecFixpointAnalysis, Option, Default(None)) + (EnableLetrecFixpointAnalysis, Option, Default(None)), + ( + EnableProjectionPushdownAfterRelationCse, + Option, + Default(None) + ) ); impl TryFrom for ExplainConfig { @@ -430,6 +435,9 @@ impl TryFrom for ExplainConfig { persist_fast_path_limit: Default::default(), reoptimize_imported_views: v.reoptimize_imported_views, enable_reduce_reduction: Default::default(), + enable_let_prefix_extraction: Default::default(), + enable_projection_pushdown_after_relation_cse: v + .enable_projection_pushdown_after_relation_cse, }, }) } diff --git a/src/sql/src/plan/statement/scl.rs b/src/sql/src/plan/statement/scl.rs index 4ed5bb25d4619..c20eb1ce27d66 100644 --- a/src/sql/src/plan/statement/scl.rs +++ b/src/sql/src/plan/statement/scl.rs @@ -151,7 +151,8 @@ pub fn plan_inspect_shard( // Always inspect the shard at the latest GlobalId. let gid = scx .catalog - .get_item(&id) + .try_get_item(&id) + .ok_or_else(|| sql_err!("item doesn't exist"))? 
.at_version(RelationVersionSelector::Latest) .global_id(); Ok(Plan::InspectShard(InspectShardPlan { id: gid })) diff --git a/src/sql/src/plan/statement/show.rs b/src/sql/src/plan/statement/show.rs index 5adb62107d31f..d9540e641e674 100644 --- a/src/sql/src/plan/statement/show.rs +++ b/src/sql/src/plan/statement/show.rs @@ -60,9 +60,12 @@ pub fn describe_show_create_view( pub fn plan_show_create_view( scx: &StatementContext, - ShowCreateViewStatement { view_name }: ShowCreateViewStatement, + ShowCreateViewStatement { + view_name, + redacted, + }: ShowCreateViewStatement, ) -> Result { - plan_show_create_item(scx, &view_name, CatalogItemType::View) + plan_show_create_item(scx, &view_name, CatalogItemType::View, redacted) } pub fn describe_show_create_materialized_view( @@ -81,12 +84,14 @@ pub fn plan_show_create_materialized_view( scx: &StatementContext, ShowCreateMaterializedViewStatement { materialized_view_name, + redacted, }: ShowCreateMaterializedViewStatement, ) -> Result { plan_show_create_item( scx, &materialized_view_name, CatalogItemType::MaterializedView, + redacted, ) } @@ -106,6 +111,7 @@ fn plan_show_create_item( scx: &StatementContext, name: &ResolvedItemName, expect_type: CatalogItemType, + redacted: bool, ) -> Result { let item = scx.get_item_by_resolved_name(name)?; let name = name.full_name_str(); @@ -124,7 +130,8 @@ fn plan_show_create_item( if item.item_type() != expect_type { sql_bail!("{name} is not a {expect_type}"); } - let create_sql = humanize_sql_for_show_create(scx.catalog, item.id(), item.create_sql())?; + let create_sql = + humanize_sql_for_show_create(scx.catalog, item.id(), item.create_sql(), redacted)?; Ok(ShowCreatePlan { id: ObjectId::Item(item.id()), row: Row::pack_slice(&[Datum::String(&name), Datum::String(&create_sql)]), @@ -133,9 +140,12 @@ fn plan_show_create_item( pub fn plan_show_create_table( scx: &StatementContext, - ShowCreateTableStatement { table_name }: ShowCreateTableStatement, + ShowCreateTableStatement { + table_name, + redacted, + }: ShowCreateTableStatement, ) -> Result { - plan_show_create_item(scx, &table_name, CatalogItemType::Table) + plan_show_create_item(scx, &table_name, CatalogItemType::Table, redacted) } pub fn describe_show_create_source( @@ -152,9 +162,12 @@ pub fn describe_show_create_source( pub fn plan_show_create_source( scx: &StatementContext, - ShowCreateSourceStatement { source_name }: ShowCreateSourceStatement, + ShowCreateSourceStatement { + source_name, + redacted, + }: ShowCreateSourceStatement, ) -> Result { - plan_show_create_item(scx, &source_name, CatalogItemType::Source) + plan_show_create_item(scx, &source_name, CatalogItemType::Source, redacted) } pub fn describe_show_create_sink( @@ -171,9 +184,12 @@ pub fn describe_show_create_sink( pub fn plan_show_create_sink( scx: &StatementContext, - ShowCreateSinkStatement { sink_name }: ShowCreateSinkStatement, + ShowCreateSinkStatement { + sink_name, + redacted, + }: ShowCreateSinkStatement, ) -> Result { - plan_show_create_item(scx, &sink_name, CatalogItemType::Sink) + plan_show_create_item(scx, &sink_name, CatalogItemType::Sink, redacted) } pub fn describe_show_create_index( @@ -190,9 +206,12 @@ pub fn describe_show_create_index( pub fn plan_show_create_index( scx: &StatementContext, - ShowCreateIndexStatement { index_name }: ShowCreateIndexStatement, + ShowCreateIndexStatement { + index_name, + redacted, + }: ShowCreateIndexStatement, ) -> Result { - plan_show_create_item(scx, &index_name, CatalogItemType::Index) + plan_show_create_item(scx, &index_name, 
CatalogItemType::Index, redacted) } pub fn describe_show_create_connection( @@ -236,9 +255,12 @@ pub fn describe_show_create_cluster( pub fn plan_show_create_connection( scx: &StatementContext, - ShowCreateConnectionStatement { connection_name }: ShowCreateConnectionStatement, + ShowCreateConnectionStatement { + connection_name, + redacted, + }: ShowCreateConnectionStatement, ) -> Result { - plan_show_create_item(scx, &connection_name, CatalogItemType::Connection) + plan_show_create_item(scx, &connection_name, CatalogItemType::Connection, redacted) } pub fn show_databases<'a>( @@ -999,6 +1021,7 @@ fn humanize_sql_for_show_create( catalog: &dyn SessionCatalog, id: CatalogItemId, sql: &str, + redacted: bool, ) -> Result { use mz_sql_parser::ast::{CreateSourceConnection, MySqlConfigOptionName, PgConfigOptionName}; @@ -1019,7 +1042,7 @@ fn humanize_sql_for_show_create( // // For instance, `DROP SOURCE` statements can leave dangling references // to subsources that must be filtered out here, that, due to catalog - // transaction limitations, can only be be cleaned up when a top-level + // transaction limitations, can only be cleaned up when a top-level // source is altered. Statement::CreateSource(stmt) => { // Collect all current subsource references. @@ -1053,7 +1076,7 @@ fn humanize_sql_for_show_create( // COLUMNS` values that refer to the table it // ingests, which we'll handle below. PgConfigOptionName::TextColumns => {} - // Drop details, which does not rountrip. + // Drop details, which does not roundtrip. PgConfigOptionName::Details => return false, _ => return true, }; @@ -1086,7 +1109,7 @@ fn humanize_sql_for_show_create( // ingests, which we'll handle below. MySqlConfigOptionName::TextColumns | MySqlConfigOptionName::ExcludeColumns => {} - // Drop details, which does not rountrip. + // Drop details, which does not roundtrip. MySqlConfigOptionName::Details => return false, }; @@ -1142,7 +1165,7 @@ fn humanize_sql_for_show_create( match o.name { CreateSubsourceOptionName::TextColumns => true, CreateSubsourceOptionName::ExcludeColumns => true, - // Drop details, which does not rountrip. + // Drop details, which does not roundtrip. CreateSubsourceOptionName::Details => false, CreateSubsourceOptionName::ExternalReference => true, CreateSubsourceOptionName::Progress => true, @@ -1153,5 +1176,9 @@ fn humanize_sql_for_show_create( _ => (), } - Ok(resolved.to_ast_string_stable()) + if redacted { + Ok(resolved.to_ast_string_redacted()) + } else { + Ok(resolved.to_ast_string_stable()) + } } diff --git a/src/sql/src/session/vars/definitions.rs b/src/sql/src/session/vars/definitions.rs index 3d1cccbb5c9f6..626384d4ba0cb 100644 --- a/src/sql/src/session/vars/definitions.rs +++ b/src/sql/src/session/vars/definitions.rs @@ -1788,6 +1788,12 @@ macro_rules! 
feature_flags { } feature_flags!( + { + name: enable_let_prefix_extraction, + desc: "Enables hoisting of loop-invariant CTE bindindgs", + default: true, + enable_for_item_parsing: false, + }, // Gates for other feature flags { name: allow_real_time_recency, @@ -2182,6 +2188,12 @@ feature_flags!( default: false, enable_for_item_parsing: true, }, + { + name: enable_projection_pushdown_after_relation_cse, + desc: "Run ProjectionPushdown one more time after the last RelationCSE.", + default: true, + enable_for_item_parsing: false, + }, ); impl From<&super::SystemVars> for OptimizerFeatures { @@ -2197,6 +2209,9 @@ impl From<&super::SystemVars> for OptimizerFeatures { enable_reduce_reduction: vars.enable_reduce_reduction(), persist_fast_path_limit: vars.persist_fast_path_limit(), reoptimize_imported_views: false, + enable_let_prefix_extraction: vars.enable_let_prefix_extraction(), + enable_projection_pushdown_after_relation_cse: vars + .enable_projection_pushdown_after_relation_cse(), } } } diff --git a/src/sqllogictest/BUILD.bazel b/src/sqllogictest/BUILD.bazel index 72ca01ca13b6a..6ab50b83eb779 100644 --- a/src/sqllogictest/BUILD.bazel +++ b/src/sqllogictest/BUILD.bazel @@ -29,6 +29,7 @@ rust_library( rustc_flags = [], version = "0.0.1", deps = [ + "//src/adapter-types:mz_adapter_types", "//src/build-info:mz_build_info", "//src/catalog:mz_catalog", "//src/controller:mz_controller", @@ -73,6 +74,7 @@ rust_test( rustc_flags = [], version = "0.0.1", deps = [ + "//src/adapter-types:mz_adapter_types", "//src/build-info:mz_build_info", "//src/catalog:mz_catalog", "//src/controller:mz_controller", @@ -102,6 +104,7 @@ rust_doc_test( name = "mz_sqllogictest_doc_test", crate = ":mz_sqllogictest", deps = [ + "//src/adapter-types:mz_adapter_types", "//src/build-info:mz_build_info", "//src/catalog:mz_catalog", "//src/controller:mz_controller", @@ -151,6 +154,7 @@ rust_test( version = "0.0.1", deps = [ ":mz_sqllogictest", + "//src/adapter-types:mz_adapter_types", "//src/build-info:mz_build_info", "//src/catalog:mz_catalog", "//src/controller:mz_controller", @@ -194,6 +198,7 @@ rust_binary( version = "0.0.1", deps = [ ":mz_sqllogictest", + "//src/adapter-types:mz_adapter_types", "//src/build-info:mz_build_info", "//src/catalog:mz_catalog", "//src/controller:mz_controller", diff --git a/src/sqllogictest/Cargo.toml b/src/sqllogictest/Cargo.toml index 15c44069f3483..8e0f5a485982c 100644 --- a/src/sqllogictest/Cargo.toml +++ b/src/sqllogictest/Cargo.toml @@ -20,6 +20,7 @@ futures = "0.3.25" itertools = "0.12.1" junit-report = "0.8.3" md-5 = "0.10.5" +mz-adapter-types = { path = "../adapter-types" } mz-build-info = { path = "../build-info" } mz-catalog = { path = "../catalog" } mz-controller = { path = "../controller" } diff --git a/src/sqllogictest/src/ast.rs b/src/sqllogictest/src/ast.rs index c6c9837794ea7..22142813e29be 100644 --- a/src/sqllogictest/src/ast.rs +++ b/src/sqllogictest/src/ast.rs @@ -101,6 +101,7 @@ impl std::fmt::Display for Output { pub struct QueryOutput<'a> { pub types: Vec, pub sort: Sort, + pub multiline: bool, pub label: Option<&'a str>, pub column_names: Option>, pub mode: Mode, diff --git a/src/sqllogictest/src/parser.rs b/src/sqllogictest/src/parser.rs index 8b703596028f8..698317b66cdcf 100644 --- a/src/sqllogictest/src/parser.rs +++ b/src/sqllogictest/src/parser.rs @@ -286,30 +286,47 @@ impl<'a> Parser<'a> { Output::Values(vec![]) } else { let mut vals: Vec = output_str.lines().map(|s| s.to_owned()).collect(); - if let Mode::Cockroach = self.mode { - let mut rows: Vec> = vec![]; - for 
line in vals { - let cols = split_cols(&line, types.len()); - if sort != Sort::No && cols.len() != types.len() { - // We can't check this condition for - // Sort::No, because some tests use strings - // with whitespace that look like extra - // columns. (Note that these tests never - // use any of the sorting options.) - bail!( - "col len ({}) did not match declared col len ({})", - cols.len(), - types.len() - ); + match self.mode { + Mode::Standard => { + if !multiline { + vals = vals.into_iter().map(|val| val.replace('⏎', "\n")).collect(); } - rows.push(cols.into_iter().map(|col| col.replace('␠', " ")).collect()); - } - if sort == Sort::Row { - rows.sort(); } - vals = rows.into_iter().flatten().collect(); - if sort == Sort::Value { - vals.sort(); + Mode::Cockroach => { + let mut rows: Vec> = vec![]; + for line in vals { + let cols = split_cols(&line, types.len()); + if sort != Sort::No && cols.len() != types.len() { + // We can't check this condition for + // Sort::No, because some tests use strings + // with whitespace that look like extra + // columns. (Note that these tests never + // use any of the sorting options.) + bail!( + "col len ({}) did not match declared col len ({})", + cols.len(), + types.len() + ); + } + rows.push( + cols.into_iter() + .map(|col| { + let mut col = col.replace('␠', " "); + if !multiline { + col = col.replace('⏎', "\n"); + } + col + }) + .collect(), + ); + } + if sort == Sort::Row { + rows.sort(); + } + vals = rows.into_iter().flatten().collect(); + if sort == Sort::Value { + vals.sort(); + } } } Output::Values(vals) @@ -321,6 +338,7 @@ impl<'a> Parser<'a> { output: Ok(QueryOutput { types, sort, + multiline, label, column_names, mode: self.mode, diff --git a/src/sqllogictest/src/runner.rs b/src/sqllogictest/src/runner.rs index b9b2229f44728..92b2d7585693b 100644 --- a/src/sqllogictest/src/runner.rs +++ b/src/sqllogictest/src/runner.rs @@ -43,6 +43,11 @@ use fallible_iterator::FallibleIterator; use futures::sink::SinkExt; use itertools::Itertools; use md5::{Digest, Md5}; +use mz_adapter_types::bootstrap_builtin_cluster_config::{ + BootstrapBuiltinClusterConfig, ANALYTICS_CLUSTER_DEFAULT_REPLICATION_FACTOR, + CATALOG_SERVER_CLUSTER_DEFAULT_REPLICATION_FACTOR, PROBE_CLUSTER_DEFAULT_REPLICATION_FACTOR, + SUPPORT_CLUSTER_DEFAULT_REPLICATION_FACTOR, SYSTEM_CLUSTER_DEFAULT_REPLICATION_FACTOR, +}; use mz_catalog::config::ClusterReplicaSizeMap; use mz_controller::ControllerConfig; use mz_environmentd::CatalogConfig; @@ -1074,11 +1079,26 @@ impl<'a> RunnerInner<'a> { environment_id, cluster_replica_sizes: ClusterReplicaSizeMap::for_tests(), bootstrap_default_cluster_replica_size: config.replicas.to_string(), - bootstrap_builtin_system_cluster_replica_size: config.replicas.to_string(), - bootstrap_builtin_catalog_server_cluster_replica_size: config.replicas.to_string(), - bootstrap_builtin_probe_cluster_replica_size: config.replicas.to_string(), - bootstrap_builtin_support_cluster_replica_size: config.replicas.to_string(), - bootstrap_builtin_analytics_cluster_replica_size: config.replicas.to_string(), + bootstrap_builtin_system_cluster_config: BootstrapBuiltinClusterConfig { + replication_factor: SYSTEM_CLUSTER_DEFAULT_REPLICATION_FACTOR, + size: config.replicas.to_string(), + }, + bootstrap_builtin_catalog_server_cluster_config: BootstrapBuiltinClusterConfig { + replication_factor: CATALOG_SERVER_CLUSTER_DEFAULT_REPLICATION_FACTOR, + size: config.replicas.to_string(), + }, + bootstrap_builtin_probe_cluster_config: BootstrapBuiltinClusterConfig { + 
replication_factor: PROBE_CLUSTER_DEFAULT_REPLICATION_FACTOR, + size: config.replicas.to_string(), + }, + bootstrap_builtin_support_cluster_config: BootstrapBuiltinClusterConfig { + replication_factor: SUPPORT_CLUSTER_DEFAULT_REPLICATION_FACTOR, + size: config.replicas.to_string(), + }, + bootstrap_builtin_analytics_cluster_config: BootstrapBuiltinClusterConfig { + replication_factor: ANALYTICS_CLUSTER_DEFAULT_REPLICATION_FACTOR, + size: config.replicas.to_string(), + }, system_parameter_defaults: { let mut params = BTreeMap::new(); params.insert( @@ -1981,6 +2001,7 @@ pub async fn rewrite_file(runner: &mut Runner<'_>, filename: &Path) -> Result<() types: &Vec, column_names: Option<&Vec>, actual_output: &Vec, + multiline: bool, ) { buf.append_header(input, expected_output, column_names); @@ -1993,20 +2014,46 @@ pub async fn rewrite_file(runner: &mut Runner<'_>, filename: &Path) -> Result<() buf.append("\n"); } - if row.len() <= 1 { - buf.append(&row.iter().join(" ")); + if row.len() == 0 { + // nothing to do + } else if row.len() == 1 { + // If there is only one column, then there is no need for space + // substitution, so we only do newline substitution. + if multiline { + buf.append(&row[0]); + } else { + buf.append(&row[0].replace('\n', "⏎")) + } } else { - buf.append(&row.iter().map(|col| col.replace(' ', "␠")).join(" ")); + // Substitute spaces with ␠ to avoid mistaking the spaces in the result + // values with spaces that separate columns. + buf.append( + &row.iter() + .map(|col| { + let mut col = col.replace(' ', "␠"); + if !multiline { + col = col.replace('\n', "⏎"); + } + col + }) + .join(" "), + ); } } // In standard mode, output each value on its own line, // and ignore row boundaries. + // No need to substitute spaces, because every value (not row) is on a separate + // line. But we do need to substitute newlines. Mode::Standard => { for (j, col) in row.iter().enumerate() { if i != 0 || j != 0 { buf.append("\n"); } - buf.append(col); + buf.append(&if multiline { + col.clone() + } else { + col.replace('\n', "⏎") + }); } } } @@ -2028,6 +2075,7 @@ pub async fn rewrite_file(runner: &mut Runner<'_>, filename: &Path) -> Result<() output_str: expected_output, types, column_names, + multiline, .. }), .. @@ -2045,6 +2093,7 @@ pub async fn rewrite_file(runner: &mut Runner<'_>, filename: &Path) -> Result<() types, column_names.as_ref(), actual_output, + *multiline, ); } ( @@ -2055,6 +2104,7 @@ pub async fn rewrite_file(runner: &mut Runner<'_>, filename: &Path) -> Result<() output: Output::Values(_), output_str: expected_output, types, + multiline, .. }), .. @@ -2073,6 +2123,7 @@ pub async fn rewrite_file(runner: &mut Runner<'_>, filename: &Path) -> Result<() types, Some(actual_column_names), actual_output, + *multiline, ); } ( @@ -2211,7 +2262,7 @@ impl<'a> RewriteBuffer<'a> { self.append( &names .iter() - .map(|name| name.as_str().replace('␠', " ")) + .map(|name| name.as_str().replace(' ', "␠")) .collect::>() .join(" "), ); diff --git a/src/storage/src/server.rs b/src/storage/src/server.rs index 5350da4f9e73e..022bfc089cd5d 100644 --- a/src/storage/src/server.rs +++ b/src/storage/src/server.rs @@ -10,18 +10,19 @@ //! An interactive dataflow server. 
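The sqllogictest parser and runner changes above extend the escaping scheme for expected output: spaces inside a value become ␠ whenever several columns share a line, and newlines become ⏎ unless the record is declared `multiline`, so values can never be confused with column or row separators. A small round-trip sketch of that encoding, independent of the real `mz_sqllogictest` types:

```rust
/// Encode a single result value for an expected-output file: spaces become ␠
/// (only needed when several columns share a line) and, unless the record is
/// declared `multiline`, newlines become ⏎.
fn encode_value(value: &str, multi_column: bool, multiline: bool) -> String {
    let mut out = value.to_owned();
    if multi_column {
        out = out.replace(' ', "␠");
    }
    if !multiline {
        out = out.replace('\n', "⏎");
    }
    out
}

/// Decode a value read back from the expected-output file, reversing the above.
fn decode_value(value: &str, multi_column: bool, multiline: bool) -> String {
    let mut out = value.to_owned();
    if multi_column {
        out = out.replace('␠', " ");
    }
    if !multiline {
        out = out.replace('⏎', "\n");
    }
    out
}

fn main() {
    let original = "hello world\nsecond line";
    let encoded = encode_value(original, true, false);
    assert_eq!(encoded, "hello␠world⏎second␠line");
    assert_eq!(decode_value(&encoded, true, false), original);
}
```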
use std::sync::Arc; -use std::thread::Thread; use mz_cluster::server::TimelyContainerRef; use mz_ore::now::NowFn; use mz_ore::tracing::TracingHandle; use mz_persist_client::cache::PersistClientCache; use mz_rocksdb::config::SharedWriteBufferManager; +use mz_service::local::LocalActivator; use mz_storage_client::client::{StorageClient, StorageCommand, StorageResponse}; use mz_storage_types::connections::ConnectionContext; use mz_txn_wal::operator::TxnsContext; use timely::communication::initialize::WorkerGuards; use timely::worker::Worker as TimelyWorker; +use tokio::sync::mpsc; use crate::metrics::StorageMetrics; use crate::storage_state::{StorageInstanceContext, Worker}; @@ -57,7 +58,7 @@ pub fn serve( instance_context: StorageInstanceContext, ) -> Result< ( - TimelyContainerRef, + TimelyContainerRef, impl Fn() -> Box, ), anyhow::Error, @@ -93,14 +94,13 @@ pub fn serve( } impl mz_cluster::types::AsRunnableWorker for Config { - type Activatable = std::thread::Thread; fn build_and_run( config: Self, timely_worker: &mut TimelyWorker, client_rx: crossbeam_channel::Receiver<( crossbeam_channel::Receiver, - tokio::sync::mpsc::UnboundedSender, - tokio::sync::mpsc::UnboundedSender, + mpsc::UnboundedSender, + mpsc::UnboundedSender, )>, persist_clients: Arc, txns_ctx: TxnsContext, diff --git a/src/storage/src/source/source_reader_pipeline.rs b/src/storage/src/source/source_reader_pipeline.rs index 90fe442d3dd04..07a04b3741d57 100644 --- a/src/storage/src/source/source_reader_pipeline.rs +++ b/src/storage/src/source/source_reader_pipeline.rs @@ -665,7 +665,8 @@ where builder.build(move |_| { // Remap bindings beyond the upper - let mut accepted_times = Vec::new(); + use timely::progress::ChangeBatch; + let mut accepted_times: ChangeBatch<(G::Timestamp, FromTime)> = ChangeBatch::new(); // The upper frontier of the bindings let mut upper = Antichain::from_elem(Timestamp::minimum()); // Remap bindings not beyond upper @@ -677,19 +678,25 @@ where while let Some((_, data)) = bindings.next() { accepted_times.extend(data.drain(..).map(|(from, mut into, diff)| { into.advance_by(as_of.borrow()); - (from, into, diff) + ((into, from), diff) })); } // Extract ready bindings let new_upper = frontiers[0].frontier(); if PartialOrder::less_than(&upper.borrow(), &new_upper) { upper = new_upper.to_owned(); - - accepted_times.sort_unstable_by(|a, b| a.1.cmp(&b.1)); - // The times are totally ordered so we can binary search to find the prefix that is - // not beyond the upper and extract it into a batch. - let idx = accepted_times.partition_point(|(_, t, _)| !upper.less_equal(t)); - ready_times.extend(accepted_times.drain(0..idx)); + // Drain consolidated accepted times not greater or equal to `upper` into `ready_times`. + // Retain accepted times greater or equal to `upper` in + let mut pending_times = std::mem::take(&mut accepted_times).into_inner(); + // These should already be sorted, as part of `.into_inner()`, but sort defensively in case. + pending_times.sort_unstable_by(|a, b| a.0.cmp(&b.0)); + for ((into, from), diff) in pending_times.drain(..) { + if !upper.less_equal(&into) { + ready_times.push_back((from, into, diff)); + } else { + accepted_times.update((into, from), diff); + } + } } // The received times only accumulate correctly for times beyond the as_of. 
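The reclocking change above replaces the flat `accepted_times` vector with a `ChangeBatch` keyed by `(into, from)`, so matching insertions and retractions consolidate away before bindings are moved into `ready_times`. A self-contained sketch of that bookkeeping, with a `BTreeMap` standing in for `timely::progress::ChangeBatch` and plain integers standing in for the timestamps (both are simplifications of the real operator):

```rust
use std::collections::BTreeMap;

// Toy stand-ins for the dataflow timestamps; the real code uses
// `G::Timestamp` and the source's `FromTime`, with frontier comparisons.
type IntoTime = u64;
type FromTime = u64;

/// Consolidated, not-yet-ready remap bindings, keyed by (into, from) so that
/// matching insertions and retractions cancel out (the role played by
/// `ChangeBatch` in the operator).
type Accepted = BTreeMap<(IntoTime, FromTime), i64>;

fn accept(accepted: &mut Accepted, into: IntoTime, from: FromTime, diff: i64) {
    let entry = accepted.entry((into, from)).or_insert(0);
    *entry += diff;
    if *entry == 0 {
        accepted.remove(&(into, from));
    }
}

/// Move every binding whose `into` time is not beyond `upper` into the ready
/// list, keeping the rest pending. With a totally ordered toy time, "not
/// beyond `upper`" is simply `into < upper`.
fn drain_ready(accepted: &mut Accepted, upper: IntoTime) -> Vec<(FromTime, IntoTime, i64)> {
    let mut ready = Vec::new();
    let pending = std::mem::take(accepted);
    for ((into, from), diff) in pending {
        if into < upper {
            ready.push((from, into, diff));
        } else {
            accept(accepted, into, from, diff);
        }
    }
    ready
}

fn main() {
    let mut accepted = Accepted::new();
    accept(&mut accepted, 3, 10, 1);
    accept(&mut accepted, 5, 11, 1);
    accept(&mut accepted, 3, 10, -1); // retraction cancels the first binding
    // Only the binding at into=5 remains, and it is not yet ready at upper=4.
    assert!(drain_ready(&mut accepted, 4).is_empty());
    assert_eq!(drain_ready(&mut accepted, 6), vec![(11, 5, 1)]);
}
```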
diff --git a/src/storage/src/storage_state.rs b/src/storage/src/storage_state.rs index 19049058903a7..194fe30dc62ab 100644 --- a/src/storage/src/storage_state.rs +++ b/src/storage/src/storage_state.rs @@ -89,6 +89,7 @@ use mz_ore::{soft_assert_or_log, soft_panic_or_log}; use mz_persist_client::cache::PersistClientCache; use mz_repr::{GlobalId, Timestamp}; use mz_rocksdb::config::SharedWriteBufferManager; +use mz_service::local::LocalActivator; use mz_storage_client::client::{ RunIngestionCommand, StatusUpdate, StorageCommand, StorageResponse, }; @@ -135,7 +136,7 @@ pub struct Worker<'w, A: Allocate> { pub client_rx: crossbeam_channel::Receiver<( CommandReceiver, ResponseSender, - mpsc::UnboundedSender, + mpsc::UnboundedSender, )>, /// The state associated with collection ingress and egress. pub storage_state: StorageState, @@ -148,7 +149,7 @@ impl<'w, A: Allocate> Worker<'w, A> { client_rx: crossbeam_channel::Receiver<( CommandReceiver, ResponseSender, - mpsc::UnboundedSender, + mpsc::UnboundedSender, )>, metrics: StorageMetrics, now: NowFn, @@ -389,9 +390,10 @@ impl<'w, A: Allocate> Worker<'w, A> { while !shutdown { match self.client_rx.recv() { Ok((rx, tx, activator_tx)) => { + let activator = LocalActivator::new(thread::current()); // This might fail if the client has already shut down, which is fine. // `run_client` knows how to handle a disconnected client. - let _ = activator_tx.send(std::thread::current()); + let _ = activator_tx.send(activator); self.run_client(rx, tx) } Err(_) => { diff --git a/src/storage/src/storage_state/async_storage_worker.rs b/src/storage/src/storage_state/async_storage_worker.rs index ef6673b418a1f..f7b5edc28ef79 100644 --- a/src/storage/src/storage_state/async_storage_worker.rs +++ b/src/storage/src/storage_state/async_storage_worker.rs @@ -23,7 +23,7 @@ use mz_persist_client::read::ListenEvent; use mz_persist_client::Diagnostics; use mz_persist_types::codec_impls::UnitSchema; use mz_persist_types::Codec64; -use mz_repr::{Diff, GlobalId, Row}; +use mz_repr::{Diff, GlobalId, Row, TimestampManipulation}; use mz_service::local::Activatable; use mz_storage_types::controller::CollectionMetadata; use mz_storage_types::sources::{ @@ -179,7 +179,9 @@ where source_resume_uppers } -impl AsyncStorageWorker { +impl + AsyncStorageWorker +{ /// Creates a new [`AsyncStorageWorker`]. /// /// IMPORTANT: The passed in `activatable` is activated when new responses @@ -199,73 +201,12 @@ impl AsyncStorageWorker { while let Some(command) = command_rx.recv().await { match command { AsyncStorageWorkerCommand::UpdateFrontiers(id, ingestion_description) => { - // Here we update the as-of and upper(i.e resumption) frontiers of the - // ingestion. - // - // A good enough value for the as-of is the `meet({e.since for e in - // exports})` but this is not as tight as it could be because the since - // might be held back for unrelated to the ingestion reasons (e.g a user - // wanting to keep historical data). To make it tight we would need to find - // the maximum frontier at which all inputs to the ingestion are readable - // and start from there. 
We can find this by defining: - // - // max_readable(shard) = {(t - 1) for t in shard.upper} - // advanced_max_readable(shard) = advance_by(max_readable(shard), shard.since) - // as_of = meet({advanced_max_readable(e) for e in exports}) - // - // We defer this optimization for when Materialize allows users to - // arbitrarily hold back collections to perform historical queries and when - // the storage command protocol is updated such that these calculations are - // performed by the controller and not here. let mut resume_uppers = BTreeMap::new(); - // TODO(petrosagg): The as_of of the ingestion should normally be based - // on the since frontiers of its outputs. Even though the storage - // controller makes sure to make downgrade decisions in an organized - // and ordered fashion, it then proceeds to persist them in an - // asynchronous and disorganized fashion to persist. The net effect is - // that upon restart, or upon observing the persist state like this - // function, one can see non-sensical results like the since of A be in - // advance of B even when B depends on A! This can happen because the - // downgrade of B gets reordered and lost. Here is our best attempt at - // playing detective of what the controller meant to do by blindly - // assuming that the since of the remap shard is a suitable since - // frontier without consulting the since frontier of the outputs. One - // day we will enforce order to chaos and this comment will be deleted. - let remap_shard = ingestion_description + let seen_remap_shard = ingestion_description .ingestion_metadata .remap_shard .expect("ingestions must have a remap shard"); - let client = persist_clients - .open( - ingestion_description - .ingestion_metadata - .persist_location - .clone(), - ) - .await - .expect("error creating persist client"); - let read_handle = client - .open_leased_reader::( - remap_shard, - Arc::new(ingestion_description.desc.connection.timestamp_desc()), - Arc::new(UnitSchema), - Diagnostics { - shard_name: ingestion_description - .remap_collection_id - .to_string(), - handle_purpose: format!("resumption data for {}", id), - }, - false, - ) - .await - .unwrap(); - let as_of = read_handle.since().clone(); - mz_ore::task::spawn(move || "deferred_expire", async move { - tokio::time::sleep(std::time::Duration::from_secs(300)).await; - read_handle.expire().await; - }); - let seen_remap_shard = remap_shard.clone(); for (id, export) in ingestion_description.source_exports.iter() { // Explicit destructuring to force a compile error when the metadata change @@ -317,6 +258,61 @@ impl AsyncStorageWorker { } } + // Here we update the as-of frontier of the ingestion. + // + // The as-of frontier controls the frontier with which all inputs of the + // ingestion dataflow will be advanced by. It is in our interest to set the + // as-of froniter to the largest possible value, which will result in the + // maximum amount of consolidation, which in turn results in the minimum + // amount of memory required to hydrate. + // + // For each output `o` and for each input `i` of the ingestion the + // controller guarantees that i.since < o.upper except when o.upper is + // [T::minimum()]. Therefore the largest as-of for a particular output `o` + // is `{ (t - 1).advance_by(i.since) | t in o.upper }`. + // + // To calculate the global as_of frontier we take the minimum of all those + // per-output as-of frontiers. 
+ let client = persist_clients + .open( + ingestion_description + .ingestion_metadata + .persist_location + .clone(), + ) + .await + .expect("error creating persist client"); + let read_handle = client + .open_leased_reader::( + seen_remap_shard, + Arc::new(ingestion_description.desc.connection.timestamp_desc()), + Arc::new(UnitSchema), + Diagnostics { + shard_name: ingestion_description + .remap_collection_id + .to_string(), + handle_purpose: format!("resumption data for {}", id), + }, + false, + ) + .await + .unwrap(); + let remap_since = read_handle.since().clone(); + mz_ore::task::spawn(move || "deferred_expire", async move { + tokio::time::sleep(std::time::Duration::from_secs(300)).await; + read_handle.expire().await; + }); + let mut as_of = Antichain::new(); + for upper in resume_uppers.values() { + for t in upper.elements() { + let mut t_prime = t.step_back().unwrap_or(T::minimum()); + if !remap_since.is_empty() { + t_prime.advance_by(remap_since.borrow()); + as_of.insert(t_prime); + } + } + } + /// Convenience function to convert `BTreeMap>` to /// `BTreeMap>`. fn to_vec_row( diff --git a/src/testdrive/BUILD.bazel b/src/testdrive/BUILD.bazel index a62185129638e..f0cc2a98f4d2a 100644 --- a/src/testdrive/BUILD.bazel +++ b/src/testdrive/BUILD.bazel @@ -29,7 +29,7 @@ rust_library( proc_macro_deps = [] + all_crate_deps(proc_macro = True), rustc_env = {}, rustc_flags = [], - version = "0.130.0-dev.0", + version = "0.130.13", deps = [ ":mz_testdrive_build_script", "//src/adapter:mz_adapter", @@ -74,7 +74,7 @@ rust_test( ), rustc_env = {}, rustc_flags = [], - version = "0.130.0-dev.0", + version = "0.130.13", deps = [ "//src/adapter:mz_adapter", "//src/avro:mz_avro", @@ -157,7 +157,7 @@ rust_binary( proc_macro_deps = [] + all_crate_deps(proc_macro = True), rustc_env = {}, rustc_flags = [], - version = "0.130.0-dev.0", + version = "0.130.13", deps = [ ":mz_testdrive", "//src/adapter:mz_adapter", diff --git a/src/testdrive/Cargo.toml b/src/testdrive/Cargo.toml index b3d6f3b65ce09..81f7c36f0e78f 100644 --- a/src/testdrive/Cargo.toml +++ b/src/testdrive/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "mz-testdrive" description = "Integration test driver for Materialize." -version = "0.130.0-dev.0" +version = "0.130.13" edition.workspace = true rust-version.workspace = true publish = false diff --git a/src/timely-util/src/reclock.rs b/src/timely-util/src/reclock.rs index 0ca5f6450588f..8e91565c9da0a 100644 --- a/src/timely-util/src/reclock.rs +++ b/src/timely-util/src/reclock.rs @@ -238,7 +238,7 @@ where // complicated API to traverse it. This is left for future work if the naive trace // maintenance implemented in this operator becomes problematic. let mut remap_upper = Antichain::from_elem(IntoTime::minimum()); - let mut remap_since = as_of; + let mut remap_since = as_of.clone(); let mut remap_trace = Vec::new(); // A stash of source updates for which we don't know the corresponding binding yet. @@ -249,6 +249,10 @@ where let mut binding_buffer = Vec::new(); let mut interesting_times = Vec::new(); + // Accumulation buffer for `remap_input` updates. + use timely::progress::ChangeBatch; + let mut remap_accum_buffer: ChangeBatch<(IntoTime, FromTime)> = ChangeBatch::new(); + // The operator drains `remap_input` and organizes new bindings that are not beyond // `remap_input`'s frontier into the time ordered `remap_trace`. // @@ -268,8 +272,18 @@ where let mut session = output.session(cap); // STEP 1. Accept new bindings into `pending_remap`. 
+ // Advance all `into` times by `as_of`, and consolidate all updates at that frontier. while let Some((_, data)) = remap_input.next() { - for (from, into, diff) in data.drain(..) { + for (from, mut into, diff) in data.drain(..) { + into.advance_by(as_of.borrow()); + remap_accum_buffer.update((into, from), diff); + } + } + // Drain consolidated bindings into the `pending_remap` heap. + // Only do this once any of the `remap_input` frontier has passed `as_of`. + // For as long as the input frontier is less-equal `as_of`, we have no finalized times. + if !PartialOrder::less_equal(&frontiers[0].frontier(), &as_of.borrow()) { + for ((into, from), diff) in remap_accum_buffer.drain() { pending_remap.push(Reverse((into, from, diff))); } } diff --git a/src/transform/src/analysis.rs b/src/transform/src/analysis.rs index d750d9ade27f4..c00cac0edc1bb 100644 --- a/src/transform/src/analysis.rs +++ b/src/transform/src/analysis.rs @@ -1024,7 +1024,7 @@ mod column_names { typ, access_strategy: _, } => { - // Emit ColumnName::Global instanceds for each column in the + // Emit ColumnName::Global instances for each column in the // `Get` type. Those can be resolved to real names later when an // ExpressionHumanizer is available. (0..typ.columns().len()) diff --git a/src/transform/src/lib.rs b/src/transform/src/lib.rs index 19873522c3418..4ecdf5e9c9d6b 100644 --- a/src/transform/src/lib.rs +++ b/src/transform/src/lib.rs @@ -530,9 +530,12 @@ macro_rules! transforms { // do nothing }; ($($transforms:tt)*) => {{ - let mut __buf = Vec::>::new(); - transforms!(@op fill __buf with $($transforms)*); - __buf + #[allow(clippy::vec_init_then_push)] + { + let mut __buf = Vec::>::new(); + transforms!(@op fill __buf with $($transforms)*); + __buf + } }}; } @@ -737,7 +740,7 @@ impl Optimizer { /// rendering. pub fn physical_optimizer(ctx: &mut TransformCtx) -> Self { // Implementation transformations - let transforms: Vec> = vec![ + let transforms: Vec> = transforms![ Box::new( Typecheck::new(ctx.typecheck()) .disallow_new_globals() @@ -791,6 +794,14 @@ impl Optimizer { Box::new(CanonicalizeMfp), // Identifies common relation subexpressions. Box::new(cse::relation_cse::RelationCSE::new(false)), + // `RelationCSE` can create new points of interest for `ProjectionPushdown`: If an MFP + // is cut in half by `RelationCSE`, then we'd like to push projections behind the new + // Get as much as possible. This is because a fork in the plan involves copying the + // data. (But we need `ProjectionPushdown` to skip joins, because it can't deal with + // filled in JoinImplementations.) + Box::new(ProjectionPushdown::skip_joins()); if ctx.features.enable_projection_pushdown_after_relation_cse, + // Plans look nicer if we tidy MFPs again after ProjectionPushdown. + Box::new(CanonicalizeMfp); if ctx.features.enable_projection_pushdown_after_relation_cse, // Do a last run of constant folding. Importantly, this also runs `NormalizeLets`! // We need `NormalizeLets` at the end of the MIR pipeline for various reasons: // - The rendering expects some invariants about Let/LetRecs. 
diff --git a/src/transform/src/movement/projection_pushdown.rs b/src/transform/src/movement/projection_pushdown.rs index b70e397d54f65..38214770a416d 100644 --- a/src/transform/src/movement/projection_pushdown.rs +++ b/src/transform/src/movement/projection_pushdown.rs @@ -33,7 +33,9 @@ use std::collections::{BTreeMap, BTreeSet}; use itertools::zip_eq; -use mz_expr::{Id, JoinInputMapper, MirRelationExpr, MirScalarExpr, RECURSION_LIMIT}; +use mz_expr::{ + Id, JoinImplementation, JoinInputMapper, MirRelationExpr, MirScalarExpr, RECURSION_LIMIT, +}; use mz_ore::assert_none; use mz_ore::stack::{CheckedRecursion, RecursionGuard}; @@ -43,12 +45,25 @@ use crate::{TransformCtx, TransformError}; #[derive(Debug)] pub struct ProjectionPushdown { recursion_guard: RecursionGuard, + include_joins: bool, } impl Default for ProjectionPushdown { fn default() -> Self { Self { recursion_guard: RecursionGuard::with_limit(RECURSION_LIMIT), + include_joins: true, + } + } +} + +impl ProjectionPushdown { + /// Construct a `ProjectionPushdown` that does not push projections through joins (but does + /// descend into join inputs). + pub fn skip_joins() -> Self { + Self { + recursion_guard: RecursionGuard::with_limit(RECURSION_LIMIT), + include_joins: false, } } } @@ -207,8 +222,13 @@ impl ProjectionPushdown { MirRelationExpr::Join { inputs, equivalences, - .. - } => { + implementation, + } if self.include_joins => { + assert!( + matches!(implementation, JoinImplementation::Unimplemented), + "ProjectionPushdown can't deal with filled in join implementations. Turn off `include_joins` if you'd like to run it after `JoinImplementation`." + ); + let input_mapper = JoinInputMapper::new(inputs); let mut columns_to_pushdown = @@ -237,6 +257,23 @@ impl ProjectionPushdown { columns_to_pushdown.into_iter().collect() } + // Skip joins if `self.include_joins` is turned off. + MirRelationExpr::Join { inputs, equivalences: _, implementation: _ } => { + let input_mapper = JoinInputMapper::new(inputs); + + // Include all columns. + let columns_to_pushdown: Vec<_> = (0..input_mapper.total_columns()).collect(); + let child_columns = + input_mapper.split_column_set_by_input(columns_to_pushdown.iter()); + + // Recursively indicate the requirements. + for (input, inp_columns) in inputs.iter_mut().zip(child_columns) { + let inp_columns = inp_columns.into_iter().collect::>(); + self.action(input, &inp_columns, gets)?; + } + + columns_to_pushdown.into_iter().collect() + } MirRelationExpr::FlatMap { input, func, exprs } => { let inner_arity = input.arity(); // A FlatMap which returns zero rows acts like a filter diff --git a/src/transform/src/normalize_lets.rs b/src/transform/src/normalize_lets.rs index 6443c6a19db58..89924bba97b06 100644 --- a/src/transform/src/normalize_lets.rs +++ b/src/transform/src/normalize_lets.rs @@ -132,32 +132,38 @@ impl NormalizeLets { // A final bottom-up traversal to normalize the shape of nested LetRec blocks relation.try_visit_mut_post(&mut |relation| -> Result<(), RecursionLimitError> { - // Disassemble `LetRec` into a `Let` stack if possible. - // If a `LetRec` remains, return the would-be `Let` bindings to it. - // This is to maintain `LetRec`-freedom for `LetRec`-free expressions. 
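The `transforms!` macro extension above lets a pipeline entry be guarded by a feature flag (`Box::new(...); if ctx.features....`), which is how the extra `ProjectionPushdown::skip_joins()` and `CanonicalizeMfp` passes are appended after the last `RelationCSE`. A stripped-down sketch of that pattern, with hypothetical transform types in place of the real `mz_transform` ones:

```rust
// Hypothetical stand-ins for the optimizer's transform trait and feature flags.
trait Transform {
    fn name(&self) -> &'static str;
}

struct RelationCse;
struct ProjectionPushdown { include_joins: bool }
struct CanonicalizeMfp;

impl Transform for RelationCse {
    fn name(&self) -> &'static str { "RelationCSE" }
}
impl Transform for ProjectionPushdown {
    fn name(&self) -> &'static str {
        if self.include_joins { "ProjectionPushdown" } else { "ProjectionPushdown(skip_joins)" }
    }
}
impl Transform for CanonicalizeMfp {
    fn name(&self) -> &'static str { "CanonicalizeMfp" }
}

struct Features {
    enable_projection_pushdown_after_relation_cse: bool,
}

/// Build the tail of a physical pipeline, appending the feature-gated passes
/// only when the flag is on (the role of `; if ...` in the `transforms!` macro).
fn physical_tail(features: &Features) -> Vec<Box<dyn Transform>> {
    let mut pipeline: Vec<Box<dyn Transform>> = vec![Box::new(RelationCse)];
    if features.enable_projection_pushdown_after_relation_cse {
        // Re-pushing projections after CSE shrinks the data copied at plan forks,
        // but joins are skipped because their implementations are already chosen.
        pipeline.push(Box::new(ProjectionPushdown { include_joins: false }));
        pipeline.push(Box::new(CanonicalizeMfp));
    }
    pipeline
}

fn main() {
    let on = Features { enable_projection_pushdown_after_relation_cse: true };
    let names: Vec<_> = physical_tail(&on).iter().map(|t| t.name()).collect();
    assert_eq!(
        names,
        ["RelationCSE", "ProjectionPushdown(skip_joins)", "CanonicalizeMfp"]
    );
}
```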
- let mut bindings = let_motion::harvest_non_recursive(relation); - if let MirRelationExpr::LetRec { - ids, - values, - limits, - body: _, - } = relation - { - bindings.extend(ids.drain(..).zip(values.drain(..).zip(limits.drain(..)))); - support::replace_bindings_from_map(bindings, ids, values, limits); - } else { - for (id, (value, max_iter)) in bindings.into_iter().rev() { - assert_none!(max_iter); - *relation = MirRelationExpr::Let { - id, - value: Box::new(value), - body: Box::new(relation.take_dangerous()), - }; + if !features.enable_let_prefix_extraction { + // Disassemble `LetRec` into a `Let` stack if possible. + // If a `LetRec` remains, return the would-be `Let` bindings to it. + // This is to maintain `LetRec`-freedom for `LetRec`-free expressions. + let mut bindings = let_motion::harvest_non_recursive(relation); + if let MirRelationExpr::LetRec { + ids, + values, + limits, + body: _, + } = relation + { + bindings.extend(ids.drain(..).zip(values.drain(..).zip(limits.drain(..)))); + support::replace_bindings_from_map(bindings, ids, values, limits); + } else { + for (id, (value, max_iter)) in bindings.into_iter().rev() { + assert_none!(max_iter); + *relation = MirRelationExpr::Let { + id, + value: Box::new(value), + body: Box::new(relation.take_dangerous()), + }; + } } } // Move a non-recursive suffix of bindings from the end of the LetRec // to the LetRec body. + // This is unsafe when applied to expressions which contain `ArrangeBy`, + // as if the extracted suffixes reference arrangements they will not be + // able to access those arrangements from outside the `LetRec` scope. + // It happens to work at the moment, so we don't touch it but should fix. let bindings = let_motion::harvest_nonrec_suffix(relation)?; if let MirRelationExpr::LetRec { ids: _, @@ -183,6 +189,22 @@ impl NormalizeLets { } } + if features.enable_let_prefix_extraction { + // Extract `Let` prefixes from `LetRec`, to reveal their non-recursive nature. + // This assists with hoisting e.g. arrangements out of `LetRec` blocks, a thing + // we don't promise to do, but it can be helpful to do. This also exposes more + // AST nodes to non-`LetRec` analyses, which don't always have parity with `LetRec`. + let bindings = let_motion::harvest_non_recursive(relation); + for (id, (value, max_iter)) in bindings.into_iter().rev() { + assert_none!(max_iter); + *relation = MirRelationExpr::Let { + id, + value: Box::new(value), + body: Box::new(relation.take_dangerous()), + }; + } + } + Ok(()) })?; diff --git a/src/transform/src/reduction_pushdown.rs b/src/transform/src/reduction_pushdown.rs index 564709bd4ab03..0f8858405059d 100644 --- a/src/transform/src/reduction_pushdown.rs +++ b/src/transform/src/reduction_pushdown.rs @@ -53,6 +53,7 @@ use std::iter::FromIterator; use mz_expr::visit::Visit; use mz_expr::{AggregateExpr, JoinInputMapper, MirRelationExpr, MirScalarExpr}; +use crate::analysis::equivalences::EquivalenceClasses; use crate::TransformCtx; /// Pushes Reduce operators toward sources. @@ -183,6 +184,13 @@ fn try_push_reduce_through_join( // `` is either `Join {}` or // ``. + // 0) Make sure that `equivalences` is a proper equivalence relation. Later, in 3a)/i), we'll + // rely on expressions appearing in at most one equivalence class. 
+ let mut eq_classes = EquivalenceClasses::default(); + eq_classes.classes = equivalences.clone(); + eq_classes.minimize(None); + let equivalences = eq_classes.classes; + let old_join_mapper = JoinInputMapper::new(inputs.as_slice()); // 1) Partition the join constraints into constraints containing a group // key and constraints that don't. @@ -255,6 +263,8 @@ fn try_push_reduce_through_join( // (2) every expression in the equivalences of the new join. for key in group_key { // i) Find the equivalence class that the key is in. + // This relies on the expression appearing in at most one equivalence class. This + // invariant is ensured in step 0). if let Some(cls) = new_join_equivalences .iter() .find(|cls| cls.iter().any(|expr| expr == key)) diff --git a/src/transform/tests/test_transforms.rs b/src/transform/tests/test_transforms.rs index f4cbb2f6237e8..25b8387c6968e 100644 --- a/src/transform/tests/test_transforms.rs +++ b/src/transform/tests/test_transforms.rs @@ -260,6 +260,7 @@ fn apply_transform( let mut features = mz_repr::optimize::OptimizerFeatures::default(); // Apply a non-default feature flag to test the right implementation. features.enable_letrec_fixpoint_analysis = true; + features.enable_let_prefix_extraction = true; let typecheck_ctx = mz_transform::typecheck::empty_context(); let mut df_meta = DataflowMetainfo::default(); let mut transform_ctx = diff --git a/src/transform/tests/test_transforms/humanized_exprs.spec b/src/transform/tests/test_transforms/humanized_exprs.spec index 6ec112acdb457..35a09682cf774 100644 --- a/src/transform/tests/test_transforms/humanized_exprs.spec +++ b/src/transform/tests/test_transforms/humanized_exprs.spec @@ -67,7 +67,7 @@ explain with=humanized_exprs Project (#1, #0, #1) Get t0 ---- -Project (#1{c0}, #0{c1}, #1{c0}) +Project (#1{c1}, #0{c0}, #1{c1}) Get t0 # Map diff --git a/src/transform/tests/test_transforms/normalize_lets.spec b/src/transform/tests/test_transforms/normalize_lets.spec index 79e5aea17d583..a05b11be2a3c9 100644 --- a/src/transform/tests/test_transforms/normalize_lets.spec +++ b/src/transform/tests/test_transforms/normalize_lets.spec @@ -196,21 +196,23 @@ With Map (null::bigint) Get t0 ---- -With Mutually Recursive +With cte l0 = Map (null) Get t0 - cte l1 = - Union - Get l0 - Get l0 - Get l1 Return - With - cte l2 = - Filter (#0 > 0) + With Mutually Recursive + cte l1 = + Union + Get l0 + Get l0 Get l1 Return - Union - Get l2 - Get l2 + With + cte l2 = + Filter (#0 > 0) + Get l1 + Return + Union + Get l2 + Get l2 diff --git a/src/transform/tests/test_transforms/reduction_pushdown.spec b/src/transform/tests/test_transforms/reduction_pushdown.spec index 408f5e3df2221..3078ccaa57320 100644 --- a/src/transform/tests/test_transforms/reduction_pushdown.spec +++ b/src/transform/tests/test_transforms/reduction_pushdown.spec @@ -40,7 +40,7 @@ Distinct project=[#1] Project (#0) CrossJoin Distinct project=[#1] - Filter (((#1 + #1) = #0) OR (((#1 + #1)) IS NULL AND (#0) IS NULL)) + Filter ((#0 = (#1 + #1)) OR ((#0) IS NULL AND ((#1 + #1)) IS NULL)) Get x Distinct project=[] Get y diff --git a/src/transform/tests/test_transforms/relation_cse.spec b/src/transform/tests/test_transforms/relation_cse.spec index c55de1af4fafd..f363d56399343 100644 --- a/src/transform/tests/test_transforms/relation_cse.spec +++ b/src/transform/tests/test_transforms/relation_cse.spec @@ -87,7 +87,7 @@ With Project (#0, #1) Get t0 ---- -With Mutually Recursive +With cte l0 = Project (#0, #1) Get t0 @@ -96,36 +96,38 @@ With Mutually Recursive Union Get l0 Get l0 
- cte l2 = - Filter (#1 > 7) - Get l4 - cte l3 = - Filter (#1 > 7) - Get l6 - cte l4 = - Distinct project=[#0, #1] - Union - Get l1 - Get l2 - Get l2 - Get l3 - Get l3 - cte l5 = - Filter (#1 > 7) - Get l4 - cte l6 = - Distinct project=[#0, #1] - Union - Get l1 - Get l5 - Get l5 - Get l3 - Get l3 Return - Union - Filter (#1 > 7) - Get t0 - Filter (#1 > 7) - Get l6 - Filter (#1 > 7) - Get l4 + With Mutually Recursive + cte l2 = + Filter (#1 > 7) + Get l4 + cte l3 = + Filter (#1 > 7) + Get l6 + cte l4 = + Distinct project=[#0, #1] + Union + Get l1 + Get l2 + Get l2 + Get l3 + Get l3 + cte l5 = + Filter (#1 > 7) + Get l4 + cte l6 = + Distinct project=[#0, #1] + Union + Get l1 + Get l5 + Get l5 + Get l3 + Get l3 + Return + Union + Filter (#1 > 7) + Get t0 + Filter (#1 > 7) + Get l6 + Filter (#1 > 7) + Get l4 diff --git a/src/transform/tests/testdata/partial-reduction-pushdown b/src/transform/tests/testdata/partial-reduction-pushdown index 8120a380245ce..254c242aa1929 100644 --- a/src/transform/tests/testdata/partial-reduction-pushdown +++ b/src/transform/tests/testdata/partial-reduction-pushdown @@ -87,7 +87,7 @@ build apply=ReductionPushdown []) ---- Project (#1) - Join on=(#0 = #1) + Join on=(#1 = #0) Distinct project=[substr(#1, 5)] Constant Distinct project=[#1] @@ -101,7 +101,7 @@ build apply=ReductionPushdown []) ---- Project (#0) - Join on=(#0 = #1) + Join on=(#1 = #0) Distinct project=[substr(#1, 5)] Get x Distinct project=[#1] diff --git a/src/transform/tests/testdata/reduction-pushdown b/src/transform/tests/testdata/reduction-pushdown index 05f692e062319..98647df36a8a1 100644 --- a/src/transform/tests/testdata/reduction-pushdown +++ b/src/transform/tests/testdata/reduction-pushdown @@ -59,7 +59,7 @@ build apply=ReductionPushdown (reduce (join [(get x) (get y)] [[(call_variadic substr [#1 5]) #3]]) [#3] []) ---- Project (#1) - Join on=(#0 = #1) + Join on=(#1 = #0) Distinct project=[substr(#1, 5)] Get x Distinct project=[#1] @@ -73,7 +73,7 @@ build apply=ReductionPushdown []) ---- Project (#0) - Join on=(#0 = #1) + Join on=(#1 = #0) Distinct project=[substr(#1, 5)] Get x Distinct project=[#1] diff --git a/src/workspace-hack/Cargo.toml b/src/workspace-hack/Cargo.toml index 049a595de2cb9..5521ab1ae4183 100644 --- a/src/workspace-hack/Cargo.toml +++ b/src/workspace-hack/Cargo.toml @@ -33,9 +33,10 @@ bstr-6f8ce4dd05d13bba = { package = "bstr", version = "0.2.14" } bstr-dff4ba8e3ae991db = { package = "bstr", version = "1.10.0" } byteorder = { version = "1.5.0" } bytes = { version = "1.4.0", features = ["serde"] } -chrono = { version = "0.4.35", default-features = false, features = ["clock", "serde"] } +chrono = { version = "0.4.35", default-features = false, features = ["clock", "serde", "wasmbind"] } clap = { version = "4.5.23", features = ["derive", "env", "string", "wrap_help"] } clap_builder = { version = "4.5.23", default-features = false, features = ["color", "env", "std", "string", "suggestions", "usage", "wrap_help"] } +concurrent-queue = { version = "2.5.0" } console = { version = "0.15.5", default-features = false, features = ["ansi-parsing", "unicode-width"] } criterion = { version = "0.5.1", features = ["async_tokio", "html_reports"] } crossbeam-deque = { version = "0.8.3" } @@ -46,6 +47,8 @@ debugid = { version = "0.8.0", default-features = false, features = ["serde"] } dec = { version = "0.4.8", default-features = false, features = ["serde"] } digest = { version = "0.10.6", features = ["mac", "std"] } either = { version = "1.8.0" } +event-listener = { version = "5.3.1" } 
+event-listener-strategy = { version = "0.5.2" } flate2 = { version = "1.0.24", features = ["zlib"] } form_urlencoded = { version = "1.2.1" } futures = { version = "0.3.25" } @@ -68,7 +71,7 @@ kube = { version = "0.92.1", default-features = false, features = ["client", "de kube-client = { version = "0.92.1", default-features = false, features = ["jsonpatch", "openssl-tls", "ws"] } kube-core = { version = "0.92.1", default-features = false, features = ["jsonpatch", "schema", "ws"] } libc = { version = "0.2.169", features = ["extra_traits", "use_std"] } -libz-sys = { version = "1.1.8", features = ["static"] } +libz-sys = { version = "1.1.19", features = ["static"] } log = { version = "0.4.22", default-features = false, features = ["std"] } lru = { version = "0.12.3" } memchr = { version = "2.7.4", features = ["use_std"] } @@ -106,7 +109,8 @@ rdkafka-sys = { git = "https://github.com/MaterializeInc/rust-rdkafka.git", feat regex = { version = "1.10.5" } regex-automata = { version = "0.4.7", default-features = false, features = ["dfa-onepass", "dfa-search", "hybrid", "meta", "nfa", "perf", "unicode"] } regex-syntax = { version = "0.8.3" } -reqwest = { version = "0.11.24", features = ["blocking", "json", "multipart", "native-tls-vendored"] } +reqwest-5ef9efb8ec2df382 = { package = "reqwest", version = "0.12.4", features = ["blocking", "json", "stream"] } +reqwest-a6292c17cd707f01 = { package = "reqwest", version = "0.11.24", features = ["blocking", "json", "multipart", "native-tls-vendored"] } schemars = { version = "0.8.11", features = ["uuid1"] } scopeguard = { version = "1.1.0" } semver = { version = "1.0.23", features = ["serde"] } @@ -119,7 +123,7 @@ socket2 = { version = "0.5.7", default-features = false, features = ["all"] } subtle = { version = "2.4.1" } syn-dff4ba8e3ae991db = { package = "syn", version = "1.0.107", features = ["extra-traits", "full", "visit", "visit-mut"] } syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.63", features = ["extra-traits", "full", "visit", "visit-mut"] } -time = { version = "0.3.17", features = ["macros", "quickcheck", "serde-well-known"] } +time = { version = "0.3.17", features = ["local-offset", "macros", "quickcheck", "serde-well-known"] } tokio = { version = "1.38.0", features = ["full", "test-util", "tracing"] } tokio-postgres = { git = "https://github.com/MaterializeInc/rust-postgres", features = ["serde", "with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } tokio-stream = { version = "0.1.16", features = ["net", "sync"] } @@ -128,8 +132,8 @@ toml_datetime = { version = "0.6.3", default-features = false, features = ["serd tonic = { version = "0.12.3", features = ["gzip"] } tower = { version = "0.4.13", features = ["balance", "buffer", "filter", "limit", "load-shed", "retry", "timeout", "util"] } tower-http = { version = "0.5.2", features = ["auth", "cors", "map-response-body", "trace", "util"] } -tracing = { version = "0.1.37", features = ["log"] } -tracing-core = { version = "0.1.30" } +tracing = { version = "0.1.41", features = ["log"] } +tracing-core = { version = "0.1.33" } tracing-subscriber = { version = "0.3.18", features = ["env-filter", "json"] } tungstenite = { version = "0.21.0" } twox-hash = { version = "1.6.3" } @@ -164,9 +168,10 @@ bstr-dff4ba8e3ae991db = { package = "bstr", version = "1.10.0" } byteorder = { version = "1.5.0" } bytes = { version = "1.4.0", features = ["serde"] } cc = { version = "1.1.28", default-features = false, features = ["parallel"] } -chrono = { version = "0.4.35", default-features = false, features 
= ["clock", "serde"] } +chrono = { version = "0.4.35", default-features = false, features = ["clock", "serde", "wasmbind"] } clap = { version = "4.5.23", features = ["derive", "env", "string", "wrap_help"] } clap_builder = { version = "4.5.23", default-features = false, features = ["color", "env", "std", "string", "suggestions", "usage", "wrap_help"] } +concurrent-queue = { version = "2.5.0" } console = { version = "0.15.5", default-features = false, features = ["ansi-parsing", "unicode-width"] } criterion = { version = "0.5.1", features = ["async_tokio", "html_reports"] } crossbeam-deque = { version = "0.8.3" } @@ -177,6 +182,8 @@ debugid = { version = "0.8.0", default-features = false, features = ["serde"] } dec = { version = "0.4.8", default-features = false, features = ["serde"] } digest = { version = "0.10.6", features = ["mac", "std"] } either = { version = "1.8.0" } +event-listener = { version = "5.3.1" } +event-listener-strategy = { version = "0.5.2" } flate2 = { version = "1.0.24", features = ["zlib"] } form_urlencoded = { version = "1.2.1" } futures = { version = "0.3.25" } @@ -199,7 +206,7 @@ kube = { version = "0.92.1", default-features = false, features = ["client", "de kube-client = { version = "0.92.1", default-features = false, features = ["jsonpatch", "openssl-tls", "ws"] } kube-core = { version = "0.92.1", default-features = false, features = ["jsonpatch", "schema", "ws"] } libc = { version = "0.2.169", features = ["extra_traits", "use_std"] } -libz-sys = { version = "1.1.8", features = ["static"] } +libz-sys = { version = "1.1.19", features = ["static"] } log = { version = "0.4.22", default-features = false, features = ["std"] } lru = { version = "0.12.3" } memchr = { version = "2.7.4", features = ["use_std"] } @@ -238,7 +245,8 @@ rdkafka-sys = { git = "https://github.com/MaterializeInc/rust-rdkafka.git", feat regex = { version = "1.10.5" } regex-automata = { version = "0.4.7", default-features = false, features = ["dfa-onepass", "dfa-search", "hybrid", "meta", "nfa", "perf", "unicode"] } regex-syntax = { version = "0.8.3" } -reqwest = { version = "0.11.24", features = ["blocking", "json", "multipart", "native-tls-vendored"] } +reqwest-5ef9efb8ec2df382 = { package = "reqwest", version = "0.12.4", features = ["blocking", "json", "stream"] } +reqwest-a6292c17cd707f01 = { package = "reqwest", version = "0.11.24", features = ["blocking", "json", "multipart", "native-tls-vendored"] } schemars = { version = "0.8.11", features = ["uuid1"] } scopeguard = { version = "1.1.0" } semver = { version = "1.0.23", features = ["serde"] } @@ -251,7 +259,7 @@ socket2 = { version = "0.5.7", default-features = false, features = ["all"] } subtle = { version = "2.4.1" } syn-dff4ba8e3ae991db = { package = "syn", version = "1.0.107", features = ["extra-traits", "full", "visit", "visit-mut"] } syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.63", features = ["extra-traits", "full", "visit", "visit-mut"] } -time = { version = "0.3.17", features = ["macros", "quickcheck", "serde-well-known"] } +time = { version = "0.3.17", features = ["local-offset", "macros", "quickcheck", "serde-well-known"] } time-macros = { version = "0.2.6", default-features = false, features = ["formatting", "parsing", "serde"] } tokio = { version = "1.38.0", features = ["full", "test-util", "tracing"] } tokio-postgres = { git = "https://github.com/MaterializeInc/rust-postgres", features = ["serde", "with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } @@ -261,8 +269,8 @@ toml_datetime = { version = "0.6.3", 
default-features = false, features = ["serd tonic = { version = "0.12.3", features = ["gzip"] } tower = { version = "0.4.13", features = ["balance", "buffer", "filter", "limit", "load-shed", "retry", "timeout", "util"] } tower-http = { version = "0.5.2", features = ["auth", "cors", "map-response-body", "trace", "util"] } -tracing = { version = "0.1.37", features = ["log"] } -tracing-core = { version = "0.1.30" } +tracing = { version = "0.1.41", features = ["log"] } +tracing-core = { version = "0.1.33" } tracing-subscriber = { version = "0.3.18", features = ["env-filter", "json"] } tungstenite = { version = "0.21.0" } twox-hash = { version = "1.6.3" } @@ -280,19 +288,21 @@ zstd-sys = { version = "2.0.9", features = ["std"] } bitflags = { version = "2.4.1", default-features = false, features = ["std"] } camino = { version = "1.1.7", default-features = false, features = ["serde1"] } hyper-582f2526e08bb6a0 = { package = "hyper", version = "0.14.27", default-features = false, features = ["runtime"] } +linux-raw-sys = { version = "0.4.14", default-features = false, features = ["elf", "errno", "general", "if_ether", "ioctl", "net", "netlink", "no_std", "prctl", "xdp"] } native-tls = { version = "0.2.11", default-features = false, features = ["vendored"] } pathdiff = { version = "0.2.1", default-features = false, features = ["camino"] } ring = { version = "0.17.7", features = ["std"] } -rustix = { version = "0.38.43", features = ["fs", "termios"] } +rustix = { version = "0.38.43", features = ["event", "fs", "net", "pipe", "process", "termios", "time"] } [target.x86_64-unknown-linux-gnu.build-dependencies] bitflags = { version = "2.4.1", default-features = false, features = ["std"] } camino = { version = "1.1.7", default-features = false, features = ["serde1"] } hyper-582f2526e08bb6a0 = { package = "hyper", version = "0.14.27", default-features = false, features = ["runtime"] } +linux-raw-sys = { version = "0.4.14", default-features = false, features = ["elf", "errno", "general", "if_ether", "ioctl", "net", "netlink", "no_std", "prctl", "xdp"] } native-tls = { version = "0.2.11", default-features = false, features = ["vendored"] } pathdiff = { version = "0.2.1", default-features = false, features = ["camino"] } ring = { version = "0.17.7", features = ["std"] } -rustix = { version = "0.38.43", features = ["fs", "termios"] } +rustix = { version = "0.38.43", features = ["event", "fs", "net", "pipe", "process", "termios", "time"] } [target.x86_64-apple-darwin.dependencies] bitflags = { version = "2.4.1", default-features = false, features = ["std"] } @@ -301,7 +311,7 @@ hyper-582f2526e08bb6a0 = { package = "hyper", version = "0.14.27", default-featu native-tls = { version = "0.2.11", default-features = false, features = ["vendored"] } pathdiff = { version = "0.2.1", default-features = false, features = ["camino"] } ring = { version = "0.17.7", features = ["std"] } -rustix = { version = "0.38.43", features = ["fs", "termios"] } +rustix = { version = "0.38.43", features = ["event", "fs", "net", "pipe", "process", "termios", "time"] } security-framework = { version = "2.7.0", features = ["alpn"] } [target.x86_64-apple-darwin.build-dependencies] @@ -311,7 +321,7 @@ hyper-582f2526e08bb6a0 = { package = "hyper", version = "0.14.27", default-featu native-tls = { version = "0.2.11", default-features = false, features = ["vendored"] } pathdiff = { version = "0.2.1", default-features = false, features = ["camino"] } ring = { version = "0.17.7", features = ["std"] } -rustix = { version = "0.38.43", features = 
["fs", "termios"] } +rustix = { version = "0.38.43", features = ["event", "fs", "net", "pipe", "process", "termios", "time"] } security-framework = { version = "2.7.0", features = ["alpn"] } ### END HAKARI SECTION diff --git a/test/backup-restore-postgres/mzcompose.py b/test/backup-restore-postgres/mzcompose.py index a24cd29bad500..9e9e837ffee05 100644 --- a/test/backup-restore-postgres/mzcompose.py +++ b/test/backup-restore-postgres/mzcompose.py @@ -25,7 +25,7 @@ Minio(setup_materialize=True), Mc(), Materialized( - external_minio=True, + external_blob_store=True, external_metadata_store=True, sanity_restart=False, metadata_store="postgres-metadata", diff --git a/test/backup-restore/mzcompose.py b/test/backup-restore/mzcompose.py index e9dba483bfdb6..65d004d9f8618 100644 --- a/test/backup-restore/mzcompose.py +++ b/test/backup-restore/mzcompose.py @@ -25,7 +25,7 @@ Minio(setup_materialize=True), Mc(), Materialized( - external_minio=True, + external_blob_store=True, external_metadata_store=True, sanity_restart=False, metadata_store="cockroach", diff --git a/test/cloudtest/test_compute.py b/test/cloudtest/test_compute.py index f488fa2270f76..6f444503a9da7 100644 --- a/test/cloudtest/test_compute.py +++ b/test/cloudtest/test_compute.py @@ -135,7 +135,7 @@ def test_disk_label(mz: MaterializeApplication) -> None: for value in ("true", "false"): mz.environmentd.sql( - f"CREATE CLUSTER disk_{value} MANAGED, SIZE = '2-1', DISK = {value}" + f"CREATE CLUSTER disk_{value} MANAGED, SIZE = '2-no-disk', DISK = {value}" ) (cluster_id, replica_id) = mz.environmentd.sql_query( @@ -147,8 +147,7 @@ def test_disk_label(mz: MaterializeApplication) -> None: node_selectors = get_node_selector(mz, cluster_id, replica_id) if value == "true": assert ( - node_selectors - == '\'{"materialize.cloud/disk":"true"} {"materialize.cloud/disk":"true"}\'' + node_selectors == '\'{"materialize.cloud/disk":"true"}\'' ), node_selectors else: assert node_selectors == "''" diff --git a/test/cloudtest/test_disk.py b/test/cloudtest/test_disk.py index d3de1dae95dd5..4e78ec75ea2ac 100644 --- a/test/cloudtest/test_disk.py +++ b/test/cloudtest/test_disk.py @@ -26,7 +26,7 @@ def test_disk_replica(mz: MaterializeApplication) -> None: > CREATE CLUSTER testdrive_no_reset_disk_cluster1 REPLICAS (r1 ( - SIZE '1', DISK = true + SIZE '1-no-disk', DISK = true )) > CREATE CONNECTION IF NOT EXISTS kafka TO KAFKA (BROKER '${testdrive.kafka-addr}', SECURITY PROTOCOL PLAINTEXT) @@ -100,7 +100,7 @@ def test_always_use_disk_replica(mz: MaterializeApplication) -> None: key2:val2 > CREATE CLUSTER disk_cluster2 - REPLICAS (r1 (SIZE '1')) + REPLICAS (r1 (SIZE '1-no-disk')) > CREATE CONNECTION IF NOT EXISTS kafka TO KAFKA (BROKER '${testdrive.kafka-addr}', SECURITY PROTOCOL PLAINTEXT) @@ -168,7 +168,7 @@ def test_no_disk_replica(mz: MaterializeApplication) -> None: > CREATE CLUSTER no_disk_cluster1 REPLICAS (r1 ( - SIZE '1', DISK = false + SIZE '1-no-disk', DISK = false )) > CREATE CONNECTION IF NOT EXISTS kafka diff --git a/test/cloudtest/test_upgrade.py b/test/cloudtest/test_upgrade.py index a3cdf393e5e3b..0932afde0a163 100644 --- a/test/cloudtest/test_upgrade.py +++ b/test/cloudtest/test_upgrade.py @@ -91,5 +91,5 @@ def test_upgrade(aws_region: str | None, log_filter: str | None, dev: bool) -> N AlterConnectionHost, } ) - scenario = CloudtestUpgrade(checks=checks, executor=executor) + scenario = CloudtestUpgrade(checks=checks, executor=executor, azurite=False) scenario.run() diff --git a/test/cluster/mzcompose.py b/test/cluster/mzcompose.py index 
82f48c10d19ee..b6ee8dcadc338 100644 --- a/test/cluster/mzcompose.py +++ b/test/cluster/mzcompose.py @@ -3189,7 +3189,7 @@ def workflow_test_incident_70(c: Composition) -> None: with c.override( Materialized( external_metadata_store=True, - external_minio=True, + external_blob_store=True, sanity_restart=False, ), Minio(setup_materialize=True), diff --git a/test/data-ingest/mzcompose.py b/test/data-ingest/mzcompose.py index 0264cf2453d2c..0f9b186e871c9 100644 --- a/test/data-ingest/mzcompose.py +++ b/test/data-ingest/mzcompose.py @@ -25,6 +25,7 @@ from materialize.data_ingest.workload import WORKLOADS, execute_workload from materialize.mzcompose import get_default_system_parameters from materialize.mzcompose.composition import Composition, WorkflowArgumentParser +from materialize.mzcompose.services.azure import Azurite from materialize.mzcompose.services.clusterd import Clusterd from materialize.mzcompose.services.kafka import Kafka from materialize.mzcompose.services.materialized import Materialized @@ -53,24 +54,10 @@ SchemaRegistry(), CockroachOrPostgresMetadata(), Minio(setup_materialize=True), - # Fixed port so that we keep the same port after restarting Mz in disruptions - Materialized( - ports=["16875:6875"], - external_minio=True, - external_metadata_store=True, - system_parameter_defaults=get_default_system_parameters(zero_downtime=True), - additional_system_parameter_defaults={"unsafe_enable_table_keys": "true"}, - sanity_restart=False, - ), - Materialized( - name="materialized2", - ports=["26875:6875"], - external_minio=True, - external_metadata_store=True, - system_parameter_defaults=get_default_system_parameters(zero_downtime=True), - additional_system_parameter_defaults={"unsafe_enable_table_keys": "true"}, - sanity_restart=False, - ), + Azurite(), + # Overridden below + Materialized(), + Materialized(name="materialized2"), Clusterd(name="clusterd1", scratch_directory="/mzdata/source_data"), ] @@ -90,6 +77,9 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: action="append", help="Workload(s) to run.", ) + parser.add_argument( + "--azurite", action="store_true", help="Use Azurite as blob store instead of S3" + ) args = parser.parse_args() @@ -118,38 +108,60 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: executor_classes = [MySqlExecutor, KafkaRoundtripExecutor, KafkaExecutor] - c.up(*services) - conn = c.sql_connection() - conn.autocommit = True - with conn.cursor() as cur: - cur.execute( - """CREATE CONNECTION IF NOT EXISTS kafka_conn - FOR KAFKA BROKER 'kafka:9092', SECURITY PROTOCOL PLAINTEXT""" - ) - cur.execute( - """CREATE CONNECTION IF NOT EXISTS csr_conn - FOR CONFLUENT SCHEMA REGISTRY - URL 'http://schema-registry:8081'""" - ) - conn.autocommit = False - conn.close() + with c.override( + # Fixed port so that we keep the same port after restarting Mz in disruptions + Materialized( + ports=["16875:6875"], + external_blob_store=True, + blob_store_is_azure=args.azurite, + external_metadata_store=True, + system_parameter_defaults=get_default_system_parameters(zero_downtime=True), + additional_system_parameter_defaults={"unsafe_enable_table_keys": "true"}, + sanity_restart=False, + ), + Materialized( + name="materialized2", + ports=["26875:6875"], + external_blob_store=True, + blob_store_is_azure=args.azurite, + external_metadata_store=True, + system_parameter_defaults=get_default_system_parameters(zero_downtime=True), + additional_system_parameter_defaults={"unsafe_enable_table_keys": "true"}, + 
sanity_restart=False, + ), + ): + c.up(*services) + conn = c.sql_connection() + conn.autocommit = True + with conn.cursor() as cur: + cur.execute( + """CREATE CONNECTION IF NOT EXISTS kafka_conn + FOR KAFKA BROKER 'kafka:9092', SECURITY PROTOCOL PLAINTEXT""" + ) + cur.execute( + """CREATE CONNECTION IF NOT EXISTS csr_conn + FOR CONFLUENT SCHEMA REGISTRY + URL 'http://schema-registry:8081'""" + ) + conn.autocommit = False + conn.close() - ports = {s: c.default_port(s) for s in services} - ports["materialized2"] = 26875 - mz_service = "materialized" - deploy_generation = 0 + ports = {s: c.default_port(s) for s in services} + ports["materialized2"] = 26875 + mz_service = "materialized" + deploy_generation = 0 - for i, workload_class in enumerate(workloads): - random.seed(args.seed) - print(f"--- Testing workload {workload_class.__name__}") - workload = workload_class(c, mz_service, deploy_generation) - execute_workload( - executor_classes, - workload, - i, - ports, - args.runtime, - args.verbose, - ) - mz_service = workload.mz_service - deploy_generation = workload.deploy_generation + for i, workload_class in enumerate(workloads): + random.seed(args.seed) + print(f"--- Testing workload {workload_class.__name__}") + workload = workload_class(args.azurite, c, mz_service, deploy_generation) + execute_workload( + executor_classes, + workload, + i, + ports, + args.runtime, + args.verbose, + ) + mz_service = workload.mz_service + deploy_generation = workload.deploy_generation diff --git a/test/feature-benchmark/mzcompose.py b/test/feature-benchmark/mzcompose.py index c4378267d5b57..e50c5754393b0 100644 --- a/test/feature-benchmark/mzcompose.py +++ b/test/feature-benchmark/mzcompose.py @@ -84,6 +84,7 @@ TerminationCondition, ) from materialize.mzcompose.composition import Composition, WorkflowArgumentParser +from materialize.mzcompose.services.azure import Azurite from materialize.mzcompose.services.balancerd import Balancerd from materialize.mzcompose.services.clusterd import Clusterd from materialize.mzcompose.services.cockroach import Cockroach @@ -136,6 +137,7 @@ def make_aggregation_class() -> type[Aggregation]: Redpanda(), Cockroach(setup_materialize=True), Minio(setup_materialize=True), + Azurite(), KgenService(), Postgres(), MySql(), @@ -198,7 +200,13 @@ def run_one_scenario( additional_system_parameter_defaults[param_name] = param_value mz_image = f"materialize/materialized:{tag}" if tag else None - mz = create_mz_service(mz_image, size, additional_system_parameter_defaults) + # TODO: Better azurite support detection + mz = create_mz_service( + mz_image, + size, + additional_system_parameter_defaults, + args.azurite and instance == "this", + ) clusterd_image = f"materialize/clusterd:{tag}" if tag else None clusterd = create_clusterd_service( clusterd_image, size, additional_system_parameter_defaults @@ -209,7 +217,13 @@ def run_one_scenario( f"Unable to find materialize image with tag {tag}, proceeding with latest instead!" 
) mz_image = "materialize/materialized:latest" - mz = create_mz_service(mz_image, size, additional_system_parameter_defaults) + # TODO: Better azurite support detection + mz = create_mz_service( + mz_image, + size, + additional_system_parameter_defaults, + args.azurite and instance == "this", + ) clusterd_image = f"materialize/clusterd:{tag}" if tag else None clusterd = create_clusterd_service( clusterd_image, size, additional_system_parameter_defaults @@ -225,6 +239,8 @@ def run_one_scenario( default_timeout=default_timeout, materialize_params={"statement_timeout": f"'{default_timeout}'"}, metadata_store="cockroach", + external_blob_store=True, + blob_store_is_azure=args.azurite, ) ): c.testdrive( @@ -310,6 +326,7 @@ def create_mz_service( mz_image: str | None, default_size: int, additional_system_parameter_defaults: dict[str, str] | None, + azurite: bool, ) -> Materialized: return Materialized( image=mz_image, @@ -320,7 +337,8 @@ def create_mz_service( additional_system_parameter_defaults=additional_system_parameter_defaults, external_metadata_store=True, metadata_store="cockroach", - external_minio=True, + external_blob_store=True, + blob_store_is_azure=azurite, sanity_restart=False, ) @@ -464,6 +482,9 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: parser.add_argument( "--other-size", metavar="N", type=int, default=4, help="SIZE to use for 'OTHER'" ) + parser.add_argument( + "--azurite", action="store_true", help="Use Azurite as blob store instead of S3" + ) args = parser.parse_args() diff --git a/test/legacy-upgrade/mzcompose.py b/test/legacy-upgrade/mzcompose.py index 97dbc8e976288..45a7d2824bce2 100644 --- a/test/legacy-upgrade/mzcompose.py +++ b/test/legacy-upgrade/mzcompose.py @@ -28,6 +28,7 @@ from materialize.mzcompose.services.testdrive import Testdrive from materialize.mzcompose.services.zookeeper import Zookeeper from materialize.version_list import ( + LTS_VERSIONS, VersionsFromDocs, get_all_published_mz_versions, get_published_minor_mz_versions, @@ -78,6 +79,7 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: choices=["docs", "git"], help="from what source to fetch the versions", ) + parser.add_argument("--lts-upgrade", action="store_true") args = parser.parse_args() parallelism_index = buildkite.get_parallelism_index() @@ -123,6 +125,18 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: test_upgrade_from_version( c, "current_source", priors=[], filter=args.filter, zero_downtime=False ) + if args.lts_upgrade: + # Direct upgrade from latest LTS version without any inbetween versions + version = LTS_VERSIONS[-1] + priors = [v for v in all_versions if v <= version] + test_upgrade_from_version( + c, + f"{version}", + priors, + filter=args.filter, + zero_downtime=False, + lts_upgrade=True, + ) def get_all_and_latest_two_minor_mz_versions( @@ -144,6 +158,7 @@ def test_upgrade_from_version( priors: list[MzVersion], filter: str, zero_downtime: bool, + lts_upgrade: bool = False, ) -> None: print( f"+++ Testing {'0dt upgrade' if zero_downtime else 'regular upgrade'} from Materialize {from_version} to current_source." 
@@ -233,7 +248,7 @@ def test_upgrade_from_version( c.kill(mz_service) c.rm(mz_service, "testdrive") - if from_version != "current_source": + if from_version != "current_source" and not lts_upgrade: # We can't skip in-between minor versions anymore, so go through all of them for version in get_published_minor_mz_versions(newest_first=False): if version <= from_version: diff --git a/test/mysql-cdc/types-bit.td b/test/mysql-cdc/types-bit.td new file mode 100644 index 0000000000000..7d3b84b3b758f --- /dev/null +++ b/test/mysql-cdc/types-bit.td @@ -0,0 +1,84 @@ +# Copyright Materialize, Inc. and contributors. All rights reserved. +# +# Use of this software is governed by the Business Source License +# included in the LICENSE file at the root of this repository. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0. + +$ set-sql-timeout duration=1s +$ set-max-tries max-tries=20 + + +# +# Test the BIT data type +# + +> CREATE SECRET mysqlpass AS '${arg.mysql-root-password}' +> CREATE CONNECTION mysql_conn TO MYSQL ( + HOST mysql, + USER root, + PASSWORD SECRET mysqlpass + ) + +$ mysql-connect name=mysql url=mysql://root@mysql password=${arg.mysql-root-password} + +$ mysql-execute name=mysql +DROP DATABASE IF EXISTS public; +CREATE DATABASE public; +USE public; + +# Insert data pre-snapshot +CREATE TABLE t1 (f1 BIT(11), f2 BIT(1)); +INSERT INTO t1 VALUES (8, 0); +INSERT INTO t1 VALUES (13, 1); +INSERT INTO t1 VALUES (b'11100000100', b'1'); +INSERT INTO t1 VALUES (b'0000', b'0'); +INSERT INTO t1 VALUES (b'11111111111', b'0'); + +CREATE TABLE t2 (f1 BIT(64)); +INSERT INTO t2 VALUES (0); +INSERT INTO t2 VALUES (1); +INSERT INTO t2 VALUES (2032); +INSERT INTO t2 VALUES (b'11111111'); +INSERT INTO t2 VALUES (b'1111111111111111111111111111111111111111111111111111111111111111'); + +> CREATE SOURCE mz_source FROM MYSQL CONNECTION mysql_conn; + +> CREATE TABLE t1 FROM SOURCE mz_source (REFERENCE public.t1); + +> CREATE TABLE t2 FROM SOURCE mz_source (REFERENCE public.t2); + +> SELECT COUNT(*) > 0 FROM t1; +true + +> SELECT COUNT(*) > 0 FROM t2; +true + +# Insert the same data post-snapshot +$ mysql-execute name=mysql +INSERT INTO t1 SELECT * FROM t1; + +# MySQL does not have a proper boolean type +> SELECT pg_typeof(f1), pg_typeof(f2) FROM t1 LIMIT 1; +uint8 uint8 + +> SELECT * FROM t1 ORDER BY f1 DESC; +0 0 +0 0 +8 0 +8 0 +13 1 +13 1 +1796 1 +1796 1 +2047 0 +2047 0 + +> SELECT * FROM t2 ORDER BY f1 DESC; +0 +1 +255 +2032 +18446744073709551615 diff --git a/test/mzcompose_examples/mzcompose.py b/test/mzcompose_examples/mzcompose.py index 53b4a687059b2..9adcc15d8355b 100644 --- a/test/mzcompose_examples/mzcompose.py +++ b/test/mzcompose_examples/mzcompose.py @@ -73,7 +73,7 @@ def workflow_mz_with_options(c: Composition) -> None: def workflow_minio(c: Composition) -> None: - mz = Materialized(external_minio=True) + mz = Materialized(external_blob_store=True) with c.override(mz): c.up("materialized") diff --git a/test/parallel-benchmark/mzcompose.py b/test/parallel-benchmark/mzcompose.py index d464220dcfc6c..907d41839e8fe 100644 --- a/test/parallel-benchmark/mzcompose.py +++ b/test/parallel-benchmark/mzcompose.py @@ -25,6 +25,7 @@ from materialize.mz_env_util import get_cloud_hostname from materialize.mzcompose import ADDITIONAL_BENCHMARKING_SYSTEM_PARAMETERS from materialize.mzcompose.composition import Composition, WorkflowArgumentParser +from materialize.mzcompose.services.azure 
import Azurite from materialize.mzcompose.services.balancerd import Balancerd from materialize.mzcompose.services.cockroach import Cockroach from materialize.mzcompose.services.kafka import Kafka as KafkaService @@ -95,13 +96,14 @@ def known_regression(scenario: str, other_tag: str) -> bool: Redpanda(), Cockroach(setup_materialize=True), Minio(setup_materialize=True), + Azurite(), KgenService(), Postgres(), MySql(), Balancerd(), # Overridden below Materialized(), - Testdrive(no_reset=True, seed=1, metadata_store="cockroach"), + Testdrive(), Mz(app_password=""), ] @@ -438,12 +440,22 @@ def run_once( default_size=args.size, soft_assertions=False, external_metadata_store=True, - external_minio=True, + external_blob_store=True, + # TODO: Better azurite support detection + blob_store_is_azure=args.azurite and bool(tag), sanity_restart=False, additional_system_parameter_defaults=ADDITIONAL_BENCHMARKING_SYSTEM_PARAMETERS | {"max_connections": "100000"}, metadata_store="cockroach", - ) + ), + Testdrive( + no_reset=True, + seed=1, + metadata_store="cockroach", + external_blob_store=True, + # TODO: Better azurite support detection + blob_store_is_azure=args.azurite and bool(tag), + ), ] target = None @@ -778,6 +790,9 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: action="store_true", help="Store results in SQLite instead of in memory", ) + parser.add_argument( + "--azurite", action="store_true", help="Use Azurite as blob store instead of S3" + ) args = parser.parse_args() diff --git a/test/parallel-workload/mzcompose.py b/test/parallel-workload/mzcompose.py index 129e32e595821..d6bd038b1a294 100644 --- a/test/parallel-workload/mzcompose.py +++ b/test/parallel-workload/mzcompose.py @@ -20,6 +20,7 @@ from materialize.mzcompose.composition import Composition, WorkflowArgumentParser from materialize.mzcompose.service import Service +from materialize.mzcompose.services.azure import Azurite from materialize.mzcompose.services.cockroach import Cockroach from materialize.mzcompose.services.kafka import Kafka from materialize.mzcompose.services.materialized import Materialized @@ -48,6 +49,7 @@ ), SchemaRegistry(), Minio(setup_materialize=True, additional_directories=["copytos3"]), + Azurite(), Mc(), Materialized(), Materialized(name="materialized2"), @@ -72,6 +74,7 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: "zookeeper", "kafka", "schema-registry", + # Still required for backups/s3 testing even when we use Azurite as blob store "minio", "materialized", ] @@ -83,7 +86,9 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: with c.override( Materialized( - external_minio="toxiproxy", + # TODO: Retry with toxiproxy on azurite + external_blob_store=True, + blob_store_is_azure=args.azurite, external_metadata_store="toxiproxy", ports=["6975:6875", "6976:6876", "6977:6877"], sanity_restart=sanity_restart, @@ -124,6 +129,7 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: args.threads, args.naughty_identifiers, c, + args.azurite, sanity_restart, ) # Don't wait for potentially hanging threads that we are ignoring @@ -161,6 +167,16 @@ def toxiproxy_start(c: Composition) -> None: }, ) assert r.status_code == 201, r + r = requests.post( + f"http://localhost:{port}/proxies", + json={ + "name": "azurite", + "listen": "0.0.0.0:10000", + "upstream": "azurite:10000", + "enabled": True, + }, + ) + assert r.status_code == 201, r r = requests.post( f"http://localhost:{port}/proxies/cockroach/toxics", 
json={ @@ -179,3 +195,12 @@ def toxiproxy_start(c: Composition) -> None: }, ) assert r.status_code == 200, r + r = requests.post( + f"http://localhost:{port}/proxies/minio/toxics", + json={ + "name": "azurite", + "type": "latency", + "attributes": {"latency": 0, "jitter": 100}, + }, + ) + assert r.status_code == 200, r diff --git a/test/persist/mzcompose.py b/test/persist/mzcompose.py index c52fc3a697904..3cbcc51ca0131 100644 --- a/test/persist/mzcompose.py +++ b/test/persist/mzcompose.py @@ -19,9 +19,11 @@ WorkflowArgumentParser, ) from materialize.mzcompose.services.cockroach import Cockroach +from materialize.mzcompose.services.postgres import PostgresMetadata SERVICES = [ Cockroach(setup_materialize=True), + PostgresMetadata(), Service( "maelstrom-persist", {"mzbuild": "maelstrom-persist", "volumes": ["./maelstrom:/store"]}, @@ -48,7 +50,7 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: parser.add_argument( "--consensus", type=str, - choices=["mem", "cockroach", "maelstrom"], + choices=["mem", "cockroach", "maelstrom", "postgres"], default="maelstrom", ) parser.add_argument( @@ -66,7 +68,12 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: consensus_uri = ( "postgres://root@cockroach:26257?options=--search_path=consensus" ) - c.up(c.metadata_store()) + c.up("cockroach") + elif args.consensus == "postgres": + consensus_uri = ( + "postgres://root@postgres-metadata:26257?options=--search_path=consensus" + ) + c.up("postgres-metadata") else: # empty consensus uri defaults to Maelstrom consensus implementation consensus_uri = "" diff --git a/test/platform-checks/mzcompose.py b/test/platform-checks/mzcompose.py index 330e3feaf378f..e1b3a26f61572 100644 --- a/test/platform-checks/mzcompose.py +++ b/test/platform-checks/mzcompose.py @@ -26,6 +26,7 @@ from materialize.checks.scenarios_upgrade import * # noqa: F401 F403 from materialize.checks.scenarios_zero_downtime import * # noqa: F401 F403 from materialize.mzcompose.composition import Composition, WorkflowArgumentParser +from materialize.mzcompose.services.azure import Azurite from materialize.mzcompose.services.clusterd import Clusterd from materialize.mzcompose.services.cockroach import Cockroach from materialize.mzcompose.services.debezium import Debezium @@ -48,19 +49,37 @@ def create_mzs( - additional_system_parameter_defaults: dict[str, str] | None = None -) -> list[Materialized]: + azurite: bool, + additional_system_parameter_defaults: dict[str, str] | None = None, +) -> list[TestdriveService | Materialized]: return [ Materialized( name=mz_name, external_metadata_store=True, - external_minio=True, + external_blob_store=True, + blob_store_is_azure=azurite, sanity_restart=False, volumes_extra=["secrets:/share/secrets"], metadata_store="cockroach", additional_system_parameter_defaults=additional_system_parameter_defaults, ) for mz_name in ["materialized", "mz_1", "mz_2", "mz_3", "mz_4", "mz_5"] + ] + [ + TestdriveService( + default_timeout=TESTDRIVE_DEFAULT_TIMEOUT, + materialize_params={"statement_timeout": f"'{TESTDRIVE_DEFAULT_TIMEOUT}'"}, + external_blob_store=True, + blob_store_is_azure=azurite, + no_reset=True, + seed=1, + entrypoint_extra=[ + "--var=replicas=1", + f"--var=default-replica-size={Materialized.Size.DEFAULT_SIZE}-{Materialized.Size.DEFAULT_SIZE}", + f"--var=default-storage-size={Materialized.Size.DEFAULT_SIZE}-1", + ], + volumes_extra=["secrets:/share/secrets"], + metadata_store="cockroach", + ) ] @@ -73,6 +92,7 @@ def create_mzs( restart="on-failure:5", 
), Minio(setup_materialize=True, additional_directories=["copytos3"]), + Azurite(), Mc(), Postgres(), MySql(), @@ -117,20 +137,7 @@ def create_mzs( Clusterd( name="clusterd_compute_1" ), # Started by some Scenarios, defined here only for the teardown - *create_mzs(), - TestdriveService( - default_timeout=TESTDRIVE_DEFAULT_TIMEOUT, - materialize_params={"statement_timeout": f"'{TESTDRIVE_DEFAULT_TIMEOUT}'"}, - no_reset=True, - seed=1, - entrypoint_extra=[ - "--var=replicas=1", - f"--var=default-replica-size={Materialized.Size.DEFAULT_SIZE}-{Materialized.Size.DEFAULT_SIZE}", - f"--var=default-storage-size={Materialized.Size.DEFAULT_SIZE}-1", - ], - volumes_extra=["secrets:/share/secrets"], - metadata_store="cockroach", - ), + *create_mzs(azurite=False), Persistcli(), SshBastionHost(), ] @@ -157,7 +164,6 @@ def setup(c: Composition) -> None: "postgres", "mysql", "debezium", - "minio", "ssh-bastion-host", ) @@ -211,6 +217,9 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: nargs="*", help="System parameters to set in Materialize, i.e. what you would set with `ALTER SYSTEM SET`", ) + parser.add_argument( + "--azurite", action="store_true", help="Use Azurite as blob store instead of S3" + ) args = parser.parse_args() @@ -242,7 +251,7 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: assert len(x) == 2, f"--system-param '{val}' should be the format =" additional_system_parameter_defaults[x[0]] = x[1] - with c.override(*create_mzs(additional_system_parameter_defaults)): + with c.override(*create_mzs(args.azurite, additional_system_parameter_defaults)): executor = MzcomposeExecutor(composition=c) for scenario_class in scenarios: assert issubclass( @@ -263,7 +272,10 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: if execution_mode in [ExecutionMode.SEQUENTIAL, ExecutionMode.PARALLEL]: setup(c) scenario = scenario_class( - checks=checks, executor=executor, seed=args.seed + checks=checks, + executor=executor, + azurite=args.azurite, + seed=args.seed, ) scenario.run() teardown(c) @@ -277,7 +289,10 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: ) setup(c) scenario = scenario_class( - checks=[check], executor=executor, seed=args.seed + checks=[check], + executor=executor, + azurite=args.azurite, + seed=args.seed, ) scenario.run() teardown(c) diff --git a/test/sqllogictest/advent-of-code/2023/aoc_1203.slt b/test/sqllogictest/advent-of-code/2023/aoc_1203.slt index 631a437f543af..d6132606b5acd 100644 --- a/test/sqllogictest/advent-of-code/2023/aoc_1203.slt +++ b/test/sqllogictest/advent-of-code/2023/aoc_1203.slt @@ -228,7 +228,7 @@ EXPLAIN WITH MUTUALLY RECURSIVE SELECT * FROM part1, part2; ---- Explained Query: - With Mutually Recursive + With cte l0 = Reduce aggregates=[count(*)] Project () @@ -295,141 +295,143 @@ Explained Query: cte l8 = ArrangeBy keys=[[#0, #1]] Get l5 - cte l9 = - Distinct project=[#0..=#3] - Union - Distinct project=[#0..=#3] - Union - Project (#2, #0, #1, #7) - Map (1) - Join on=(#0 = (#3 + #5) AND #1 = (#4 + #6)) type=delta - ArrangeBy keys=[[]] - Get l5 - ArrangeBy keys=[[]] - Project (#0, #1) - Get l6 - Get l7 - Get l7 - Project (#7, #0, #1, #8) - Map ((#2 || #3), (#6 + 1)) - Join on=(#0 = #4 AND #1 = (#5 - 1)) type=differential - Get l8 - ArrangeBy keys=[[#1, (#2 - 1)]] - Get l9 - Project (#7, #0, #5, #8) - Map ((#3 || #2), (#6 + 1)) - Join on=(#0 = #4 AND #1 = (#5 + #6)) type=differential - Get l8 - ArrangeBy keys=[[#1, (#2 + #3)]] - Get l9 Return - 
With - cte l10 = - Distinct project=[#0, #1] - Project (#1, #2) - Get l9 - cte l11 = - ArrangeBy keys=[[#0, #1]] - Project (#0, #1) - Get l5 - cte l12 = - Project (#0..=#3) - Join on=(#1 = #4 AND #2 = #5) type=differential - ArrangeBy keys=[[#1, #2]] - Get l9 - ArrangeBy keys=[[#0, #1]] + With Mutually Recursive + cte l9 = + Distinct project=[#0..=#3] + Union + Distinct project=[#0..=#3] Union - Negate - Distinct project=[#0, #1] - Project (#0, #1) - Join on=(#0 = #2 AND #3 = (#1 - 1)) type=differential - ArrangeBy keys=[[#0, (#1 - 1)]] - Get l10 - Get l11 - Get l10 - cte l13 = - Distinct project=[#0..=#2] - Project (#1..=#3) - Get l12 - cte l14 = - Project (#1..=#3, #7) - Map (text_to_integer(#0)) - Join on=(#1 = #4 AND #2 = #5 AND #3 = #6) type=differential - ArrangeBy keys=[[#1..=#3]] - Get l12 - ArrangeBy keys=[[#0..=#2]] + Project (#2, #0, #1, #7) + Map (1) + Join on=(#0 = (#3 + #5) AND #1 = (#4 + #6)) type=delta + ArrangeBy keys=[[]] + Get l5 + ArrangeBy keys=[[]] + Project (#0, #1) + Get l6 + Get l7 + Get l7 + Project (#7, #0, #1, #8) + Map ((#2 || #3), (#6 + 1)) + Join on=(#0 = #4 AND #1 = (#5 - 1)) type=differential + Get l8 + ArrangeBy keys=[[#1, (#2 - 1)]] + Get l9 + Project (#7, #0, #5, #8) + Map ((#3 || #2), (#6 + 1)) + Join on=(#0 = #4 AND #1 = (#5 + #6)) type=differential + Get l8 + ArrangeBy keys=[[#1, (#2 + #3)]] + Get l9 + Return + With + cte l10 = + Distinct project=[#0, #1] + Project (#1, #2) + Get l9 + cte l11 = + ArrangeBy keys=[[#0, #1]] + Project (#0, #1) + Get l5 + cte l12 = + Project (#0..=#3) + Join on=(#1 = #4 AND #2 = #5) type=differential + ArrangeBy keys=[[#1, #2]] + Get l9 + ArrangeBy keys=[[#0, #1]] Union Negate - Distinct project=[#0..=#2] - Project (#0..=#2) - Join on=(#0 = #3 AND #4 = (#1 + #2)) type=differential - ArrangeBy keys=[[#0, (#1 + #2)]] - Get l13 - Get l11 - Get l13 - cte l15 = - Reduce aggregates=[sum(#0)] - Project (#3) - Get l14 - cte l16 = - ArrangeBy keys=[[]] - Constant - - (0) - - (-1) - - (1) - cte l17 = - Distinct project=[#0, #1, #4, #2, #3] - Project (#0, #1, #3, #4, #6) - Filter (#7 = (#1 + #2)) - FlatMap generate_series(#4, ((#4 + #5) - 1), 1) - Project (#0, #1, #3..=#7) - Join on=(#4 = (#0 + #2)) type=delta - ArrangeBy keys=[[]] + Distinct project=[#0, #1] Project (#0, #1) - Filter (#2 = "*") - Get l6 - Get l16 - Get l16 - ArrangeBy keys=[[#0]] - Get l14 - cte l18 = - ArrangeBy keys=[[#0, #1]] - Get l17 - cte l19 = - Reduce aggregates=[sum(#0)] - Project (#2) - Distinct project=[#0, #1, (#2 * #3)] - Project (#0, #1, #5, #10) - Filter (#2 = 2) AND ((#6 != #11) OR (#7 != #12)) - Join on=(#0 = #3 = #8 AND #1 = #4 = #9) type=delta - ArrangeBy keys=[[#0, #1]] - Reduce group_by=[#0, #1] aggregates=[count(*)] + Join on=(#0 = #2 AND #3 = (#1 - 1)) type=differential + ArrangeBy keys=[[#0, (#1 - 1)]] + Get l10 + Get l11 + Get l10 + cte l13 = + Distinct project=[#0..=#2] + Project (#1..=#3) + Get l12 + cte l14 = + Project (#1..=#3, #7) + Map (text_to_integer(#0)) + Join on=(#1 = #4 AND #2 = #5 AND #3 = #6) type=differential + ArrangeBy keys=[[#1..=#3]] + Get l12 + ArrangeBy keys=[[#0..=#2]] + Union + Negate + Distinct project=[#0..=#2] + Project (#0..=#2) + Join on=(#0 = #3 AND #4 = (#1 + #2)) type=differential + ArrangeBy keys=[[#0, (#1 + #2)]] + Get l13 + Get l11 + Get l13 + cte l15 = + Reduce aggregates=[sum(#0)] + Project (#3) + Get l14 + cte l16 = + ArrangeBy keys=[[]] + Constant + - (0) + - (-1) + - (1) + cte l17 = + Distinct project=[#0, #1, #4, #2, #3] + Project (#0, #1, #3, #4, #6) + Filter (#7 = (#1 + #2)) + FlatMap 
generate_series(#4, ((#4 + #5) - 1), 1) + Project (#0, #1, #3..=#7) + Join on=(#4 = (#0 + #2)) type=delta + ArrangeBy keys=[[]] Project (#0, #1) - Get l17 - Get l18 - Get l18 - Return - CrossJoin type=differential - ArrangeBy keys=[[]] - Union - Get l15 - Map (null) - Union - Negate - Project () - Get l15 - Constant - - () - ArrangeBy keys=[[]] - Union - Get l19 - Map (null) - Union - Negate - Project () - Get l19 - Constant - - () + Filter (#2 = "*") + Get l6 + Get l16 + Get l16 + ArrangeBy keys=[[#0]] + Get l14 + cte l18 = + ArrangeBy keys=[[#0, #1]] + Get l17 + cte l19 = + Reduce aggregates=[sum(#0)] + Project (#2) + Distinct project=[#0, #1, (#2 * #3)] + Project (#0, #1, #5, #10) + Filter (#2 = 2) AND ((#6 != #11) OR (#7 != #12)) + Join on=(#0 = #3 = #8 AND #1 = #4 = #9) type=delta + ArrangeBy keys=[[#0, #1]] + Reduce group_by=[#0, #1] aggregates=[count(*)] + Project (#0, #1) + Get l17 + Get l18 + Get l18 + Return + CrossJoin type=differential + ArrangeBy keys=[[]] + Union + Get l15 + Map (null) + Union + Negate + Project () + Get l15 + Constant + - () + ArrangeBy keys=[[]] + Union + Get l19 + Map (null) + Union + Negate + Project () + Get l19 + Constant + - () Source materialize.public.input diff --git a/test/sqllogictest/advent-of-code/2023/aoc_1204.slt b/test/sqllogictest/advent-of-code/2023/aoc_1204.slt index a19ac7000bb2f..3b4a01fa265d1 100644 --- a/test/sqllogictest/advent-of-code/2023/aoc_1204.slt +++ b/test/sqllogictest/advent-of-code/2023/aoc_1204.slt @@ -280,7 +280,7 @@ FROM multipliers; ---- Explained Query: - With Mutually Recursive + With cte l0 = Project (#3, #4) Map (regexp_match["Card +(\d+): (.*)", case_insensitive=false](#1), text_to_integer(array_index(#2, 1)), regexp_split_to_array[" \| ", case_insensitive=false](array_index(#2, 2))) @@ -330,44 +330,46 @@ Explained Query: Project (#2, #0) Map (0) Get l1 - cte l5 = - Project (#1, #3) - Join on=(#0 = #2) type=differential - ArrangeBy keys=[[#0]] - Filter (#0) IS NOT NULL - Get l4 - ArrangeBy keys=[[#0]] - Filter (#0) IS NOT NULL - Get l6 - cte l6 = - Project (#0, #2) - Map (bigint_to_integer(#1)) - Reduce group_by=[#0] aggregates=[sum(coalesce(#1, 1))] - Union - Map (null) - Union - Negate - Project (#0) - Get l5 - Project (#1) - Get l4 - Get l5 Return - With - cte l7 = - Reduce aggregates=[sum(#0)] - Project (#1) - Get l6 + With Mutually Recursive + cte l5 = + Project (#1, #3) + Join on=(#0 = #2) type=differential + ArrangeBy keys=[[#0]] + Filter (#0) IS NOT NULL + Get l4 + ArrangeBy keys=[[#0]] + Filter (#0) IS NOT NULL + Get l6 + cte l6 = + Project (#0, #2) + Map (bigint_to_integer(#1)) + Reduce group_by=[#0] aggregates=[sum(coalesce(#1, 1))] + Union + Map (null) + Union + Negate + Project (#0) + Get l5 + Project (#1) + Get l4 + Get l5 Return - Union - Get l7 - Map (null) - Union - Negate - Project () - Get l7 - Constant - - () + With + cte l7 = + Reduce aggregates=[sum(#0)] + Project (#1) + Get l6 + Return + Union + Get l7 + Map (null) + Union + Negate + Project () + Get l7 + Constant + - () Source materialize.public.aoc_1204 @@ -506,129 +508,105 @@ EXPLAIN WITH MUTUALLY RECURSIVE select * from part1, part2; ---- Explained Query: - With Mutually Recursive + With cte l0 = Project (#3..=#5) Map (regexp_split_to_array["(:|\|)", case_insensitive=false](#1), text_to_integer(array_index(regexp_match["[0-9]+", case_insensitive=false](btrim(array_index(#2, 1))), 1)), regexp_split_to_array[" ", case_insensitive=false](btrim(array_index(#2, 2))), regexp_split_to_array[" ", case_insensitive=false](btrim(array_index(#2, 3)))) 
FlatMap unnest_array(regexp_split_to_array["\n", case_insensitive=false](#0)) ReadStorage materialize.public.aoc_1204 cte l1 = - Distinct project=[#0..=#2] - Get l0 - cte l2 = Distinct project=[#0, #1] Project (#1, #2) - Get l1 - cte l3 = + Get l0 + cte l2 = Filter (#2 != "") FlatMap unnest_array(#0) - Get l2 - cte l4 = + Get l1 + cte l3 = Reduce group_by=[#0, #1] aggregates=[count(*)] Project (#0, #1) Distinct project=[#0..=#2] Union - Get l3 + Get l2 Negate Threshold Union - Get l3 + Get l2 Negate Filter (#2 != "") FlatMap unnest_array(#1) - Get l2 - cte l5 = - ArrangeBy keys=[[#0, #1]] - Get l2 - cte l6 = + Get l1 + cte l4 = Union - Get l4 - Project (#0, #1, #4) - Map (0) - Join on=(#0 = #2 AND #1 = #3) type=differential - ArrangeBy keys=[[#0, #1]] + Get l3 + Map (0) + Union + Negate + Project (#0, #1) + Get l3 + Get l1 + cte l5 = + Project (#0, #5) + Join on=(#1 = #3 AND #2 = #4) type=differential + ArrangeBy keys=[[#1, #2]] + Get l0 + ArrangeBy keys=[[#0, #1]] + Union + Get l4 + Map (null) Union Negate Project (#0, #1) Get l4 - Get l2 - Get l5 - cte l7 = - Union - Get l6 - Map (error("more than one record produced in subquery")) - Project (#0, #1) - Filter (#2 > 1) - Reduce group_by=[#0, #1] aggregates=[count(*)] - Project (#0, #1) - Get l6 - cte l8 = - Project (#0, #8) - Join on=(#0 = #3 AND #1 = #4 = #6 AND #2 = #5 = #7) type=delta - ArrangeBy keys=[[#0..=#2], [#1, #2]] - Get l0 - ArrangeBy keys=[[#0..=#2]] - Get l1 - ArrangeBy keys=[[#0, #1]] - Union - Get l7 - Project (#0, #1, #4) - Map (null) - Join on=(#0 = #2 AND #1 = #3) type=differential - ArrangeBy keys=[[#0, #1]] - Union - Negate - Distinct project=[#0, #1] - Project (#0, #1) - Get l7 - Get l2 - Get l5 - cte l9 = + Get l1 + cte l6 = Reduce aggregates=[sum(power(2, bigint_to_double((#0 - 1))))] Project (#1) Filter (#1 > 0) - Get l8 - cte l10 = - Union - Get l8 - Project (#2, #3) - Filter (integer_to_bigint(#2) = (integer_to_bigint(#0) + #4)) - FlatMap generate_series(1, #1, 1) - CrossJoin type=differential - ArrangeBy keys=[[]] - Get l10 - ArrangeBy keys=[[]] - Get l8 + Get l5 Return - With - cte l11 = - Reduce aggregates=[count(*)] - Project () - Get l10 + With Mutually Recursive + cte l7 = + Union + Get l5 + Project (#2, #3) + Filter (integer_to_bigint(#2) = (integer_to_bigint(#0) + #4)) + FlatMap generate_series(1, #1, 1) + CrossJoin type=differential + ArrangeBy keys=[[]] + Get l7 + ArrangeBy keys=[[]] + Get l5 Return - CrossJoin type=differential - ArrangeBy keys=[[]] - Project (#1) - Map (double_to_numeric(#0)) - Union - Get l9 - Map (null) - Union - Negate - Project () - Get l9 - Constant - - () - ArrangeBy keys=[[]] - Union - Get l11 - Map (0) - Union - Negate - Project () - Get l11 - Constant - - () + With + cte l8 = + Reduce aggregates=[count(*)] + Project () + Get l7 + Return + CrossJoin type=differential + ArrangeBy keys=[[]] + Project (#1) + Map (double_to_numeric(#0)) + Union + Get l6 + Map (null) + Union + Negate + Project () + Get l6 + Constant + - () + ArrangeBy keys=[[]] + Union + Get l8 + Map (0) + Union + Negate + Project () + Get l8 + Constant + - () Source materialize.public.aoc_1204 diff --git a/test/sqllogictest/advent-of-code/2023/aoc_1205.slt b/test/sqllogictest/advent-of-code/2023/aoc_1205.slt index 38bf58d38e374..053034f6eb5af 100644 --- a/test/sqllogictest/advent-of-code/2023/aoc_1205.slt +++ b/test/sqllogictest/advent-of-code/2023/aoc_1205.slt @@ -1145,7 +1145,7 @@ EXPLAIN WITH MUTUALLY RECURSIVE SELECT * FROM part1, part2; ---- Explained Query: - With Mutually Recursive + With cte l0 = Project 
(#2, #3) Map (split_string(#1, ":", 1), split_string(#1, ":", 2)) @@ -1162,209 +1162,211 @@ Explained Query: ArrangeBy keys=[[#0, #1]] Get l1 cte l3 = - Project (#0..=#2, #5, #6) - Filter (#2 >= #5) AND (#2 <= ((#5 + #7) - 1)) - Join on=(#0 = #3 AND #1 = #4) type=differential - ArrangeBy keys=[[#0, #1]] - Get l7 - Get l2 - cte l4 = Project (#1) Filter (#0 = "seeds") Get l0 - cte l5 = - Union - Project (#3, #2) - Map (text_to_bigint(#1), "seed") - FlatMap unnest_array(regexp_split_to_array[" ", case_insensitive=false](btrim(#0))) - Get l4 - Project (#0, #4) - Map (coalesce((#1 + (#3 - #2)), #1)) - Union - Project (#1..=#4) - Get l3 - Project (#1, #2, #6, #7) - Map (null, null) - Join on=(#0 = #3 AND #1 = #4 AND #2 = #5) type=differential - ArrangeBy keys=[[#0..=#2]] - Union - Negate - Distinct project=[#0..=#2] - Project (#0..=#2) - Get l3 - Get l7 - ArrangeBy keys=[[#0..=#2]] - Get l7 - cte l6 = + cte l4 = ArrangeBy keys=[[#0]] Distinct project=[#0, #1] Project (#0, #1) Get l1 - cte l7 = - Project (#0, #3, #1) - Join on=(#0 = #2) type=differential - ArrangeBy keys=[[#0]] - Distinct project=[#0, #1] - Get l5 - Get l6 - cte l8 = - Reduce aggregates=[min(#0)] - Project (#1) - Filter (#0 = "location") - Get l5 - cte l9 = - Distinct project=[#0..=#2] - Union - Project (#6, #4, #5) - Map (regexp_split_to_array[" ", case_insensitive=false](btrim(#0)), (2 * #1), text_to_bigint(array_index(#2, integer_to_bigint((#3 - 1)))), (#4 + text_to_bigint(array_index(#2, integer_to_bigint(#3)))), "seed") - FlatMap generate_series(1, ((regexp_split_to_array[" ", case_insensitive=false](btrim(#0)) array_length 1) / 2), 1) - Get l4 - Project (#1, #10, #11) - Map ((#8 - #4), (#6 + #9), (#7 + #9)) - Get l11 - Get l19 - cte l10 = - Project (#0..=#2, #4) - Join on=(#0 = #3) type=differential - ArrangeBy keys=[[#0]] - Get l9 - Get l6 - cte l11 = - Project (#0, #3, #1, #2, #6, #10, #9, #11, #7) - Filter (#9 < #11) - Map (greatest(#1, #6), (#6 + #8), least(#2, #10)) - Join on=(#0 = #4 AND #3 = #5) type=differential - ArrangeBy keys=[[#0, #3]] - Get l10 + Return + With Mutually Recursive + cte l5 = + Project (#0..=#2, #5, #6) + Filter (#2 >= #5) AND (#2 <= ((#5 + #7) - 1)) + Join on=(#0 = #3 AND #1 = #4) type=differential + ArrangeBy keys=[[#0, #1]] + Get l7 Get l2 - cte l12 = - Distinct project=[#0..=#8] - Get l11 - cte l13 = - Distinct project=[#0..=#4] - Project (#0..=#3, #7) - Get l12 - cte l14 = - Reduce group_by=[#0..=#4] aggregates=[min(#5)] - Project (#0..=#4, #9) - Filter (#9 >= #4) - Join on=(#0 = #5 AND #1 = #6 AND #2 = #7 AND #3 = #8) type=differential - ArrangeBy keys=[[#0..=#3]] - Filter (#2) IS NOT NULL AND (#3) IS NOT NULL - Get l13 + cte l6 = + Union + Project (#3, #2) + Map (text_to_bigint(#1), "seed") + FlatMap unnest_array(regexp_split_to_array[" ", case_insensitive=false](btrim(#0))) + Get l3 + Project (#0, #4) + Map (coalesce((#1 + (#3 - #2)), #1)) + Union + Project (#1..=#4) + Get l5 + Project (#1, #2, #6, #7) + Map (null, null) + Join on=(#0 = #3 AND #1 = #4 AND #2 = #5) type=differential + ArrangeBy keys=[[#0..=#2]] + Union + Negate + Distinct project=[#0..=#2] + Project (#0..=#2) + Get l5 + Get l7 + ArrangeBy keys=[[#0..=#2]] + Get l7 + cte l7 = + Project (#0, #3, #1) + Join on=(#0 = #2) type=differential + ArrangeBy keys=[[#0]] + Distinct project=[#0, #1] + Get l6 + Get l4 + cte l8 = + Reduce aggregates=[min(#0)] + Project (#1) + Filter (#0 = "location") + Get l6 + cte l9 = + Distinct project=[#0..=#2] + Union + Project (#6, #4, #5) + Map (regexp_split_to_array[" ", 
case_insensitive=false](btrim(#0)), (2 * #1), text_to_bigint(array_index(#2, integer_to_bigint((#3 - 1)))), (#4 + text_to_bigint(array_index(#2, integer_to_bigint(#3)))), "seed") + FlatMap generate_series(1, ((regexp_split_to_array[" ", case_insensitive=false](btrim(#0)) array_length 1) / 2), 1) + Get l3 + Project (#1, #10, #11) + Map ((#8 - #4), (#6 + #9), (#7 + #9)) + Get l11 + Get l19 + cte l10 = + Project (#0..=#2, #4) + Join on=(#0 = #3) type=differential + ArrangeBy keys=[[#0]] + Get l9 + Get l4 + cte l11 = + Project (#0, #3, #1, #2, #6, #10, #9, #11, #7) + Filter (#9 < #11) + Map (greatest(#1, #6), (#6 + #8), least(#2, #10)) + Join on=(#0 = #4 AND #3 = #5) type=differential + ArrangeBy keys=[[#0, #3]] + Get l10 + Get l2 + cte l12 = + Distinct project=[#0..=#8] + Get l11 + cte l13 = + Distinct project=[#0..=#4] + Project (#0..=#3, #7) + Get l12 + cte l14 = + Reduce group_by=[#0..=#4] aggregates=[min(#5)] + Project (#0..=#4, #9) + Filter (#9 >= #4) + Join on=(#0 = #5 AND #1 = #6 AND #2 = #7 AND #3 = #8) type=differential + ArrangeBy keys=[[#0..=#3]] + Filter (#2) IS NOT NULL AND (#3) IS NOT NULL + Get l13 + ArrangeBy keys=[[#0..=#3]] + Project (#0..=#3, #6) + Filter (#2) IS NOT NULL AND (#6 < #3) + Get l11 + cte l15 = + Project (#0..=#4, #6) + Map (coalesce(#5, #3)) + Union + Get l14 + Project (#0..=#4, #10) + Map (null) + Join on=(#0 = #5 AND #1 = #6 AND #2 = #7 AND #3 = #8 AND #4 = #9) type=differential + ArrangeBy keys=[[#0..=#4]] + Union + Negate + Project (#0..=#4) + Get l14 + Get l13 + ArrangeBy keys=[[#0..=#4]] + Get l13 + cte l16 = + Reduce group_by=[#0, #3, #1, #2] aggregates=[min(#4)] + Project (#0..=#3, #8) + Join on=(#0 = #4 AND #1 = #6 AND #2 = #7 AND #3 = #5) type=differential ArrangeBy keys=[[#0..=#3]] + Filter (#1) IS NOT NULL AND (#2) IS NOT NULL + Get l10 + ArrangeBy keys=[[#0, #2, #3, #1]] Project (#0..=#3, #6) - Filter (#2) IS NOT NULL AND (#6 < #3) + Filter (#6 < #3) AND (#6 >= #2) Get l11 - cte l15 = - Project (#0..=#4, #6) - Map (coalesce(#5, #3)) + cte l17 = + ArrangeBy keys=[[#0..=#3]] + Get l10 + cte l18 = + Project (#0..=#3, #5) + Map (coalesce(#4, #3)) + Union + Get l16 + Project (#0..=#3, #8) + Map (null) + Join on=(#0 = #4 AND #1 = #7 AND #2 = #5 AND #3 = #6) type=differential + ArrangeBy keys=[[#0, #2, #3, #1]] + Union + Negate + Project (#0..=#3) + Get l16 + Project (#0, #3, #1, #2) + Get l10 + Get l17 + cte l19 = + Distinct project=[#0..=#2] Union - Get l14 - Project (#0..=#4, #10) - Map (null) - Join on=(#0 = #5 AND #1 = #6 AND #2 = #7 AND #3 = #8 AND #4 = #9) type=differential - ArrangeBy keys=[[#0..=#4]] - Union - Negate - Project (#0..=#4) - Get l14 - Get l13 - ArrangeBy keys=[[#0..=#4]] - Get l13 - cte l16 = - Reduce group_by=[#0, #3, #1, #2] aggregates=[min(#4)] - Project (#0..=#3, #8) - Join on=(#0 = #4 AND #1 = #6 AND #2 = #7 AND #3 = #5) type=differential - ArrangeBy keys=[[#0..=#3]] - Filter (#1) IS NOT NULL AND (#2) IS NOT NULL - Get l10 - ArrangeBy keys=[[#0, #2, #3, #1]] - Project (#0..=#3, #6) - Filter (#6 < #3) AND (#6 >= #2) + Project (#1, #7, #23) + Join on=(#0 = #9 = #18 AND #1 = #10 = #19 AND #2 = #11 = #20 AND #3 = #12 = #21 AND #4 = #13 AND #5 = #14 AND #6 = #15 AND #7 = #16 = #22 AND #8 = #17) type=delta + ArrangeBy keys=[[#0..=#8], [#0..=#3, #7]] Get l11 - cte l17 = - ArrangeBy keys=[[#0..=#3]] - Get l10 - cte l18 = - Project (#0..=#3, #5) - Map (coalesce(#4, #3)) - Union - Get l16 - Project (#0..=#3, #8) - Map (null) - Join on=(#0 = #4 AND #1 = #7 AND #2 = #5 AND #3 = #6) type=differential - ArrangeBy keys=[[#0, #2, #3, #1]] - 
Union - Negate - Project (#0..=#3) - Get l16 - Project (#0, #3, #1, #2) - Get l10 - Get l17 - cte l19 = - Distinct project=[#0..=#2] - Union - Project (#1, #7, #23) - Join on=(#0 = #9 = #18 AND #1 = #10 = #19 AND #2 = #11 = #20 AND #3 = #12 = #21 AND #4 = #13 AND #5 = #14 AND #6 = #15 AND #7 = #16 = #22 AND #8 = #17) type=delta - ArrangeBy keys=[[#0..=#8], [#0..=#3, #7]] - Get l11 - ArrangeBy keys=[[#0..=#8]] - Get l12 - ArrangeBy keys=[[#0..=#4]] - Union - Filter (#4 < #5) - Get l15 - Map (error("more than one record produced in subquery")) - Project (#0..=#4) - Filter error("more than one record produced in subquery") AND (#5 > 1) - Reduce group_by=[#0..=#4] aggregates=[count(*)] - Project (#0..=#4) - Get l15 - Distinct project=[#1, #0, #2] - Project (#1, #3, #12) - Join on=(#0 = #4 = #8 AND #1 = #5 = #10 AND #2 = #6 = #11 AND #3 = #7 = #9) type=delta - Get l17 - Get l17 - ArrangeBy keys=[[#0..=#3]] + ArrangeBy keys=[[#0..=#8]] + Get l12 + ArrangeBy keys=[[#0..=#4]] Union - Filter (#2 < #4) - Get l18 + Filter (#4 < #5) + Get l15 Map (error("more than one record produced in subquery")) - Project (#0..=#3) - Filter error("more than one record produced in subquery") AND (#4 > 1) - Reduce group_by=[#0..=#3] aggregates=[count(*)] - Project (#0..=#3) - Get l18 - Return - With - cte l20 = - Reduce aggregates=[min(#0)] - Project (#1) - Filter (#0 = "location") - Get l9 + Project (#0..=#4) + Filter error("more than one record produced in subquery") AND (#5 > 1) + Reduce group_by=[#0..=#4] aggregates=[count(*)] + Project (#0..=#4) + Get l15 + Distinct project=[#1, #0, #2] + Project (#1, #3, #12) + Join on=(#0 = #4 = #8 AND #1 = #5 = #10 AND #2 = #6 = #11 AND #3 = #7 = #9) type=delta + Get l17 + Get l17 + ArrangeBy keys=[[#0..=#3]] + Union + Filter (#2 < #4) + Get l18 + Map (error("more than one record produced in subquery")) + Project (#0..=#3) + Filter error("more than one record produced in subquery") AND (#4 > 1) + Reduce group_by=[#0..=#3] aggregates=[count(*)] + Project (#0..=#3) + Get l18 Return - CrossJoin type=differential - ArrangeBy keys=[[]] - Union - Get l8 - Map (null) - Union - Negate - Project () - Get l8 - Constant - - () - ArrangeBy keys=[[]] - Union - Get l20 - Map (null) - Union - Negate - Project () - Get l20 - Constant - - () + With + cte l20 = + Reduce aggregates=[min(#0)] + Project (#1) + Filter (#0 = "location") + Get l9 + Return + CrossJoin type=differential + ArrangeBy keys=[[]] + Union + Get l8 + Map (null) + Union + Negate + Project () + Get l8 + Constant + - () + ArrangeBy keys=[[]] + Union + Get l20 + Map (null) + Union + Negate + Project () + Get l20 + Constant + - () Source materialize.public.input diff --git a/test/sqllogictest/advent-of-code/2023/aoc_1207.slt b/test/sqllogictest/advent-of-code/2023/aoc_1207.slt index 146fce9917b72..7d3a0f914cc71 100644 --- a/test/sqllogictest/advent-of-code/2023/aoc_1207.slt +++ b/test/sqllogictest/advent-of-code/2023/aoc_1207.slt @@ -333,8 +333,9 @@ Explained Query: Get l7 Get l5 cte l9 = - Filter ((#2) IS NULL OR (#2 = false)) - Get l8 + Project (#0, #1) + Filter ((#2) IS NULL OR (#2 = false)) + Get l8 cte l10 = Distinct project=[#0] Project (#0) @@ -361,8 +362,7 @@ Explained Query: Project (#0, #1, #3) Join on=(#0 = #2) type=differential ArrangeBy keys=[[#0]] - Project (#0, #1) - Get l9 + Get l9 ArrangeBy keys=[[#0]] Union Get l12 @@ -373,8 +373,9 @@ Explained Query: Get l12 Get l10 cte l14 = - Filter ((#2) IS NULL OR (#2 = false)) - Get l13 + Project (#0, #1) + Filter ((#2) IS NULL OR (#2 = false)) + Get l13 cte l15 = Distinct 
project=[#0] Project (#0) @@ -403,8 +404,7 @@ Explained Query: Project (#0, #1, #3) Join on=(#0 = #2) type=differential ArrangeBy keys=[[#0]] - Project (#0, #1) - Get l14 + Get l14 ArrangeBy keys=[[#0]] Union Get l18 @@ -453,9 +453,10 @@ Explained Query: Get l23 Get l20 cte l25 = - Filter ((#4) IS NULL OR (#4 = false)) - Map ((#2 AND #3)) - Get l24 + Project (#0, #1) + Filter ((#4) IS NULL OR (#4 = false)) + Map ((#2 AND #3)) + Get l24 cte l26 = Distinct project=[#0] Project (#0) @@ -479,8 +480,7 @@ Explained Query: Project (#0, #1, #3) Join on=(#0 = #2) type=differential ArrangeBy keys=[[#0]] - Project (#0, #1) - Get l25 + Get l25 ArrangeBy keys=[[#0]] Union Get l28 @@ -491,8 +491,9 @@ Explained Query: Get l28 Get l26 cte l30 = - Filter ((#2) IS NULL OR (#2 = false)) - Get l29 + Project (#0, #1) + Filter ((#2) IS NULL OR (#2 = false)) + Get l29 cte l31 = Distinct project=[#0] Project (#0) @@ -519,8 +520,7 @@ Explained Query: Project (#0, #1, #3) Join on=(#0 = #2) type=differential ArrangeBy keys=[[#0]] - Project (#0, #1) - Get l30 + Get l30 ArrangeBy keys=[[#0]] Union Get l33 @@ -531,8 +531,9 @@ Explained Query: Get l33 Get l31 cte l35 = - Filter ((#2) IS NULL OR (#2 = false)) - Get l34 + Project (#0, #1) + Filter ((#2) IS NULL OR (#2 = false)) + Get l34 cte l36 = Distinct project=[#0] Project (#0) @@ -580,8 +581,7 @@ Explained Query: Map (case when #3 then 6 else 7 end) Join on=(#0 = #2) type=differential ArrangeBy keys=[[#0]] - Project (#0, #1) - Get l35 + Get l35 ArrangeBy keys=[[#0]] Union Get l38 @@ -691,8 +691,9 @@ Explained Query: Get l48 Get l46 cte l50 = - Filter ((#2) IS NULL OR (#2 = false)) - Get l49 + Project (#0, #1) + Filter ((#2) IS NULL OR (#2 = false)) + Get l49 cte l51 = Distinct project=[#0] Project (#1) @@ -719,8 +720,7 @@ Explained Query: Project (#0, #1, #3) Join on=(#1 = #2) type=differential ArrangeBy keys=[[#1]] - Project (#0, #1) - Get l50 + Get l50 ArrangeBy keys=[[#0]] Union Get l53 @@ -731,8 +731,9 @@ Explained Query: Get l53 Get l51 cte l55 = - Filter ((#2) IS NULL OR (#2 = false)) - Get l54 + Project (#0, #1) + Filter ((#2) IS NULL OR (#2 = false)) + Get l54 cte l56 = Distinct project=[#0] Project (#1) @@ -761,8 +762,7 @@ Explained Query: Project (#0, #1, #3) Join on=(#1 = #2) type=differential ArrangeBy keys=[[#1]] - Project (#0, #1) - Get l55 + Get l55 ArrangeBy keys=[[#0]] Union Get l59 @@ -811,9 +811,10 @@ Explained Query: Get l64 Get l61 cte l66 = - Filter ((#4) IS NULL OR (#4 = false)) - Map ((#2 AND #3)) - Get l65 + Project (#0, #1) + Filter ((#4) IS NULL OR (#4 = false)) + Map ((#2 AND #3)) + Get l65 cte l67 = Distinct project=[#0] Project (#1) @@ -837,8 +838,7 @@ Explained Query: Project (#0, #1, #3) Join on=(#1 = #2) type=differential ArrangeBy keys=[[#1]] - Project (#0, #1) - Get l66 + Get l66 ArrangeBy keys=[[#0]] Union Get l69 @@ -849,8 +849,9 @@ Explained Query: Get l69 Get l67 cte l71 = - Filter ((#2) IS NULL OR (#2 = false)) - Get l70 + Project (#0, #1) + Filter ((#2) IS NULL OR (#2 = false)) + Get l70 cte l72 = Distinct project=[#0] Project (#1) @@ -877,8 +878,7 @@ Explained Query: Project (#0, #1, #3) Join on=(#1 = #2) type=differential ArrangeBy keys=[[#1]] - Project (#0, #1) - Get l71 + Get l71 ArrangeBy keys=[[#0]] Union Get l74 @@ -889,8 +889,9 @@ Explained Query: Get l74 Get l72 cte l76 = - Filter ((#2) IS NULL OR (#2 = false)) - Get l75 + Project (#0, #1) + Filter ((#2) IS NULL OR (#2 = false)) + Get l75 cte l77 = Distinct project=[#0] Project (#1) @@ -944,8 +945,7 @@ Explained Query: Map (case when #3 then 6 else 7 end) Join on=(#1 
= #2) type=differential ArrangeBy keys=[[#1]] - Project (#0, #1) - Get l76 + Get l76 ArrangeBy keys=[[#0]] Union Get l79 diff --git a/test/sqllogictest/advent-of-code/2023/aoc_1210.slt b/test/sqllogictest/advent-of-code/2023/aoc_1210.slt index ba4ae969918ad..d14f6b56354f9 100644 --- a/test/sqllogictest/advent-of-code/2023/aoc_1210.slt +++ b/test/sqllogictest/advent-of-code/2023/aoc_1210.slt @@ -370,7 +370,7 @@ EXPLAIN WITH MUTUALLY RECURSIVE SELECT * FROM part1, part2; ---- Explained Query: - With Mutually Recursive + With cte l0 = Project (#0, #2, #3) Map (substr(#1, #2, 1)) @@ -380,27 +380,29 @@ Explained Query: FlatMap generate_series(1, (regexp_split_to_array["\n", case_insensitive=false](#0) array_length 1), 1) ReadStorage materialize.public.input cte l1 = - Map ((#2 = "-"), case when #3 then #0 else case when (#2 = "|") then (#0 - 1) else case when (#2 = "F") then (#0 + 1) else case when (#2 = "L") then (#0 - 1) else case when (#2 = "J") then #0 else case when (#2 = "7") then #0 else null end end end end end end, case when #3 then (#1 - 1) else case when (#2 = "|") then #1 else case when (#2 = "F") then #1 else case when (#2 = "L") then #1 else case when (#2 = "J") then (#1 - 1) else case when (#2 = "7") then (#1 - 1) else null end end end end end end) - Get l0 + Project (#0..=#2, #4, #5) + Map ((#2 = "-"), case when #3 then #0 else case when (#2 = "|") then (#0 - 1) else case when (#2 = "F") then (#0 + 1) else case when (#2 = "L") then (#0 - 1) else case when (#2 = "J") then #0 else case when (#2 = "7") then #0 else null end end end end end end, case when #3 then (#1 - 1) else case when (#2 = "|") then #1 else case when (#2 = "F") then #1 else case when (#2 = "L") then #1 else case when (#2 = "J") then (#1 - 1) else case when (#2 = "7") then (#1 - 1) else null end end end end end end) + Get l0 cte l2 = - Map ((#2 = "-"), case when #3 then #0 else case when (#2 = "|") then (#0 + 1) else case when (#2 = "F") then #0 else case when (#2 = "L") then #0 else case when (#2 = "J") then (#0 - 1) else case when (#2 = "7") then (#0 + 1) else null end end end end end end, case when #3 then (#1 + 1) else case when (#2 = "|") then #1 else case when (#2 = "F") then (#1 + 1) else case when (#2 = "L") then (#1 + 1) else case when (#2 = "J") then #1 else case when (#2 = "7") then #1 else null end end end end end end) - Get l0 + Project (#0..=#2, #4, #5) + Map ((#2 = "-"), case when #3 then #0 else case when (#2 = "|") then (#0 + 1) else case when (#2 = "F") then #0 else case when (#2 = "L") then #0 else case when (#2 = "J") then (#0 - 1) else case when (#2 = "7") then (#0 + 1) else null end end end end end end, case when #3 then (#1 + 1) else case when (#2 = "|") then #1 else case when (#2 = "F") then (#1 + 1) else case when (#2 = "L") then (#1 + 1) else case when (#2 = "J") then #1 else case when (#2 = "7") then #1 else null end end end end end end) + Get l0 cte l3 = Project (#0..=#3) Filter (#4 = 2) Reduce group_by=[#0..=#3] aggregates=[count(*)] Union - Project (#0, #1, #4, #5) + Project (#0, #1, #3, #4) Filter (#2 != ".") AND (#2 != "S") Get l1 - Project (#0, #1, #4, #5) + Project (#0, #1, #3, #4) Filter (#2 != ".") AND (#2 != "S") Get l2 - Project (#4, #5, #0, #1) - Filter (#2 != ".") AND (#2 != "S") AND (#4) IS NOT NULL AND (#5) IS NOT NULL + Project (#3, #4, #0, #1) + Filter (#2 != ".") AND (#2 != "S") AND (#3) IS NOT NULL AND (#4) IS NOT NULL Get l1 - Project (#4, #5, #0, #1) - Filter (#2 != ".") AND (#2 != "S") AND (#4) IS NOT NULL AND (#5) IS NOT NULL + Project (#3, #4, #0, #1) + Filter (#2 
!= ".") AND (#2 != "S") AND (#3) IS NOT NULL AND (#4) IS NOT NULL Get l2 Project (#0, #1, #3, #1) Filter (#2 = "S") @@ -425,173 +427,175 @@ Explained Query: cte l5 = ArrangeBy keys=[[#0, #1]] Get l3 - cte l6 = - Distinct project=[#0, #1] - Union - Get l4 - Project (#4, #5) - Join on=(#0 = #2 AND #1 = #3) type=differential - ArrangeBy keys=[[#0, #1]] - Filter (#0) IS NOT NULL AND (#1) IS NOT NULL - Get l6 - Get l5 - cte l7 = - Reduce aggregates=[count(*)] - Project () - Get l6 - cte l8 = - Distinct project=[#0..=#2] - Union - Project (#0, #1, #4) - Join on=(#0 = #2 AND #1 = #3) type=differential - ArrangeBy keys=[[#0, #1]] - Filter (#0) IS NOT NULL AND (#1) IS NOT NULL - Get l6 - ArrangeBy keys=[[#0, #1]] - Filter (#2 != "S") - Get l0 - Project (#0, #1, #8) - Map (case when ((#0 = #0) AND (#0 = (#6 + 1)) AND (#1 = #7) AND (#1 = (#1 + 1))) then "J" else case when ((#0 = #0) AND (#0 = #6) AND (#1 = (#1 + 1)) AND (#1 = (#7 - 1))) then "-" else case when ((#0 = #0) AND (#0 = (#6 - 1)) AND (#1 = #7) AND (#1 = (#1 + 1))) then "7" else case when ((#0 = #6) AND (#0 = (#0 + 1)) AND (#1 = #1) AND (#1 = (#7 - 1))) then "L" else case when ((#0 = (#0 + 1)) AND (#0 = (#6 - 1)) AND (#1 = #1) AND (#1 = #7)) then "|" else case when ((#0 = #0) AND (#0 = #6) AND (#1 = (#1 - 1)) AND (#1 = (#7 - 1))) then "F" else "???" end end end end end end) - Join on=(#0 = #2 = #4 AND #1 = #3 = #5) type=delta - ArrangeBy keys=[[#0, #1]] - Get l4 + Return + With Mutually Recursive + cte l6 = + Distinct project=[#0, #1] + Union + Get l4 + Project (#4, #5) + Join on=(#0 = #2 AND #1 = #3) type=differential ArrangeBy keys=[[#0, #1]] - Project (#0, #1) - Get l3 + Filter (#0) IS NOT NULL AND (#1) IS NOT NULL + Get l6 Get l5 - cte l9 = - ArrangeBy keys=[[#0, #1]] - Get l17 - cte l10 = - Project (#0..=#2, #5) - Join on=(#0 = #3 AND #1 = #4) type=differential - Get l9 - ArrangeBy keys=[[#0, #1]] - Get l8 - cte l11 = - Project (#2, #3, #6, #7) - Map ((#0 + 1), (#1 + 1)) - Join on=(#0 = #4 AND #1 = #5) type=differential - ArrangeBy keys=[[#0, #1]] - Union - Map (null) - Union - Negate - Project (#0..=#2) - Join on=(#0 = #3 AND #1 = #4) type=differential - Get l9 - ArrangeBy keys=[[#0, #1]] - Distinct project=[#0, #1] - Project (#0, #1) - Get l10 - Get l17 - Get l10 - ArrangeBy keys=[[#0, #1]] - Project (#0, #1) - Get l0 - cte l12 = - Distinct project=[#0] - Project (#1) - Get l11 - cte l13 = - Reduce group_by=[#0] aggregates=[any((#0 = #1))] - FlatMap wrap1("J", "-", "|", "F") - Get l12 - cte l14 = - ArrangeBy keys=[[#0]] - Get l12 - cte l15 = - Union - Get l13 - Project (#0, #2) - Map (false) - Join on=(#0 = #1) type=differential - ArrangeBy keys=[[#0]] - Union - Negate - Project (#0) - Get l13 - Get l12 - Get l14 - cte l16 = - Union - Get l15 - Map (error("more than one record produced in subquery")) - Project (#0) - Filter (#1 > 1) - Reduce group_by=[#0] aggregates=[count(*)] - Project (#0) - Get l15 - cte l17 = - Distinct project=[#0..=#2] - Union - Project (#0, #1, #3) - Filter ((#0 = 1) OR (#1 = 1)) - Map (false) - Get l0 - Project (#2, #3, #6) - Map (case when #5 then NOT(#0) else #0 end) - Join on=(#1 = #4) type=differential - ArrangeBy keys=[[#1]] - Get l11 - ArrangeBy keys=[[#0]] - Union - Get l16 - Project (#0, #2) - Map (null) - Join on=(#0 = #1) type=differential - ArrangeBy keys=[[#0]] - Union - Negate - Distinct project=[#0] - Project (#0) - Get l16 - Get l12 - Get l14 - Return - With - cte l18 = - Filter (#2 = true) - Get l17 - cte l19 = + cte l7 = Reduce aggregates=[count(*)] + Project () + Get l6 + cte l8 = + Distinct 
project=[#0..=#2] Union - Negate - Project () - Join on=(#0 = #2 AND #1 = #3) type=differential + Project (#0, #1, #4) + Join on=(#0 = #2 AND #1 = #3) type=differential + ArrangeBy keys=[[#0, #1]] + Filter (#0) IS NOT NULL AND (#1) IS NOT NULL + Get l6 + ArrangeBy keys=[[#0, #1]] + Filter (#2 != "S") + Get l0 + Project (#0, #1, #8) + Map (case when ((#0 = #0) AND (#0 = (#6 + 1)) AND (#1 = #7) AND (#1 = (#1 + 1))) then "J" else case when ((#0 = #0) AND (#0 = #6) AND (#1 = (#1 + 1)) AND (#1 = (#7 - 1))) then "-" else case when ((#0 = #0) AND (#0 = (#6 - 1)) AND (#1 = #7) AND (#1 = (#1 + 1))) then "7" else case when ((#0 = #6) AND (#0 = (#0 + 1)) AND (#1 = #1) AND (#1 = (#7 - 1))) then "L" else case when ((#0 = (#0 + 1)) AND (#0 = (#6 - 1)) AND (#1 = #1) AND (#1 = #7)) then "|" else case when ((#0 = #0) AND (#0 = #6) AND (#1 = (#1 - 1)) AND (#1 = (#7 - 1))) then "F" else "???" end end end end end end) + Join on=(#0 = #2 = #4 AND #1 = #3 = #5) type=delta ArrangeBy keys=[[#0, #1]] - Project (#0, #1) - Get l18 + Get l4 ArrangeBy keys=[[#0, #1]] - Distinct project=[#0, #1] - Project (#0, #1) - Get l8 - Project () - Get l18 - Return - CrossJoin type=differential - ArrangeBy keys=[[]] + Project (#0, #1) + Get l3 + Get l5 + cte l9 = + ArrangeBy keys=[[#0, #1]] + Get l17 + cte l10 = + Project (#0..=#2, #5) + Join on=(#0 = #3 AND #1 = #4) type=differential + Get l9 + ArrangeBy keys=[[#0, #1]] + Get l8 + cte l11 = + Project (#2, #3, #6, #7) + Map ((#0 + 1), (#1 + 1)) + Join on=(#0 = #4 AND #1 = #5) type=differential + ArrangeBy keys=[[#0, #1]] + Union + Map (null) + Union + Negate + Project (#0..=#2) + Join on=(#0 = #3 AND #1 = #4) type=differential + Get l9 + ArrangeBy keys=[[#0, #1]] + Distinct project=[#0, #1] + Project (#0, #1) + Get l10 + Get l17 + Get l10 + ArrangeBy keys=[[#0, #1]] + Project (#0, #1) + Get l0 + cte l12 = + Distinct project=[#0] Project (#1) - Map ((#0 / 2)) - Union - Get l7 - Map (0) + Get l11 + cte l13 = + Reduce group_by=[#0] aggregates=[any((#0 = #1))] + FlatMap wrap1("J", "-", "|", "F") + Get l12 + cte l14 = + ArrangeBy keys=[[#0]] + Get l12 + cte l15 = + Union + Get l13 + Project (#0, #2) + Map (false) + Join on=(#0 = #1) type=differential + ArrangeBy keys=[[#0]] Union Negate - Project () - Get l7 - Constant - - () - ArrangeBy keys=[[]] + Project (#0) + Get l13 + Get l12 + Get l14 + cte l16 = + Union + Get l15 + Map (error("more than one record produced in subquery")) + Project (#0) + Filter (#1 > 1) + Reduce group_by=[#0] aggregates=[count(*)] + Project (#0) + Get l15 + cte l17 = + Distinct project=[#0..=#2] Union - Get l19 - Map (0) - Union - Negate - Project () - Get l19 - Constant - - () + Project (#0, #1, #3) + Filter ((#0 = 1) OR (#1 = 1)) + Map (false) + Get l0 + Project (#2, #3, #6) + Map (case when #5 then NOT(#0) else #0 end) + Join on=(#1 = #4) type=differential + ArrangeBy keys=[[#1]] + Get l11 + ArrangeBy keys=[[#0]] + Union + Get l16 + Project (#0, #2) + Map (null) + Join on=(#0 = #1) type=differential + ArrangeBy keys=[[#0]] + Union + Negate + Distinct project=[#0] + Project (#0) + Get l16 + Get l12 + Get l14 + Return + With + cte l18 = + Project (#0, #1) + Filter (#2 = true) + Get l17 + cte l19 = + Reduce aggregates=[count(*)] + Union + Negate + Project () + Join on=(#0 = #2 AND #1 = #3) type=differential + ArrangeBy keys=[[#0, #1]] + Get l18 + ArrangeBy keys=[[#0, #1]] + Distinct project=[#0, #1] + Project (#0, #1) + Get l8 + Project () + Get l18 + Return + CrossJoin type=differential + ArrangeBy keys=[[]] + Project (#1) + Map ((#0 / 2)) + Union + Get l7 + 
Map (0) + Union + Negate + Project () + Get l7 + Constant + - () + ArrangeBy keys=[[]] + Union + Get l19 + Map (0) + Union + Negate + Project () + Get l19 + Constant + - () Source materialize.public.input diff --git a/test/sqllogictest/advent-of-code/2023/aoc_1212.slt b/test/sqllogictest/advent-of-code/2023/aoc_1212.slt index 9d7f1dd11436a..514f5ab7ee6c4 100644 --- a/test/sqllogictest/advent-of-code/2023/aoc_1212.slt +++ b/test/sqllogictest/advent-of-code/2023/aoc_1212.slt @@ -91,7 +91,7 @@ EXPLAIN WITH MUTUALLY RECURSIVE SELECT SUM(count) FROM counts; ---- Explained Query: - With Mutually Recursive + With cte l0 = Project (#1, #3, #4) Map (regexp_split_to_array[" ", case_insensitive=false](array_index(regexp_split_to_array["\n", case_insensitive=false](#0), integer_to_bigint(#1))), (array_index(#2, 1) || "."), array_index(#2, 2)) @@ -108,70 +108,72 @@ Explained Query: Project (#0, #1) Filter (#2 != "#") Get l1 - cte l3 = - Project (#0..=#2, #5) - Join on=(#0 = #3 = #6 AND #4 = (#2 + 1) AND #7 = ((#1 + #5) + 1)) type=delta - ArrangeBy keys=[[#0], [#0, (#2 + 1)]] - Get l5 - ArrangeBy keys=[[#0, #1]] - Project (#0, #2, #3) - Map (text_to_integer(array_index(regexp_split_to_array[",", case_insensitive=false](#1), integer_to_bigint(#2)))) - FlatMap generate_series(1, (regexp_split_to_array[",", case_insensitive=false](#1) array_length 1), 1) - Project (#0, #2) - Get l0 - Get l2 - cte l4 = - Distinct project=[#0..=#2] - Project (#0, #1, #3) - Get l3 - cte l5 = - Union - Project (#0, #3, #4) - Map (0, 0) - Get l0 - Project (#0, #5, #2) - Map ((#1 + 1)) - Join on=(#0 = #3 AND #4 = (#1 + 1)) type=differential - ArrangeBy keys=[[#0, (#1 + 1)]] - Get l5 - Get l2 - Project (#0, #7, #8) - Map (((#1 + #3) + 1), (#2 + 1)) - Join on=(#0 = #4 AND #1 = #5 AND #3 = #6) type=differential - ArrangeBy keys=[[#0, #1, #3]] - Get l3 - ArrangeBy keys=[[#0..=#2]] - Union - Negate - Distinct project=[#0..=#2] - Project (#0..=#2) - Filter (#4 >= (#1 + 1)) AND (#4 <= (#1 + #2)) - Join on=(#0 = #3) type=differential - ArrangeBy keys=[[#0]] - Get l4 - ArrangeBy keys=[[#0]] - Project (#0, #1) - Filter (#2 = ".") - Get l1 - Get l4 Return - With - cte l6 = - Reduce aggregates=[sum(#0)] - Project (#3) - TopK group_by=[#0] order_by=[#1 desc nulls_first, #2 desc nulls_first] limit=1 - Reduce group_by=[#0..=#2] aggregates=[count(*)] - Get l5 + With Mutually Recursive + cte l3 = + Project (#0..=#2, #5) + Join on=(#0 = #3 = #6 AND #4 = (#2 + 1) AND #7 = ((#1 + #5) + 1)) type=delta + ArrangeBy keys=[[#0], [#0, (#2 + 1)]] + Get l5 + ArrangeBy keys=[[#0, #1]] + Project (#0, #2, #3) + Map (text_to_integer(array_index(regexp_split_to_array[",", case_insensitive=false](#1), integer_to_bigint(#2)))) + FlatMap generate_series(1, (regexp_split_to_array[",", case_insensitive=false](#1) array_length 1), 1) + Project (#0, #2) + Get l0 + Get l2 + cte l4 = + Distinct project=[#0..=#2] + Project (#0, #1, #3) + Get l3 + cte l5 = + Union + Project (#0, #3, #4) + Map (0, 0) + Get l0 + Project (#0, #5, #2) + Map ((#1 + 1)) + Join on=(#0 = #3 AND #4 = (#1 + 1)) type=differential + ArrangeBy keys=[[#0, (#1 + 1)]] + Get l5 + Get l2 + Project (#0, #7, #8) + Map (((#1 + #3) + 1), (#2 + 1)) + Join on=(#0 = #4 AND #1 = #5 AND #3 = #6) type=differential + ArrangeBy keys=[[#0, #1, #3]] + Get l3 + ArrangeBy keys=[[#0..=#2]] + Union + Negate + Distinct project=[#0..=#2] + Project (#0..=#2) + Filter (#4 >= (#1 + 1)) AND (#4 <= (#1 + #2)) + Join on=(#0 = #3) type=differential + ArrangeBy keys=[[#0]] + Get l4 + ArrangeBy keys=[[#0]] + Project (#0, #1) + Filter 
(#2 = ".") + Get l1 + Get l4 Return - Union - Get l6 - Map (null) - Union - Negate - Project () - Get l6 - Constant - - () + With + cte l6 = + Reduce aggregates=[sum(#0)] + Project (#3) + TopK group_by=[#0] order_by=[#1 desc nulls_first, #2 desc nulls_first] limit=1 + Reduce group_by=[#0..=#2] aggregates=[count(*)] + Get l5 + Return + Union + Get l6 + Map (null) + Union + Negate + Project () + Get l6 + Constant + - () Source materialize.public.input diff --git a/test/sqllogictest/advent-of-code/2023/aoc_1214.slt b/test/sqllogictest/advent-of-code/2023/aoc_1214.slt index c950227af9287..c162763a6ebc0 100644 --- a/test/sqllogictest/advent-of-code/2023/aoc_1214.slt +++ b/test/sqllogictest/advent-of-code/2023/aoc_1214.slt @@ -127,95 +127,97 @@ EXPLAIN WITH MUTUALLY RECURSIVE SELECT * FROM part1; ---- Explained Query: - With Mutually Recursive + With cte l0 = Project (#1, #2) Map (array_index(regexp_split_to_array["\n", case_insensitive=false](#0), integer_to_bigint(#1))) FlatMap generate_series(1, (regexp_split_to_array["\n", case_insensitive=false](#0) array_length 1), 1) ReadStorage materialize.public.input cte l1 = + Reduce aggregates=[max(#0)] + Project (#0) + Get l0 + cte l2 = + Union + Get l1 + Map (null) + Union + Negate + Project () + Get l1 + Constant + - () + cte l3 = Project (#0, #2, #3) Map (substr(#1, #2, 1)) FlatMap generate_series(1, char_length(#1), 1) Get l0 - cte l2 = - Threshold - Union - Threshold - Union - Threshold - Union - Get l2 - Project (#2, #1, #3) - Map ((#0 - 1), "O") - Get l4 - Negate + Return + With Mutually Recursive + cte l4 = + Threshold + Union + Threshold + Union + Threshold + Union + Get l4 Project (#2, #1, #3) - Map ((#0 - 1), ".") - Get l4 - Map (".") - Get l4 - Negate - Map ("O") + Map ((#0 - 1), "O") + Get l6 + Negate + Project (#2, #1, #3) + Map ((#0 - 1), ".") + Get l6 + Map (".") + Get l6 + Negate + Map ("O") + Get l6 + Get l3 + Negate + Get l8 + cte l5 = + Project (#0, #1) + Filter (#2 = "O") + Get l4 + cte l6 = + Project (#0, #1) + Join on=(#0 = (#2 + 1) AND #1 = #3) type=differential + ArrangeBy keys=[[#1, #0]] + Get l5 + ArrangeBy keys=[[#1, (#0 + 1)]] + Project (#0, #1) + Filter (#2 = ".") Get l4 - Get l1 - Negate - Get l8 - cte l3 = - Filter (#2 = "O") - Get l2 - cte l4 = - Project (#0, #1) - Join on=(#0 = (#2 + 1) AND #1 = #3) type=differential - ArrangeBy keys=[[#1, #0]] - Project (#0, #1) - Get l3 - ArrangeBy keys=[[#1, (#0 + 1)]] - Project (#0, #1) - Filter (#2 = ".") + cte l7 = + Reduce aggregates=[sum(((1 + #1) - #0))] + CrossJoin type=differential + ArrangeBy keys=[[]] + Project (#0) + Get l5 + ArrangeBy keys=[[]] + Union Get l2 - cte l5 = - Reduce aggregates=[max(#0)] - Project (#0) - Get l0 - cte l6 = + Map (null) + Union + Negate + Project () + Get l2 + Constant + - () + cte l8 = + Get l3 + Return Union - Get l5 + Get l7 Map (null) Union Negate Project () - Get l5 + Get l7 Constant - () - cte l7 = - Reduce aggregates=[sum(((1 + #1) - #0))] - CrossJoin type=differential - ArrangeBy keys=[[]] - Project (#0) - Get l3 - ArrangeBy keys=[[]] - Union - Get l6 - Map (null) - Union - Negate - Project () - Get l6 - Constant - - () - cte l8 = - Get l1 - Return - Union - Get l7 - Map (null) - Union - Negate - Project () - Get l7 - Constant - - () Source materialize.public.input @@ -568,227 +570,229 @@ EXPLAIN WITH MUTUALLY RECURSIVE (RETURN AT RECURSION LIMIT 142) SELECT * FROM part2; ---- Explained Query: - With Mutually Recursive + With cte l0 = Project (#1, #2) Map (array_index(regexp_split_to_array["\n", case_insensitive=false](#0), 
integer_to_bigint(#1))) FlatMap generate_series(1, (regexp_split_to_array["\n", case_insensitive=false](#0) array_length 1), 1) ReadStorage materialize.public.input cte l1 = + Reduce aggregates=[max(#0)] + Project (#0) + Get l0 + cte l2 = + Union + Get l1 + Map (null) + Union + Negate + Project () + Get l1 + Constant + - () + cte l3 = Project (#0, #2, #3) Map (substr(#1, #2, 1)) FlatMap generate_series(1, char_length(#1), 1) Get l0 - cte [recursion_limit=142, return_at_limit] l2 = - Threshold - Union - Get l18 - Get l1 - Negate - Get l22 - cte l6 = - With Mutually Recursive - cte l3 = - Threshold - Union - Threshold - Union - Threshold - Union - Get l3 - Project (#2, #1, #3) - Map ((#0 - 1), "O") - Get l4 - Negate + Return + With Mutually Recursive + cte [recursion_limit=142, return_at_limit] l4 = + Threshold + Union + Get l20 + Get l3 + Negate + Get l22 + cte l8 = + With Mutually Recursive + cte l5 = + Threshold + Union + Threshold + Union + Threshold + Union + Get l5 Project (#2, #1, #3) - Map ((#0 - 1), ".") - Get l4 - Map (".") - Get l4 - Negate - Map ("O") - Get l4 - Get l2 - Negate - Get l5 - cte l4 = - Project (#0, #1) - Join on=(#0 = (#2 + 1) AND #1 = #3) type=differential - ArrangeBy keys=[[#1, #0]] - Project (#0, #1) - Filter (#2 = "O") - Get l3 - ArrangeBy keys=[[#1, (#0 + 1)]] - Project (#0, #1) - Filter (#2 = ".") - Get l3 - cte l5 = - Get l2 - Return - Get l3 - cte l10 = - With Mutually Recursive - cte l7 = - Threshold - Union - Threshold - Union - Threshold - Union - Get l7 - Project (#0, #2, #3) - Map ((#1 - 1), "O") - Get l8 - Negate + Map ((#0 - 1), "O") + Get l6 + Negate + Project (#2, #1, #3) + Map ((#0 - 1), ".") + Get l6 + Map (".") + Get l6 + Negate + Map ("O") + Get l6 + Get l4 + Negate + Get l7 + cte l6 = + Project (#0, #1) + Join on=(#0 = (#2 + 1) AND #1 = #3) type=differential + ArrangeBy keys=[[#1, #0]] + Project (#0, #1) + Filter (#2 = "O") + Get l5 + ArrangeBy keys=[[#1, (#0 + 1)]] + Project (#0, #1) + Filter (#2 = ".") + Get l5 + cte l7 = + Get l4 + Return + Get l5 + cte l12 = + With Mutually Recursive + cte l9 = + Threshold + Union + Threshold + Union + Threshold + Union + Get l9 Project (#0, #2, #3) - Map ((#1 - 1), ".") - Get l8 - Map (".") - Get l8 - Negate - Map ("O") - Get l8 - Get l6 - Negate - Get l9 - cte l8 = - Project (#0, #1) - Join on=(#0 = #2 AND #1 = (#3 + 1)) type=differential - ArrangeBy keys=[[#0, #1]] - Project (#0, #1) - Filter (#2 = "O") - Get l7 - ArrangeBy keys=[[#0, (#1 + 1)]] - Project (#0, #1) - Filter (#2 = ".") - Get l7 - cte l9 = - Get l6 - Return - Get l7 - cte l14 = - With Mutually Recursive - cte l11 = - Threshold - Union - Threshold - Union - Threshold - Union - Get l11 - Project (#2, #1, #3) - Map ((#0 + 1), "O") - Get l12 - Negate + Map ((#1 - 1), "O") + Get l10 + Negate + Project (#0, #2, #3) + Map ((#1 - 1), ".") + Get l10 + Map (".") + Get l10 + Negate + Map ("O") + Get l10 + Get l8 + Negate + Get l11 + cte l10 = + Project (#0, #1) + Join on=(#0 = #2 AND #1 = (#3 + 1)) type=differential + ArrangeBy keys=[[#0, #1]] + Project (#0, #1) + Filter (#2 = "O") + Get l9 + ArrangeBy keys=[[#0, (#1 + 1)]] + Project (#0, #1) + Filter (#2 = ".") + Get l9 + cte l11 = + Get l8 + Return + Get l9 + cte l16 = + With Mutually Recursive + cte l13 = + Threshold + Union + Threshold + Union + Threshold + Union + Get l13 Project (#2, #1, #3) - Map ((#0 + 1), ".") - Get l12 - Map (".") - Get l12 - Negate - Map ("O") - Get l12 - Get l10 - Negate - Get l13 - cte l12 = - Project (#0, #1) - Join on=(#0 = (#2 - 1) AND #1 = #3) type=differential - 
ArrangeBy keys=[[#1, #0]] - Project (#0, #1) - Filter (#2 = "O") - Get l11 - ArrangeBy keys=[[#1, (#0 - 1)]] - Project (#0, #1) - Filter (#2 = ".") - Get l11 - cte l13 = - Get l10 - Return - Get l11 - cte [recursion_limit=142, return_at_limit] l18 = - With Mutually Recursive - cte l15 = - Threshold - Union - Threshold - Union - Threshold - Union - Get l15 - Project (#0, #2, #3) - Map ((#1 + 1), "O") - Get l16 - Negate + Map ((#0 + 1), "O") + Get l14 + Negate + Project (#2, #1, #3) + Map ((#0 + 1), ".") + Get l14 + Map (".") + Get l14 + Negate + Map ("O") + Get l14 + Get l12 + Negate + Get l15 + cte l14 = + Project (#0, #1) + Join on=(#0 = (#2 - 1) AND #1 = #3) type=differential + ArrangeBy keys=[[#1, #0]] + Project (#0, #1) + Filter (#2 = "O") + Get l13 + ArrangeBy keys=[[#1, (#0 - 1)]] + Project (#0, #1) + Filter (#2 = ".") + Get l13 + cte l15 = + Get l12 + Return + Get l13 + cte [recursion_limit=142, return_at_limit] l20 = + With Mutually Recursive + cte l17 = + Threshold + Union + Threshold + Union + Threshold + Union + Get l17 Project (#0, #2, #3) - Map ((#1 + 1), ".") - Get l16 - Map (".") - Get l16 - Negate - Map ("O") - Get l16 - Get l14 - Negate - Get l17 - cte l16 = - Project (#0, #1) - Join on=(#0 = #2 AND #1 = (#3 - 1)) type=differential - ArrangeBy keys=[[#0, #1]] - Project (#0, #1) - Filter (#2 = "O") - Get l15 - ArrangeBy keys=[[#0, (#1 - 1)]] - Project (#0, #1) - Filter (#2 = ".") - Get l15 - cte l17 = - Get l14 - Return - Get l15 - cte l19 = - Reduce aggregates=[max(#0)] - Project (#0) - Get l0 - cte l20 = + Map ((#1 + 1), "O") + Get l18 + Negate + Project (#0, #2, #3) + Map ((#1 + 1), ".") + Get l18 + Map (".") + Get l18 + Negate + Map ("O") + Get l18 + Get l16 + Negate + Get l19 + cte l18 = + Project (#0, #1) + Join on=(#0 = #2 AND #1 = (#3 - 1)) type=differential + ArrangeBy keys=[[#0, #1]] + Project (#0, #1) + Filter (#2 = "O") + Get l17 + ArrangeBy keys=[[#0, (#1 - 1)]] + Project (#0, #1) + Filter (#2 = ".") + Get l17 + cte l19 = + Get l16 + Return + Get l17 + cte l21 = + Reduce aggregates=[sum(((1 + #1) - #0))] + CrossJoin type=differential + ArrangeBy keys=[[]] + Project (#0) + Filter (#2 = "O") + Get l20 + ArrangeBy keys=[[]] + Union + Get l2 + Map (null) + Union + Negate + Project () + Get l2 + Constant + - () + cte [recursion_limit=142, return_at_limit] l22 = + Get l3 + Return Union - Get l19 + Get l21 Map (null) Union Negate Project () - Get l19 + Get l21 Constant - () - cte l21 = - Reduce aggregates=[sum(((1 + #1) - #0))] - CrossJoin type=differential - ArrangeBy keys=[[]] - Project (#0) - Filter (#2 = "O") - Get l18 - ArrangeBy keys=[[]] - Union - Get l20 - Map (null) - Union - Negate - Project () - Get l20 - Constant - - () - cte [recursion_limit=142, return_at_limit] l22 = - Get l1 - Return - Union - Get l21 - Map (null) - Union - Negate - Project () - Get l21 - Constant - - () Source materialize.public.input diff --git a/test/sqllogictest/advent-of-code/2023/aoc_1215.slt b/test/sqllogictest/advent-of-code/2023/aoc_1215.slt index 7c33317cb6f55..fe71e703bcc8c 100644 --- a/test/sqllogictest/advent-of-code/2023/aoc_1215.slt +++ b/test/sqllogictest/advent-of-code/2023/aoc_1215.slt @@ -197,69 +197,38 @@ EXPLAIN WITH MUTUALLY RECURSIVE (RETURN AT RECURSION LIMIT 10) SELECT * FROM part1, part2; ---- Explained Query: - With Mutually Recursive + With cte l0 = Project (#1, #2) Map (array_index(regexp_split_to_array[",", case_insensitive=false](#0), integer_to_bigint(#1))) FlatMap generate_series(1, (regexp_split_to_array[",", case_insensitive=false](#0) array_length 
1), 1) ReadStorage materialize.public.input - cte [recursion_limit=10, return_at_limit] l1 = - Union - Project (#1, #2) - Map (0) - Get l0 - Project (#2, #3) - Filter (char_length(#0) > 0) - Map (substr(#0, 2), (((#1 + integer_to_bigint(ascii(substr(#0, 1, 1)))) * 17) % 256)) - Get l1 - cte l2 = - Reduce aggregates=[sum(#0)] - Project (#1) - Filter (#0 = "") - Get l1 - cte l3 = + cte l1 = Distinct project=[#0] Project (#2) Map (case when ("-" = substr(#1, char_length(#1))) then substr(#1, 1, (char_length(#1) - 1)) else substr(#1, 1, (char_length(#1) - 2)) end) Get l0 - cte l4 = + cte l2 = Reduce group_by=[#0] aggregates=[max(#1)] Project (#0, #1) Join on=(#0 = #2) type=differential ArrangeBy keys=[[#0]] - Filter (#0) IS NOT NULL - Get l3 + Get l1 ArrangeBy keys=[[#1]] Project (#0, #3) Filter (#3) IS NOT NULL AND (0 = case when #2 then 0 else text_to_integer(substr(#1, char_length(#1))) end) Map (("-" = substr(#1, char_length(#1))), case when #2 then substr(#1, 1, (char_length(#1) - 1)) else substr(#1, 1, (char_length(#1) - 2)) end) Get l0 - cte l5 = - ArrangeBy keys=[[#0]] - Get l3 - cte l6 = - Union - Get l4 - Project (#0, #2) - Map (null) - Join on=(#0 = #1) type=differential - ArrangeBy keys=[[#0]] - Union - Negate - Project (#0) - Get l4 - Get l3 - Get l5 - cte l7 = + cte l3 = Union - Get l6 - Map (error("more than one record produced in subquery")) - Project (#0) - Filter (#1 > 1) - Reduce group_by=[#0] aggregates=[count(*)] - Project (#0) - Get l6 - cte l8 = + Get l2 + Map (null) + Union + Negate + Project (#0) + Get l2 + Get l1 + cte l4 = Project (#0..=#2) Filter (#0 > coalesce(#4, 0)) Join on=(#1 = #3) type=differential @@ -269,172 +238,156 @@ Explained Query: Get l0 ArrangeBy keys=[[#0]] Union - Get l7 - Project (#0, #2) - Map (null) - Join on=(#0 = #1) type=differential - ArrangeBy keys=[[#0]] - Union - Negate - Distinct project=[#0] - Project (#0) - Get l7 - Get l3 - Get l5 - cte l9 = - Distinct project=[#0..=#2] - Get l8 - cte l10 = + Get l3 + Map (null) + Union + Negate + Project (#0) + Get l3 + Get l1 + cte l5 = Distinct project=[#0] Project (#1) - Get l9 - cte l11 = + Get l4 + cte l6 = Reduce group_by=[#0] aggregates=[min(#1)] Project (#0, #1) Join on=(#0 = #2) type=differential ArrangeBy keys=[[#0]] - Filter (#0) IS NOT NULL - Get l10 + Get l5 ArrangeBy keys=[[#1]] Project (#0, #1) Filter (#1) IS NOT NULL - Get l8 - cte l12 = - ArrangeBy keys=[[#0]] - Get l10 - cte l13 = - Union - Get l11 - Project (#0, #2) - Map (null) - Join on=(#0 = #1) type=differential - ArrangeBy keys=[[#0]] - Union - Negate - Project (#0) - Get l11 - Get l10 - Get l12 - cte l14 = + Get l4 + cte l7 = Union - Get l13 - Map (error("more than one record produced in subquery")) - Project (#0) - Filter (#1 > 1) - Reduce group_by=[#0] aggregates=[count(*)] - Project (#0) - Get l13 - cte l15 = + Get l6 + Map (null) + Union + Negate + Project (#0) + Get l6 + Get l5 + cte l8 = Project (#1..=#4) TopK group_by=[#1] order_by=[#0 desc nulls_first, #2 asc nulls_last] limit=1 - Project (#0..=#2, #7, #8) + Project (#0..=#2, #4, #5) Map ((#1) IS NULL) - Join on=(#0 = #3 AND #1 = #4 = #6 AND #2 = #5) type=delta - ArrangeBy keys=[[#0..=#2], [#1]] - Get l8 - ArrangeBy keys=[[#0..=#2]] - Get l9 + Join on=(#1 = #3) type=differential + ArrangeBy keys=[[#1]] + Get l4 ArrangeBy keys=[[#0]] Union - Get l14 - Project (#0, #2) - Map (null) - Join on=(#0 = #1) type=differential - ArrangeBy keys=[[#0]] - Union - Negate - Distinct project=[#0] - Project (#0) - Get l14 - Get l10 - Get l12 - cte [recursion_limit=10, return_at_limit] 
l16 = - Union - Project (#0, #0, #4) - Map (0) - Get l15 - Project (#0, #3, #4) - Filter (char_length(#1) > 0) - Map (substr(#1, 2), (((#2 + integer_to_bigint(ascii(substr(#1, 1, 1)))) * 17) % 256)) - Get l16 + Get l7 + Map (null) + Union + Negate + Project (#0) + Get l7 + Get l5 Return - With - cte l17 = - Project (#1, #3, #4) - Join on=(#0 = #2) type=differential - ArrangeBy keys=[[#0]] - Project (#0, #2) - Filter (#1 = "") AND (#0) IS NOT NULL - Get l16 - ArrangeBy keys=[[#0]] - Project (#0..=#2) - Filter NOT(#3) - Get l15 - cte l18 = - Project (#0, #2) - Get l17 - cte l19 = - Distinct project=[#0, #1] - Get l18 - cte l20 = - Reduce group_by=[#0, #1] aggregates=[count(*)] - Project (#0, #1) - Filter (#1 >= #3) - Join on=(#0 = #2) type=differential - ArrangeBy keys=[[#0]] - Get l19 - ArrangeBy keys=[[#0]] - Get l18 - cte l21 = + With Mutually Recursive + cte [recursion_limit=10, return_at_limit] l9 = Union - Get l20 - Map (0) - Union - Negate - Project (#0, #1) - Get l20 - Get l19 - cte l22 = - Reduce aggregates=[sum((((1 + #0) * #2) * integer_to_bigint(#1)))] - Project (#0, #1, #5) - Join on=(#0 = #3 AND #2 = #4) type=differential - ArrangeBy keys=[[#0, #2]] - Get l17 - ArrangeBy keys=[[#0, #1]] + Project (#1, #2) + Map (0) + Get l0 + Project (#2, #3) + Filter (char_length(#0) > 0) + Map (substr(#0, 2), (((#1 + integer_to_bigint(ascii(substr(#0, 1, 1)))) * 17) % 256)) + Get l9 + cte l10 = + Reduce aggregates=[sum(#0)] + Project (#1) + Filter (#0 = "") + Get l9 + cte [recursion_limit=10, return_at_limit] l11 = + Union + Project (#0, #0, #4) + Map (0) + Get l8 + Project (#0, #3, #4) + Filter (char_length(#1) > 0) + Map (substr(#1, 2), (((#2 + integer_to_bigint(ascii(substr(#1, 1, 1)))) * 17) % 256)) + Get l11 + Return + With + cte l12 = + Project (#1, #3, #4) + Join on=(#0 = #2) type=differential + ArrangeBy keys=[[#0]] + Project (#0, #2) + Filter (#1 = "") AND (#0) IS NOT NULL + Get l11 + ArrangeBy keys=[[#0]] + Project (#0..=#2) + Filter NOT(#3) + Get l8 + cte l13 = + Project (#0, #2) + Get l12 + cte l14 = + Distinct project=[#0, #1] + Get l13 + cte l15 = + Reduce group_by=[#0, #1] aggregates=[count(*)] + Project (#0, #1) + Filter (#1 >= #3) + Join on=(#0 = #2) type=differential + ArrangeBy keys=[[#0]] + Get l14 + ArrangeBy keys=[[#0]] + Get l13 + cte l16 = + Union + Get l15 + Map (0) + Union + Negate + Project (#0, #1) + Get l15 + Get l14 + cte l17 = + Reduce aggregates=[sum((((1 + #0) * #2) * integer_to_bigint(#1)))] + Project (#0, #1, #5) + Join on=(#0 = #3 AND #2 = #4) type=differential + ArrangeBy keys=[[#0, #2]] + Get l12 + ArrangeBy keys=[[#0, #1]] + Union + Get l16 + Map (null) + Union + Negate + Project (#0, #1) + Get l16 + Get l14 + Return + CrossJoin type=differential + ArrangeBy keys=[[]] + Project (#1) + Map (numeric_to_bigint(#0)) Union - Get l21 + Get l10 Map (null) Union Negate - Project (#0, #1) - Get l21 - Get l19 - Return - CrossJoin type=differential - ArrangeBy keys=[[]] - Project (#1) - Map (numeric_to_bigint(#0)) - Union - Get l2 - Map (null) - Union - Negate - Project () - Get l2 - Constant - - () - ArrangeBy keys=[[]] - Project (#1) - Map (numeric_to_bigint(#0)) - Union - Get l22 - Map (null) - Union - Negate - Project () - Get l22 - Constant - - () + Project () + Get l10 + Constant + - () + ArrangeBy keys=[[]] + Project (#1) + Map (numeric_to_bigint(#0)) + Union + Get l17 + Map (null) + Union + Negate + Project () + Get l17 + Constant + - () Source materialize.public.input diff --git a/test/sqllogictest/advent-of-code/2023/aoc_1216.slt 
b/test/sqllogictest/advent-of-code/2023/aoc_1216.slt index 2a538abf6907f..7e0425642d3c7 100644 --- a/test/sqllogictest/advent-of-code/2023/aoc_1216.slt +++ b/test/sqllogictest/advent-of-code/2023/aoc_1216.slt @@ -223,7 +223,7 @@ EXPLAIN WITH MUTUALLY RECURSIVE SELECT * FROM part1, part2; ---- Explained Query: - With Mutually Recursive + With cte l0 = Project (#0, #2, #3) Map (substr(#1, #2, 1)) @@ -233,205 +233,207 @@ Explained Query: FlatMap generate_series(1, (regexp_split_to_array["\n", case_insensitive=false](#0) array_length 1), 1) ReadStorage materialize.public.input cte l1 = - ArrangeBy keys=[[#0, #1]] - Filter (#2) IS NOT NULL - Get l0 - cte l2 = - ArrangeBy keys=[[#0, #1]] - Constant - total_rows (diffs absed): 24 - first_rows: - - ("d", "-", 0, -1, "l") - - ("d", "/", 0, -1, "l") - - ("l", "-", 0, -1, "l") - - ("l", ".", 0, -1, "l") - - ("l", "\", -1, 0, "u") - - ("l", "|", -1, 0, "u") - - ("r", "/", -1, 0, "u") - - ("r", "|", -1, 0, "u") - - ("u", "-", 0, -1, "l") - - ("u", ".", -1, 0, "u") - - ("u", "\", 0, -1, "l") - - ("u", "|", -1, 0, "u") - - ("d", "-", 0, 1, "r") - - ("d", ".", 1, 0, "d") - - ("d", "\", 0, 1, "r") - - ("d", "|", 1, 0, "d") - - ("l", "/", 1, 0, "d") - - ("l", "|", 1, 0, "d") - - ("r", "-", 0, 1, "r") - - ("r", ".", 0, 1, "r") - cte l3 = - Distinct project=[#0..=#2] - Union - Project (#11, #12, #10) - Map ((#0 + #8), (#1 + #9)) - Join on=(#0 = #3 AND #1 = #4 AND #2 = #6 AND #5 = #7) type=differential - ArrangeBy keys=[[#0, #1]] - Get l3 - Get l1 - Get l2 - Constant - - (1, 1, "r") - cte l4 = - Project (#0, #1) - Get l0 - cte l5 = - Reduce aggregates=[count(*)] - Project () - Join on=(#0 = #2 AND #1 = #3) type=differential - ArrangeBy keys=[[#0, #1]] - Distinct project=[#0, #1] - Project (#0, #1) - Get l3 - ArrangeBy keys=[[#0, #1]] - Distinct project=[#0, #1] - Get l4 - cte l6 = Project (#1) Get l0 - cte l7 = + cte l2 = Reduce aggregates=[min(#0)] - Get l6 - cte l8 = + Get l1 + cte l3 = Union - Get l7 + Get l2 Map (null) Union Negate Project () - Get l7 + Get l2 Constant - () - cte l9 = + cte l4 = Reduce aggregates=[max(#0)] - Get l6 - cte l10 = + Get l1 + cte l5 = Union - Get l9 + Get l4 Map (null) Union Negate Project () - Get l9 + Get l4 Constant - () - cte l11 = + cte l6 = Project (#0) Get l0 - cte l12 = + cte l7 = Reduce aggregates=[min(#0)] - Get l11 - cte l13 = + Get l6 + cte l8 = Union - Get l12 + Get l7 Map (null) Union Negate Project () - Get l12 + Get l7 Constant - () - cte l14 = + cte l9 = + Project (#0, #1) + Get l0 + cte l10 = CrossJoin type=differential ArrangeBy keys=[[]] - Get l4 + Get l9 ArrangeBy keys=[[]] Union - Get l10 + Get l5 Map (null) Union Negate Project () - Get l10 + Get l5 Constant - () - cte l15 = - Distinct project=[#0..=#3] - Union - Project (#0, #1, #3, #2) - Map (("r" || integer_to_text(#0)), "r") - CrossJoin type=differential - ArrangeBy keys=[[]] - Get l11 - ArrangeBy keys=[[]] - Union - Get l8 - Map (null) - Union - Negate - Project () - Get l8 - Constant - - () - Project (#0, #2, #4, #3) - Map (("l" || integer_to_text(#0)), "l") - Get l14 - Project (#1, #0, #3, #2) - Map (("d" || integer_to_text(#0)), "d") - CrossJoin type=differential - ArrangeBy keys=[[]] - Get l6 - ArrangeBy keys=[[]] - Union - Get l13 - Map (null) - Union - Negate - Project () - Get l13 - Constant - - () - Project (#2, #1, #4, #3) - Map (("u" || integer_to_text(#1)), "u") - Get l14 - Project (#12, #13, #11, #3) - Map ((#0 + #9), (#1 + #10)) - Join on=(#0 = #4 AND #1 = #5 AND #2 = #7 AND #6 = #8) type=differential - ArrangeBy keys=[[#0, #1]] - Filter 
(#0) IS NOT NULL AND (#1) IS NOT NULL - Get l15 - Get l1 - Get l2 + cte l11 = + ArrangeBy keys=[[#0, #1]] + Filter (#2) IS NOT NULL + Get l0 + cte l12 = + ArrangeBy keys=[[#0, #1]] + Constant + total_rows (diffs absed): 24 + first_rows: + - ("d", "-", 0, -1, "l") + - ("d", "/", 0, -1, "l") + - ("l", "-", 0, -1, "l") + - ("l", ".", 0, -1, "l") + - ("l", "\", -1, 0, "u") + - ("l", "|", -1, 0, "u") + - ("r", "/", -1, 0, "u") + - ("r", "|", -1, 0, "u") + - ("u", "-", 0, -1, "l") + - ("u", ".", -1, 0, "u") + - ("u", "\", 0, -1, "l") + - ("u", "|", -1, 0, "u") + - ("d", "-", 0, 1, "r") + - ("d", ".", 1, 0, "d") + - ("d", "\", 0, 1, "r") + - ("d", "|", 1, 0, "d") + - ("l", "/", 1, 0, "d") + - ("l", "|", 1, 0, "d") + - ("r", "-", 0, 1, "r") + - ("r", ".", 0, 1, "r") Return - With - cte l16 = - Reduce aggregates=[max(#0)] - Project (#1) - Reduce group_by=[#0] aggregates=[count(*)] - Project (#2) - Join on=(#0 = #3 AND #1 = #4) type=differential + With Mutually Recursive + cte l13 = + Distinct project=[#0..=#2] + Union + Project (#11, #12, #10) + Map ((#0 + #8), (#1 + #9)) + Join on=(#0 = #3 AND #1 = #4 AND #2 = #6 AND #5 = #7) type=differential ArrangeBy keys=[[#0, #1]] - Distinct project=[#0..=#2] - Project (#0, #1, #3) - Filter (#0) IS NOT NULL AND (#1) IS NOT NULL - Get l15 + Get l13 + Get l11 + Get l12 + Constant + - (1, 1, "r") + cte l14 = + Reduce aggregates=[count(*)] + Project () + Join on=(#0 = #2 AND #1 = #3) type=differential + ArrangeBy keys=[[#0, #1]] + Distinct project=[#0, #1] + Project (#0, #1) + Get l13 + ArrangeBy keys=[[#0, #1]] + Distinct project=[#0, #1] + Get l9 + cte l15 = + Distinct project=[#0..=#3] + Union + Project (#0, #1, #3, #2) + Map (("r" || integer_to_text(#0)), "r") + CrossJoin type=differential + ArrangeBy keys=[[]] + Get l6 + ArrangeBy keys=[[]] + Union + Get l3 + Map (null) + Union + Negate + Project () + Get l3 + Constant + - () + Project (#0, #2, #4, #3) + Map (("l" || integer_to_text(#0)), "l") + Get l10 + Project (#1, #0, #3, #2) + Map (("d" || integer_to_text(#0)), "d") + CrossJoin type=differential + ArrangeBy keys=[[]] + Get l1 + ArrangeBy keys=[[]] + Union + Get l8 + Map (null) + Union + Negate + Project () + Get l8 + Constant + - () + Project (#2, #1, #4, #3) + Map (("u" || integer_to_text(#1)), "u") + Get l10 + Project (#12, #13, #11, #3) + Map ((#0 + #9), (#1 + #10)) + Join on=(#0 = #4 AND #1 = #5 AND #2 = #7 AND #6 = #8) type=differential ArrangeBy keys=[[#0, #1]] - Distinct project=[#0, #1] - Project (#0, #1) - Get l0 + Filter (#0) IS NOT NULL AND (#1) IS NOT NULL + Get l15 + Get l11 + Get l12 Return - CrossJoin type=differential - ArrangeBy keys=[[]] - Union - Get l5 - Map (0) - Union - Negate - Project () - Get l5 - Constant - - () - ArrangeBy keys=[[]] - Union - Get l16 - Map (null) - Union - Negate - Project () - Get l16 - Constant - - () + With + cte l16 = + Reduce aggregates=[max(#0)] + Project (#1) + Reduce group_by=[#0] aggregates=[count(*)] + Project (#2) + Join on=(#0 = #3 AND #1 = #4) type=differential + ArrangeBy keys=[[#0, #1]] + Distinct project=[#0..=#2] + Project (#0, #1, #3) + Filter (#0) IS NOT NULL AND (#1) IS NOT NULL + Get l15 + ArrangeBy keys=[[#0, #1]] + Distinct project=[#0, #1] + Project (#0, #1) + Get l0 + Return + CrossJoin type=differential + ArrangeBy keys=[[]] + Union + Get l14 + Map (0) + Union + Negate + Project () + Get l14 + Constant + - () + ArrangeBy keys=[[]] + Union + Get l16 + Map (null) + Union + Negate + Project () + Get l16 + Constant + - () Source materialize.public.input diff --git 
a/test/sqllogictest/advent-of-code/2023/aoc_1217.slt b/test/sqllogictest/advent-of-code/2023/aoc_1217.slt index d15a9425807de..2e52c5f7c858c 100644 --- a/test/sqllogictest/advent-of-code/2023/aoc_1217.slt +++ b/test/sqllogictest/advent-of-code/2023/aoc_1217.slt @@ -223,7 +223,7 @@ EXPLAIN WITH MUTUALLY RECURSIVE SELECT * FROM part1, part2; ---- Explained Query: - With Mutually Recursive + With cte l0 = Project (#0, #2, #3) Map (text_to_integer(substr(#1, #2, 1))) @@ -235,88 +235,43 @@ Explained Query: cte l1 = ArrangeBy keys=[[#0, #1]] Get l0 - cte l2 = - Project (#0..=#3, #5) - Get l3 - cte l3 = - Reduce group_by=[#0..=#4] aggregates=[min(#5)] - Union - Project (#6, #7, #2, #3, #9, #10) - Map ((#4 + 1), (#5 + #8)) - Join on=(#6 = (#0 + #2) AND #7 = (#1 + #3)) type=differential - ArrangeBy keys=[[(#0 + #2), (#1 + #3)]] - Filter (#4 < 3) - Get l3 - Get l1 - Project (#5, #6, #3, #2, #9, #8) - Map ((#4 + #7), 1) - Join on=(#5 = (#0 + #3) AND #6 = (#1 + #2)) type=differential - ArrangeBy keys=[[(#0 + #3), (#1 + #2)]] - Get l2 - Get l1 - Project (#5, #6, #8, #9, #11, #10) - Map (-(#3), -(#2), (#4 + #7), 1) - Join on=(#5 = (#0 - #3) AND #6 = (#1 - #2)) type=differential - ArrangeBy keys=[[(#0 - #3), (#1 - #2)]] - Get l2 - Get l1 - Constant - - (1, 1, 0, 1, 0, 0) - - (1, 1, 1, 0, 0, 0) - cte l4 = - Reduce aggregates=[min(#0)] - Project (#2) - Join on=(#0 = #3 AND #1 = #4) type=differential - ArrangeBy keys=[[#0, #1]] - Project (#0, #1, #5) - Get l3 - ArrangeBy keys=[[]] - Reduce aggregates=[max(#0)] - Project (#0) - Get l0 - ArrangeBy keys=[[]] - Reduce aggregates=[max(#0)] - Project (#1) - Get l0 - cte l5 = - Project (#0..=#3, #5) - Filter (#4 >= 4) - Get l6 - cte l6 = - Reduce group_by=[#0..=#4] aggregates=[min(#5)] - Union - Project (#6, #7, #2, #3, #9, #10) - Map ((#4 + 1), (#5 + #8)) - Join on=(#6 = (#0 + #2) AND #7 = (#1 + #3)) type=differential - ArrangeBy keys=[[(#0 + #2), (#1 + #3)]] - Filter (#4 < 10) - Get l6 - Get l1 - Project (#5, #6, #3, #2, #9, #8) - Map ((#4 + #7), 1) - Join on=(#5 = (#0 + #3) AND #6 = (#1 + #2)) type=differential - ArrangeBy keys=[[(#0 + #3), (#1 + #2)]] - Get l5 - Get l1 - Project (#5, #6, #8, #9, #11, #10) - Map (-(#3), -(#2), (#4 + #7), 1) - Join on=(#5 = (#0 - #3) AND #6 = (#1 - #2)) type=differential - ArrangeBy keys=[[(#0 - #3), (#1 - #2)]] - Get l5 - Get l1 - Constant - - (1, 1, 0, 1, 0, 0) - - (1, 1, 1, 0, 0, 0) Return - With - cte l7 = + With Mutually Recursive + cte l2 = + Project (#0..=#3, #5) + Get l3 + cte l3 = + Reduce group_by=[#0..=#4] aggregates=[min(#5)] + Union + Project (#6, #7, #2, #3, #9, #10) + Map ((#4 + 1), (#5 + #8)) + Join on=(#6 = (#0 + #2) AND #7 = (#1 + #3)) type=differential + ArrangeBy keys=[[(#0 + #2), (#1 + #3)]] + Filter (#4 < 3) + Get l3 + Get l1 + Project (#5, #6, #3, #2, #9, #8) + Map ((#4 + #7), 1) + Join on=(#5 = (#0 + #3) AND #6 = (#1 + #2)) type=differential + ArrangeBy keys=[[(#0 + #3), (#1 + #2)]] + Get l2 + Get l1 + Project (#5, #6, #8, #9, #11, #10) + Map (-(#3), -(#2), (#4 + #7), 1) + Join on=(#5 = (#0 - #3) AND #6 = (#1 - #2)) type=differential + ArrangeBy keys=[[(#0 - #3), (#1 - #2)]] + Get l2 + Get l1 + Constant + - (1, 1, 0, 1, 0, 0) + - (1, 1, 1, 0, 0, 0) + cte l4 = Reduce aggregates=[min(#0)] Project (#2) Join on=(#0 = #3 AND #1 = #4) type=differential ArrangeBy keys=[[#0, #1]] Project (#0, #1, #5) - Filter (#4 >= 4) - Get l6 + Get l3 ArrangeBy keys=[[]] Reduce aggregates=[max(#0)] Project (#0) @@ -325,28 +280,75 @@ Explained Query: Reduce aggregates=[max(#0)] Project (#1) Get l0 - Return - CrossJoin 
type=differential - ArrangeBy keys=[[]] + cte l5 = + Project (#0..=#3, #5) + Filter (#4 >= 4) + Get l6 + cte l6 = + Reduce group_by=[#0..=#4] aggregates=[min(#5)] Union - Get l4 - Map (null) - Union - Negate - Project () - Get l4 - Constant - - () - ArrangeBy keys=[[]] - Union - Get l7 - Map (null) - Union - Negate - Project () - Get l7 - Constant - - () + Project (#6, #7, #2, #3, #9, #10) + Map ((#4 + 1), (#5 + #8)) + Join on=(#6 = (#0 + #2) AND #7 = (#1 + #3)) type=differential + ArrangeBy keys=[[(#0 + #2), (#1 + #3)]] + Filter (#4 < 10) + Get l6 + Get l1 + Project (#5, #6, #3, #2, #9, #8) + Map ((#4 + #7), 1) + Join on=(#5 = (#0 + #3) AND #6 = (#1 + #2)) type=differential + ArrangeBy keys=[[(#0 + #3), (#1 + #2)]] + Get l5 + Get l1 + Project (#5, #6, #8, #9, #11, #10) + Map (-(#3), -(#2), (#4 + #7), 1) + Join on=(#5 = (#0 - #3) AND #6 = (#1 - #2)) type=differential + ArrangeBy keys=[[(#0 - #3), (#1 - #2)]] + Get l5 + Get l1 + Constant + - (1, 1, 0, 1, 0, 0) + - (1, 1, 1, 0, 0, 0) + Return + With + cte l7 = + Reduce aggregates=[min(#0)] + Project (#2) + Join on=(#0 = #3 AND #1 = #4) type=differential + ArrangeBy keys=[[#0, #1]] + Project (#0, #1, #5) + Filter (#4 >= 4) + Get l6 + ArrangeBy keys=[[]] + Reduce aggregates=[max(#0)] + Project (#0) + Get l0 + ArrangeBy keys=[[]] + Reduce aggregates=[max(#0)] + Project (#1) + Get l0 + Return + CrossJoin type=differential + ArrangeBy keys=[[]] + Union + Get l4 + Map (null) + Union + Negate + Project () + Get l4 + Constant + - () + ArrangeBy keys=[[]] + Union + Get l7 + Map (null) + Union + Negate + Project () + Get l7 + Constant + - () Source materialize.public.input diff --git a/test/sqllogictest/advent-of-code/2023/aoc_1218.slt b/test/sqllogictest/advent-of-code/2023/aoc_1218.slt index bf5124a7b5c5b..49f5bb1de6711 100644 --- a/test/sqllogictest/advent-of-code/2023/aoc_1218.slt +++ b/test/sqllogictest/advent-of-code/2023/aoc_1218.slt @@ -217,146 +217,148 @@ EXPLAIN WITH MUTUALLY RECURSIVE SELECT * FROM part1, part2; ---- Explained Query: - With Mutually Recursive + With cte l0 = Project (#1, #2) Map (array_index(regexp_split_to_array["\n", case_insensitive=false](#0), integer_to_bigint(#1))) FlatMap generate_series(1, (regexp_split_to_array["\n", case_insensitive=false](#0) array_length 1), 1) ReadStorage materialize.public.input cte l1 = - Distinct project=[#0..=#4] - Union - Project (#0, #1, #7..=#9) - Map ((#0 + (#4 * #6)), (#1 + (#5 * #6)), (#2 + 1)) - Join on=(#2 = #3) type=differential - ArrangeBy keys=[[#2]] - Project (#2..=#4) - Get l1 - ArrangeBy keys=[[#0]] - Project (#0, #4..=#6) - Map (regexp_split_to_array[" ", case_insensitive=false](#1), array_index(#2, 1), case when (#3 = "U") then -1 else case when ("D" = array_index(regexp_split_to_array[" ", case_insensitive=false](#1), 1)) then 1 else 0 end end, case when (#3 = "L") then -1 else case when ("R" = array_index(regexp_split_to_array[" ", case_insensitive=false](#1), 1)) then 1 else 0 end end, text_to_integer(array_index(#2, 2))) - Get l0 - Constant - - (0, 0, 0, 0, 1) - cte l2 = - Reduce aggregates=[sum(((#0 + #2) * (#1 - #3)))] - Project (#0..=#3) - Get l1 - cte l3 = - Union - Get l2 - Map (null) - Union - Negate - Project () - Get l2 - Constant - - () - cte l4 = Reduce aggregates=[sum(#0)] Project (#2) Map (text_to_integer(array_index(regexp_split_to_array[" ", case_insensitive=false](#1), 2))) Get l0 - cte l5 = + cte l2 = Union - Get l4 + Get l1 Map (null) Union Negate Project () - Get l4 + Get l1 Constant - () - cte l6 = - Distinct project=[#0..=#4] - Union - Project (#0, 
#1, #7..=#9) - Map ((#0 + integer_to_bigint((#4 * #6))), (#1 + integer_to_bigint((#5 * #6))), (#2 + 1)) - Join on=(#2 = #3) type=differential - ArrangeBy keys=[[#2]] - Project (#2..=#4) - Get l6 - ArrangeBy keys=[[#0]] - Project (#0, #4, #5, #7) - Map (array_index(regexp_split_to_array[" ", case_insensitive=false](#1), 3), substr(#2, 8, 1), case when (#3 = "3") then -1 else case when ("1" = substr(array_index(regexp_split_to_array[" ", case_insensitive=false](#1), 3), 8, 1)) then 1 else 0 end end, case when (#3 = "2") then -1 else case when ("0" = substr(array_index(regexp_split_to_array[" ", case_insensitive=false](#1), 3), 8, 1)) then 1 else 0 end end, decode(("0" || substr(#2, 3, 5)), "hex"), (((65536 * get_byte(#6, 0)) + (256 * get_byte(#6, 1))) + get_byte(#6, 2))) - Get l0 - Constant - - (0, 0, 0, 0, 1) Return - With - cte l7 = + With Mutually Recursive + cte l3 = + Distinct project=[#0..=#4] + Union + Project (#0, #1, #7..=#9) + Map ((#0 + (#4 * #6)), (#1 + (#5 * #6)), (#2 + 1)) + Join on=(#2 = #3) type=differential + ArrangeBy keys=[[#2]] + Project (#2..=#4) + Get l3 + ArrangeBy keys=[[#0]] + Project (#0, #4..=#6) + Map (regexp_split_to_array[" ", case_insensitive=false](#1), array_index(#2, 1), case when (#3 = "U") then -1 else case when ("D" = array_index(regexp_split_to_array[" ", case_insensitive=false](#1), 1)) then 1 else 0 end end, case when (#3 = "L") then -1 else case when ("R" = array_index(regexp_split_to_array[" ", case_insensitive=false](#1), 1)) then 1 else 0 end end, text_to_integer(array_index(#2, 2))) + Get l0 + Constant + - (0, 0, 0, 0, 1) + cte l4 = Reduce aggregates=[sum(((#0 + #2) * (#1 - #3)))] Project (#0..=#3) - Get l6 - cte l8 = + Get l3 + cte l5 = Union - Get l7 + Get l4 Map (null) Union Negate Project () - Get l7 - Constant - - () - cte l9 = - Reduce aggregates=[sum(#0)] - Project (#3) - Map (decode(("0" || substr(array_index(regexp_split_to_array[" ", case_insensitive=false](#1), 3), 3, 5)), "hex"), (((65536 * get_byte(#2, 0)) + (256 * get_byte(#2, 1))) + get_byte(#2, 2))) - Get l0 - cte l10 = - Union - Get l9 - Map (null) - Union - Negate - Project () - Get l9 + Get l4 Constant - () + cte l6 = + Distinct project=[#0..=#4] + Union + Project (#0, #1, #7..=#9) + Map ((#0 + integer_to_bigint((#4 * #6))), (#1 + integer_to_bigint((#5 * #6))), (#2 + 1)) + Join on=(#2 = #3) type=differential + ArrangeBy keys=[[#2]] + Project (#2..=#4) + Get l6 + ArrangeBy keys=[[#0]] + Project (#0, #4, #5, #7) + Map (array_index(regexp_split_to_array[" ", case_insensitive=false](#1), 3), substr(#2, 8, 1), case when (#3 = "3") then -1 else case when ("1" = substr(array_index(regexp_split_to_array[" ", case_insensitive=false](#1), 3), 8, 1)) then 1 else 0 end end, case when (#3 = "2") then -1 else case when ("0" = substr(array_index(regexp_split_to_array[" ", case_insensitive=false](#1), 3), 8, 1)) then 1 else 0 end end, decode(("0" || substr(#2, 3, 5)), "hex"), (((65536 * get_byte(#6, 0)) + (256 * get_byte(#6, 1))) + get_byte(#6, 2))) + Get l0 + Constant + - (0, 0, 0, 0, 1) Return - Project (#4, #5) - Map ((((abs(#0) / 2) + (#1 / 2)) + 1), numeric_to_bigint((((abs(#2) / 2) + bigint_to_numeric((#3 / 2))) + 1))) - CrossJoin type=delta - ArrangeBy keys=[[]] - Union - Get l3 - Map (null) - Union - Negate - Project () - Get l3 - Constant - - () - ArrangeBy keys=[[]] - Union - Get l5 - Map (null) - Union - Negate - Project () - Get l5 - Constant - - () - ArrangeBy keys=[[]] + With + cte l7 = + Reduce aggregates=[sum(((#0 + #2) * (#1 - #3)))] + Project (#0..=#3) + Get l6 + cte l8 = + 
Union + Get l7 + Map (null) Union - Get l8 - Map (null) - Union - Negate - Project () - Get l8 - Constant - - () - ArrangeBy keys=[[]] + Negate + Project () + Get l7 + Constant + - () + cte l9 = + Reduce aggregates=[sum(#0)] + Project (#3) + Map (decode(("0" || substr(array_index(regexp_split_to_array[" ", case_insensitive=false](#1), 3), 3, 5)), "hex"), (((65536 * get_byte(#2, 0)) + (256 * get_byte(#2, 1))) + get_byte(#2, 2))) + Get l0 + cte l10 = + Union + Get l9 + Map (null) Union - Get l10 - Map (null) - Union - Negate - Project () - Get l10 - Constant - - () + Negate + Project () + Get l9 + Constant + - () + Return + Project (#4, #5) + Map ((((abs(#0) / 2) + (#1 / 2)) + 1), numeric_to_bigint((((abs(#2) / 2) + bigint_to_numeric((#3 / 2))) + 1))) + CrossJoin type=delta + ArrangeBy keys=[[]] + Union + Get l5 + Map (null) + Union + Negate + Project () + Get l5 + Constant + - () + ArrangeBy keys=[[]] + Union + Get l2 + Map (null) + Union + Negate + Project () + Get l2 + Constant + - () + ArrangeBy keys=[[]] + Union + Get l8 + Map (null) + Union + Negate + Project () + Get l8 + Constant + - () + ArrangeBy keys=[[]] + Union + Get l10 + Map (null) + Union + Negate + Project () + Get l10 + Constant + - () Source materialize.public.input diff --git a/test/sqllogictest/advent-of-code/2023/aoc_1219.slt b/test/sqllogictest/advent-of-code/2023/aoc_1219.slt index 9d58a68a4ab2d..e798a73b31c0b 100644 --- a/test/sqllogictest/advent-of-code/2023/aoc_1219.slt +++ b/test/sqllogictest/advent-of-code/2023/aoc_1219.slt @@ -321,7 +321,7 @@ EXPLAIN WITH MUTUALLY RECURSIVE SELECT * FROM part1, part2; ---- Explained Query: - With Mutually Recursive + With cte l0 = Project (#0, #2, #6..=#9) Map (array_index(regexp_split_to_array[",", case_insensitive=false](#1), integer_to_bigint(#2)), substr(#3, 2, 1), ((#4 = "<") OR (#4 = ">")), case when #5 then substr(#3, 1, 1) else "x" end, case when #5 then #4 else ">" end, case when #5 then text_to_integer(array_index(regexp_split_to_array[":", case_insensitive=false](substr(#3, 3)), 1)) else 0 end, case when #5 then array_index(regexp_split_to_array[":", case_insensitive=false](substr(#3, 3)), 2) else #3 end) @@ -333,76 +333,78 @@ Explained Query: Project (#1) Map (array_index(regexp_split_to_array["\n\n", case_insensitive=false](#0), 1)) ReadStorage materialize.public.input - cte l1 = - Union - Project (#7, #3..=#6) - Map (regexp_split_to_array[",", case_insensitive=false](btrim(btrim(#1, "\}"), "\{")), text_to_integer(substr(array_index(#2, 1), 3)), text_to_integer(substr(array_index(#2, 2), 3)), text_to_integer(substr(array_index(#2, 3), 3)), text_to_integer(substr(array_index(#2, 4), 3)), "in") - FlatMap unnest_array(regexp_split_to_array["\n", case_insensitive=false](#0)) - Project (#1) - Map (array_index(regexp_split_to_array["\n\n", case_insensitive=false](#0), 2)) - ReadStorage materialize.public.input - Project (#6, #1..=#4) - TopK group_by=[#0..=#4] order_by=[#5 asc nulls_last] limit=1 - Project (#0..=#4, #6, #10) - Filter case when (#8 = "<") then case when (#7 = "x") then (#1 < #9) else case when (#7 = "m") then (#2 < #9) else case when (#7 = "a") then (#3 < #9) else case when (#7 = "s") then (#4 < #9) else false end end end end else case when (#8 = ">") then case when (#7 = "x") then (#1 > #9) else case when (#7 = "m") then (#2 > #9) else case when (#7 = "a") then (#3 > #9) else case when (#7 = "s") then (#4 > #9) else false end end end end else false end end - Join on=(#0 = #5) type=differential - ArrangeBy keys=[[#0]] - Filter (#0) IS NOT NULL - Get l1 - 
ArrangeBy keys=[[#0]] - Get l0 - cte l2 = - Reduce aggregates=[sum((((#0 + #1) + #2) + #3))] - Project (#1..=#4) - Filter (#0 = "A") - Get l1 - cte l3 = - Project (#0..=#9, #12..=#15) - Join on=(#0 = #10 AND #1 = #11) type=differential - ArrangeBy keys=[[#0, #1]] - Filter (#0) IS NOT NULL - Get l4 - ArrangeBy keys=[[#0, #1]] - Get l0 - cte l4 = - Union - Project (#13, #28, #16, #18, #20, #21, #23, #24, #26, #27) - Map ((#10 = "x"), (#11 = ">"), case when (#14 AND #15) then greatest((#12 + 1), #2) else #2 end, (#11 = "<"), case when (#14 AND #17) then least((#12 - 1), #3) else #3 end, (#10 = "m"), case when (#15 AND #19) then greatest((#12 + 1), #4) else #4 end, case when (#17 AND #19) then least((#12 - 1), #5) else #5 end, (#10 = "a"), case when (#15 AND #22) then greatest((#12 + 1), #6) else #6 end, case when (#17 AND #22) then least((#12 - 1), #7) else #7 end, (#10 = "s"), case when (#15 AND #25) then greatest((#12 + 1), #8) else #8 end, case when (#17 AND #25) then least((#12 - 1), #9) else #9 end, 1) - Get l3 - Project (#0, #14, #17, #19, #21, #22, #24, #25, #27, #28) - Map ((#1 + 1), (#10 = "x"), (#11 = "<"), case when (#15 AND #16) then greatest(#12, #2) else #2 end, (#11 = ">"), case when (#15 AND #18) then least(#12, #3) else #3 end, (#10 = "m"), case when (#16 AND #20) then greatest(#12, #4) else #4 end, case when (#18 AND #20) then least(#12, #5) else #5 end, (#10 = "a"), case when (#16 AND #23) then greatest(#12, #6) else #6 end, case when (#18 AND #23) then least(#12, #7) else #7 end, (#10 = "s"), case when (#16 AND #26) then greatest(#12, #8) else #8 end, case when (#18 AND #26) then least(#12, #9) else #9 end) - Get l3 - Constant - - ("in", 1, 1, 4000, 1, 4000, 1, 4000, 1, 4000) Return - With - cte l5 = - Reduce aggregates=[sum((((integer_to_bigint(((1 + #1) - #0)) * integer_to_bigint(((1 + #3) - #2))) * integer_to_bigint(((1 + #5) - #4))) * integer_to_bigint(((1 + #7) - #6))))] - Project (#2..=#9) + With Mutually Recursive + cte l1 = + Union + Project (#7, #3..=#6) + Map (regexp_split_to_array[",", case_insensitive=false](btrim(btrim(#1, "\}"), "\{")), text_to_integer(substr(array_index(#2, 1), 3)), text_to_integer(substr(array_index(#2, 2), 3)), text_to_integer(substr(array_index(#2, 3), 3)), text_to_integer(substr(array_index(#2, 4), 3)), "in") + FlatMap unnest_array(regexp_split_to_array["\n", case_insensitive=false](#0)) + Project (#1) + Map (array_index(regexp_split_to_array["\n\n", case_insensitive=false](#0), 2)) + ReadStorage materialize.public.input + Project (#6, #1..=#4) + TopK group_by=[#0..=#4] order_by=[#5 asc nulls_last] limit=1 + Project (#0..=#4, #6, #10) + Filter case when (#8 = "<") then case when (#7 = "x") then (#1 < #9) else case when (#7 = "m") then (#2 < #9) else case when (#7 = "a") then (#3 < #9) else case when (#7 = "s") then (#4 < #9) else false end end end end else case when (#8 = ">") then case when (#7 = "x") then (#1 > #9) else case when (#7 = "m") then (#2 > #9) else case when (#7 = "a") then (#3 > #9) else case when (#7 = "s") then (#4 > #9) else false end end end end else false end end + Join on=(#0 = #5) type=differential + ArrangeBy keys=[[#0]] + Filter (#0) IS NOT NULL + Get l1 + ArrangeBy keys=[[#0]] + Get l0 + cte l2 = + Reduce aggregates=[sum((((#0 + #1) + #2) + #3))] + Project (#1..=#4) Filter (#0 = "A") - Get l4 + Get l1 + cte l3 = + Project (#0..=#9, #12..=#15) + Join on=(#0 = #10 AND #1 = #11) type=differential + ArrangeBy keys=[[#0, #1]] + Filter (#0) IS NOT NULL + Get l4 + ArrangeBy keys=[[#0, #1]] + Get l0 + cte l4 = + Union + 
Project (#13, #28, #16, #18, #20, #21, #23, #24, #26, #27) + Map ((#10 = "x"), (#11 = ">"), case when (#14 AND #15) then greatest((#12 + 1), #2) else #2 end, (#11 = "<"), case when (#14 AND #17) then least((#12 - 1), #3) else #3 end, (#10 = "m"), case when (#15 AND #19) then greatest((#12 + 1), #4) else #4 end, case when (#17 AND #19) then least((#12 - 1), #5) else #5 end, (#10 = "a"), case when (#15 AND #22) then greatest((#12 + 1), #6) else #6 end, case when (#17 AND #22) then least((#12 - 1), #7) else #7 end, (#10 = "s"), case when (#15 AND #25) then greatest((#12 + 1), #8) else #8 end, case when (#17 AND #25) then least((#12 - 1), #9) else #9 end, 1) + Get l3 + Project (#0, #14, #17, #19, #21, #22, #24, #25, #27, #28) + Map ((#1 + 1), (#10 = "x"), (#11 = "<"), case when (#15 AND #16) then greatest(#12, #2) else #2 end, (#11 = ">"), case when (#15 AND #18) then least(#12, #3) else #3 end, (#10 = "m"), case when (#16 AND #20) then greatest(#12, #4) else #4 end, case when (#18 AND #20) then least(#12, #5) else #5 end, (#10 = "a"), case when (#16 AND #23) then greatest(#12, #6) else #6 end, case when (#18 AND #23) then least(#12, #7) else #7 end, (#10 = "s"), case when (#16 AND #26) then greatest(#12, #8) else #8 end, case when (#18 AND #26) then least(#12, #9) else #9 end) + Get l3 + Constant + - ("in", 1, 1, 4000, 1, 4000, 1, 4000, 1, 4000) Return - CrossJoin type=differential - ArrangeBy keys=[[]] - Union - Get l2 - Map (null) - Union - Negate - Project () - Get l2 - Constant - - () - ArrangeBy keys=[[]] - Union - Get l5 - Map (null) - Union - Negate - Project () - Get l5 - Constant - - () + With + cte l5 = + Reduce aggregates=[sum((((integer_to_bigint(((1 + #1) - #0)) * integer_to_bigint(((1 + #3) - #2))) * integer_to_bigint(((1 + #5) - #4))) * integer_to_bigint(((1 + #7) - #6))))] + Project (#2..=#9) + Filter (#0 = "A") + Get l4 + Return + CrossJoin type=differential + ArrangeBy keys=[[]] + Union + Get l2 + Map (null) + Union + Negate + Project () + Get l2 + Constant + - () + ArrangeBy keys=[[]] + Union + Get l5 + Map (null) + Union + Negate + Project () + Get l5 + Constant + - () Source materialize.public.input diff --git a/test/sqllogictest/advent-of-code/2023/aoc_1220.slt b/test/sqllogictest/advent-of-code/2023/aoc_1220.slt index eda432f52bb02..13bb057f56642 100644 --- a/test/sqllogictest/advent-of-code/2023/aoc_1220.slt +++ b/test/sqllogictest/advent-of-code/2023/aoc_1220.slt @@ -137,7 +137,7 @@ EXPLAIN WITH MUTUALLY RECURSIVE SELECT * FROM signal WHERE target = 'cn' AND pulse = 'hi'; ---- Explained Query: - With Mutually Recursive + With cte l0 = Project (#1) FlatMap unnest_array(regexp_split_to_array["\n", case_insensitive=false](#0)) @@ -148,245 +148,248 @@ Explained Query: FlatMap generate_series(3, (regexp_split_to_array[" ", case_insensitive=false](#0) array_length 1), 1) Get l0 cte l2 = - Distinct project=[#0, #1] monotonic + Project (#1, #2) + Map (array_index(regexp_split_to_array[" ", case_insensitive=false](#0), 1), substr(#1, 2)) + Get l0 + Return + With Mutually Recursive + cte l3 = + Distinct project=[#0, #1] monotonic + Union + Project (#0, #2) + Filter (#1 > 0) + Map ((#1 - 1)) + Get l3 + Project (#2, #3) + Filter (#1 = 0) AND (#0 < 4100) + Map ((#0 + 1), 20) + Get l3 + Constant + - (1, 1) + cte l4 = Union - Project (#0, #2) - Filter (#1 > 0) - Map ((#1 - 1)) - Get l2 - Project (#2, #3) - Filter (#1 = 0) AND (#0 < 4100) - Map ((#0 + 1), 20) - Get l2 - Constant - - (1, 1) - cte l3 = - Union - Project (#2, #0, #3, #4) - Filter (#1 = 0) - Map ("roadcaster", 1, "lo") - 
Get l2 - Filter (#2 > 0) - Get l12 - Filter (#2 > 0) - Get l26 - cte l4 = - ArrangeBy keys=[[#1]] - Project (#0..=#3) - Filter (#4 = "lo") AND (#1) IS NOT NULL - Get l27 - cte l5 = - Map (array_index(regexp_split_to_array[" ", case_insensitive=false](#0), 1), substr(#1, 2)) - Get l0 - cte l6 = - Project (#0..=#3, #5) - Map ((#3 + 1)) - Join on=(#1 = #4) type=differential - Get l4 - ArrangeBy keys=[[#0]] - Project (#2) - Filter ("%" = substr(#1, 1, 1)) - Get l5 - cte l7 = - Distinct project=[#0, #2, #3, #1] - Project (#0..=#3) - Get l6 - cte l8 = - Reduce group_by=[#0..=#3] aggregates=[count(*)] - Project (#0..=#3) - Filter ((#6 < #1) OR ((#1 = #6) AND ((#7 < #2) OR ((#2 = #7) AND (#4 < #0))))) - Join on=(#3 = #5) type=differential - ArrangeBy keys=[[#3]] - Get l7 - Get l4 - cte l9 = - ArrangeBy keys=[[#0..=#3]] - Get l7 - cte l10 = - Union - Get l8 - Project (#0..=#3, #8) - Map (0) - Join on=(#0 = #4 AND #1 = #5 AND #2 = #6 AND #3 = #7) type=differential - ArrangeBy keys=[[#0..=#3]] - Union - Negate - Project (#0..=#3) - Get l8 - Get l7 - Get l9 - cte l11 = - Union - Get l10 - Map (error("more than one record produced in subquery")) + Project (#2, #0, #3, #4) + Filter (#1 = 0) + Map ("roadcaster", 1, "lo") + Get l3 + Filter (#2 > 0) + Get l12 + Filter (#2 > 0) + Get l26 + cte l5 = + ArrangeBy keys=[[#1]] Project (#0..=#3) - Filter (#4 > 1) - Reduce group_by=[#0..=#3] aggregates=[count(*)] - Project (#0..=#3) - Get l10 - cte l12 = - Project (#1, #2, #4, #10) - Map (case when (0 = (#9 % 2)) then "hi" else "lo" end) - Join on=(#0 = #5 AND #1 = #8 AND #2 = #6 AND #3 = #7) type=differential - ArrangeBy keys=[[#0, #2, #3, #1]] - Get l6 - ArrangeBy keys=[[#0..=#3]] - Union - Get l11 - Project (#0..=#3, #8) - Map (null) - Join on=(#0 = #4 AND #1 = #5 AND #2 = #6 AND #3 = #7) type=differential - ArrangeBy keys=[[#0..=#3]] - Union - Negate - Distinct project=[#0..=#3] - Project (#0..=#3) - Get l11 - Get l7 - Get l9 - cte l13 = - Filter (#1) IS NOT NULL - Get l27 - cte l14 = - Project (#0..=#3, #5) - Map ((#3 + 1)) - Join on=(#1 = #4) type=differential - ArrangeBy keys=[[#1]] - Project (#0..=#3) - Get l13 - ArrangeBy keys=[[#0]] - Project (#2) - Filter ("&" = substr(#1, 1, 1)) - Get l5 - cte l15 = - Distinct project=[#0] - Project (#1) - Get l14 - cte l16 = - ArrangeBy keys=[[#0]] - Get l15 - cte l17 = - Reduce group_by=[#0] aggregates=[count(*)] - Project (#0) - Join on=(#0 = #1) type=differential - Get l16 - ArrangeBy keys=[[#0]] - Project (#1) - Filter (#1) IS NOT NULL - Get l1 - cte l18 = - Union - Get l17 - Project (#0, #2) - Map (0) - Join on=(#0 = #1) type=differential + Filter (#4 = "lo") AND (#1) IS NOT NULL + Get l27 + cte l6 = + Project (#0..=#3, #5) + Map ((#3 + 1)) + Join on=(#1 = #4) type=differential + Get l5 ArrangeBy keys=[[#0]] - Union - Negate - Project (#0) - Get l17 - Get l15 - Get l16 - cte l19 = - Union - Get l18 - Map (error("more than one record produced in subquery")) - Project (#0) - Filter (#1 > 1) - Reduce group_by=[#0] aggregates=[count(*)] - Project (#0) - Get l18 - cte l20 = - Project (#0..=#4, #6) - Join on=(#1 = #5) type=differential - ArrangeBy keys=[[#1]] - Get l14 - ArrangeBy keys=[[#0]] - Union - Get l19 - Project (#0, #2) - Map (null) - Join on=(#0 = #1) type=differential - ArrangeBy keys=[[#0]] - Union - Negate - Distinct project=[#0] - Project (#0) - Get l19 - Get l15 - Get l16 - cte l21 = - Distinct project=[#0, #2, #3, #1] - Project (#0..=#3) - Get l20 - cte l22 = - Reduce group_by=[#0..=#3] aggregates=[count(*)] - Project (#0..=#3) - Filter (#7 = "hi") - 
TopK group_by=[#0..=#4] order_by=[#5 desc nulls_first, #6 desc nulls_first] limit=1 exp_group_size=1000 - Project (#0..=#4, #6..=#8) - Filter ((#6 < #1) OR ((#1 = #6) AND ((#7 < #2) OR ((#2 = #7) AND (#4 <= #0))))) - Join on=(#3 = #5) type=differential - ArrangeBy keys=[[#3]] - Get l21 - ArrangeBy keys=[[#1]] - Get l13 - cte l23 = - ArrangeBy keys=[[#0..=#3]] - Get l21 - cte l24 = - Union - Get l22 - Project (#0..=#3, #8) - Map (0) - Join on=(#0 = #4 AND #1 = #5 AND #2 = #6 AND #3 = #7) type=differential + Project (#1) + Filter ("%" = substr(#0, 1, 1)) + Get l2 + cte l7 = + Distinct project=[#0, #2, #3, #1] + Project (#0..=#3) + Get l6 + cte l8 = + Reduce group_by=[#0..=#3] aggregates=[count(*)] + Project (#0..=#3) + Filter ((#6 < #1) OR ((#1 = #6) AND ((#7 < #2) OR ((#2 = #7) AND (#4 < #0))))) + Join on=(#3 = #5) type=differential + ArrangeBy keys=[[#3]] + Get l7 + Get l5 + cte l9 = + ArrangeBy keys=[[#0..=#3]] + Get l7 + cte l10 = + Union + Get l8 + Project (#0..=#3, #8) + Map (0) + Join on=(#0 = #4 AND #1 = #5 AND #2 = #6 AND #3 = #7) type=differential + ArrangeBy keys=[[#0..=#3]] + Union + Negate + Project (#0..=#3) + Get l8 + Get l7 + Get l9 + cte l11 = + Union + Get l10 + Map (error("more than one record produced in subquery")) + Project (#0..=#3) + Filter (#4 > 1) + Reduce group_by=[#0..=#3] aggregates=[count(*)] + Project (#0..=#3) + Get l10 + cte l12 = + Project (#1, #2, #4, #10) + Map (case when (0 = (#9 % 2)) then "hi" else "lo" end) + Join on=(#0 = #5 AND #1 = #8 AND #2 = #6 AND #3 = #7) type=differential + ArrangeBy keys=[[#0, #2, #3, #1]] + Get l6 ArrangeBy keys=[[#0..=#3]] Union - Negate - Project (#0..=#3) - Get l22 - Get l21 - Get l23 - cte l25 = - Union - Get l24 - Map (error("more than one record produced in subquery")) - Project (#0..=#3) - Filter (#4 > 1) - Reduce group_by=[#0..=#3] aggregates=[count(*)] + Get l11 + Project (#0..=#3, #8) + Map (null) + Join on=(#0 = #4 AND #1 = #5 AND #2 = #6 AND #3 = #7) type=differential + ArrangeBy keys=[[#0..=#3]] + Union + Negate + Distinct project=[#0..=#3] + Project (#0..=#3) + Get l11 + Get l7 + Get l9 + cte l13 = + Filter (#1) IS NOT NULL + Get l27 + cte l14 = + Project (#0..=#3, #5) + Map ((#3 + 1)) + Join on=(#1 = #4) type=differential + ArrangeBy keys=[[#1]] Project (#0..=#3) - Get l24 - cte l26 = - Project (#1, #2, #4, #11) - Map (case when (#5 = #10) then "lo" else "hi" end) - Join on=(#0 = #6 AND #1 = #9 AND #2 = #7 AND #3 = #8) type=differential - ArrangeBy keys=[[#0, #2, #3, #1]] - Get l20 - ArrangeBy keys=[[#0..=#3]] + Get l13 + ArrangeBy keys=[[#0]] + Project (#1) + Filter ("&" = substr(#0, 1, 1)) + Get l2 + cte l15 = + Distinct project=[#0] + Project (#1) + Get l14 + cte l16 = + ArrangeBy keys=[[#0]] + Get l15 + cte l17 = + Reduce group_by=[#0] aggregates=[count(*)] + Project (#0) + Join on=(#0 = #1) type=differential + Get l16 + ArrangeBy keys=[[#0]] + Project (#1) + Filter (#1) IS NOT NULL + Get l1 + cte l18 = + Union + Get l17 + Project (#0, #2) + Map (0) + Join on=(#0 = #1) type=differential + ArrangeBy keys=[[#0]] + Union + Negate + Project (#0) + Get l17 + Get l15 + Get l16 + cte l19 = + Union + Get l18 + Map (error("more than one record produced in subquery")) + Project (#0) + Filter (#1 > 1) + Reduce group_by=[#0] aggregates=[count(*)] + Project (#0) + Get l18 + cte l20 = + Project (#0..=#4, #6) + Join on=(#1 = #5) type=differential + ArrangeBy keys=[[#1]] + Get l14 + ArrangeBy keys=[[#0]] Union - Get l25 - Project (#0..=#3, #8) + Get l19 + Project (#0, #2) Map (null) - Join on=(#0 = #4 AND #1 = #5 AND #2 = 
#6 AND #3 = #7) type=differential - ArrangeBy keys=[[#0..=#3]] + Join on=(#0 = #1) type=differential + ArrangeBy keys=[[#0]] Union Negate - Distinct project=[#0..=#3] - Project (#0..=#3) - Get l25 - Get l21 - Get l23 - cte l27 = - Project (#0, #5, #1..=#3) - Join on=(#0 = #4) type=differential - ArrangeBy keys=[[#0]] - Get l3 - ArrangeBy keys=[[#0]] - Filter (#0) IS NOT NULL - Get l1 - Return - Filter (#1 = "cn") AND (#4 = "hi") - Get l27 + Distinct project=[#0] + Project (#0) + Get l19 + Get l15 + Get l16 + cte l21 = + Distinct project=[#0, #2, #3, #1] + Project (#0..=#3) + Get l20 + cte l22 = + Reduce group_by=[#0..=#3] aggregates=[count(*)] + Project (#0..=#3) + Filter (#7 = "hi") + TopK group_by=[#0..=#4] order_by=[#5 desc nulls_first, #6 desc nulls_first] limit=1 exp_group_size=1000 + Project (#0..=#4, #6..=#8) + Filter ((#6 < #1) OR ((#1 = #6) AND ((#7 < #2) OR ((#2 = #7) AND (#4 <= #0))))) + Join on=(#3 = #5) type=differential + ArrangeBy keys=[[#3]] + Get l21 + ArrangeBy keys=[[#1]] + Get l13 + cte l23 = + ArrangeBy keys=[[#0..=#3]] + Get l21 + cte l24 = + Union + Get l22 + Project (#0..=#3, #8) + Map (0) + Join on=(#0 = #4 AND #1 = #5 AND #2 = #6 AND #3 = #7) type=differential + ArrangeBy keys=[[#0..=#3]] + Union + Negate + Project (#0..=#3) + Get l22 + Get l21 + Get l23 + cte l25 = + Union + Get l24 + Map (error("more than one record produced in subquery")) + Project (#0..=#3) + Filter (#4 > 1) + Reduce group_by=[#0..=#3] aggregates=[count(*)] + Project (#0..=#3) + Get l24 + cte l26 = + Project (#1, #2, #4, #11) + Map (case when (#5 = #10) then "lo" else "hi" end) + Join on=(#0 = #6 AND #1 = #9 AND #2 = #7 AND #3 = #8) type=differential + ArrangeBy keys=[[#0, #2, #3, #1]] + Get l20 + ArrangeBy keys=[[#0..=#3]] + Union + Get l25 + Project (#0..=#3, #8) + Map (null) + Join on=(#0 = #4 AND #1 = #5 AND #2 = #6 AND #3 = #7) type=differential + ArrangeBy keys=[[#0..=#3]] + Union + Negate + Distinct project=[#0..=#3] + Project (#0..=#3) + Get l25 + Get l21 + Get l23 + cte l27 = + Project (#0, #5, #1..=#3) + Join on=(#0 = #4) type=differential + ArrangeBy keys=[[#0]] + Get l4 + ArrangeBy keys=[[#0]] + Filter (#0) IS NOT NULL + Get l1 + Return + Filter (#1 = "cn") AND (#4 = "hi") + Get l27 Source materialize.public.input diff --git a/test/sqllogictest/advent-of-code/2023/aoc_1222.slt b/test/sqllogictest/advent-of-code/2023/aoc_1222.slt index dd72246898cd5..90049803002fb 100644 --- a/test/sqllogictest/advent-of-code/2023/aoc_1222.slt +++ b/test/sqllogictest/advent-of-code/2023/aoc_1222.slt @@ -205,49 +205,19 @@ EXPLAIN WITH MUTUALLY RECURSIVE SELECT * FROM part1, part2; ---- Explained Query: - With Mutually Recursive + With cte l0 = Project (#1, #2) Map (array_index(regexp_split_to_array["\n", case_insensitive=false](#0), integer_to_bigint(#1))) FlatMap generate_series(1, (regexp_split_to_array["\n", case_insensitive=false](#0) array_length 1), 1) ReadStorage materialize.public.input cte l1 = - Distinct project=[#0] - Project (#0) - Get l7 + Project (#0) + Get l0 cte l2 = - Reduce group_by=[#0] aggregates=[any((#0 = #1))] - CrossJoin type=differential - ArrangeBy keys=[[]] - Get l1 - ArrangeBy keys=[[]] - Get l10 - cte l3 = - ArrangeBy keys=[[#0]] + Distinct project=[#0] Get l1 - cte l4 = - Union - Get l2 - Project (#0, #2) - Map (false) - Join on=(#0 = #1) type=differential - ArrangeBy keys=[[#0]] - Union - Negate - Project (#0) - Get l2 - Get l1 - Get l3 - cte l5 = - Union - Get l4 - Map (error("more than one record produced in subquery")) - Project (#0) - Filter (#1 > 1) - Reduce 
group_by=[#0] aggregates=[count(*)] - Project (#0) - Get l4 - cte l6 = + cte l3 = Project (#0, #1, #3, #5) Join on=(#0 = #2 = #4) type=delta ArrangeBy keys=[[#0]] @@ -262,183 +232,213 @@ Explained Query: Project (#0, #2) FlatMap generate_series(text_to_integer(array_index(regexp_split_to_array[",", case_insensitive=false](array_index(regexp_split_to_array["~", case_insensitive=false](#1), 1)), 3)), text_to_integer(array_index(regexp_split_to_array[",", case_insensitive=false](array_index(regexp_split_to_array["~", case_insensitive=false](#1), 2)), 3)), 1) Get l0 - cte l7 = - Union - Threshold - Union - Get l6 - Negate - Get l16 - Project (#0..=#2, #6) - Map (case when #5 then #3 else (#3 - 1) end) - Join on=(#0 = #4) type=differential - ArrangeBy keys=[[#0]] - Get l7 - ArrangeBy keys=[[#0]] - Union - Get l5 - Project (#0, #2) - Map (null) - Join on=(#0 = #1) type=differential - ArrangeBy keys=[[#0]] - Union - Negate - Distinct project=[#0] - Project (#0) - Get l5 - Get l1 - Get l3 - cte l8 = - Distinct project=[#0, #1] - Project (#0, #4) - Filter (#0 != #4) - Join on=(#1 = #5 AND #2 = #6 AND #7 = (#3 + 1)) type=differential - ArrangeBy keys=[[#1, #2, (#3 + 1)]] - Get l7 - ArrangeBy keys=[[#1..=#3]] - Get l7 - cte l9 = - Project (#1) - Get l8 - cte l10 = - Distinct project=[#0] - Union + Return + With Mutually Recursive + cte l4 = + Distinct project=[#0] Project (#0) - Filter (#3 = 1) - Get l7 - Get l9 - cte l11 = - Project (#0) - Get l0 - cte l12 = - Distinct project=[#0] - Get l11 - cte l13 = - CrossJoin type=differential - ArrangeBy keys=[[]] - Get l12 - ArrangeBy keys=[[]] - Get l8 - cte l14 = - ArrangeBy keys=[[#0]] - Get l9 - cte l15 = - Reduce aggregates=[count(distinct #0)] - Project (#0) - Join on=(#0 = #1 = #2) type=delta - ArrangeBy keys=[[#0]] - Get l11 - ArrangeBy keys=[[#0]] - Union - Negate - Distinct project=[#0] - Project (#0) - Filter (#3 = 1) - Join on=(#1 = #2) type=differential - ArrangeBy keys=[[#1]] - Project (#0, #2) - Filter (#0 = #1) - Get l13 - ArrangeBy keys=[[#0]] - Reduce group_by=[#0] aggregates=[count(*)] - Project (#0) - Join on=(#0 = #1) type=differential - ArrangeBy keys=[[#0]] - Distinct project=[#0] - Project (#2) - Get l13 - Get l14 - Get l12 - ArrangeBy keys=[[#0]] + Get l9 + cte l5 = + Reduce group_by=[#0] aggregates=[any((#0 = #1))] + CrossJoin type=differential + ArrangeBy keys=[[]] + Get l4 + ArrangeBy keys=[[]] Get l12 - cte l16 = - Get l6 - cte l17 = - Reduce group_by=[#0, #1] aggregates=[count(*)] - Project (#0, #3) - Join on=(#1 = #2) type=differential - ArrangeBy keys=[[#1]] - Get l22 - ArrangeBy keys=[[#0]] - Get l8 - cte l18 = - Distinct project=[#0] + cte l6 = + ArrangeBy keys=[[#0]] + Get l4 + cte l7 = + Union + Get l5 + Project (#0, #2) + Map (false) + Join on=(#0 = #1) type=differential + ArrangeBy keys=[[#0]] + Union + Negate + Project (#0) + Get l5 + Get l4 + Get l6 + cte l8 = + Union + Get l7 + Map (error("more than one record produced in subquery")) + Project (#0) + Filter (#1 > 1) + Reduce group_by=[#0] aggregates=[count(*)] + Project (#0) + Get l7 + cte l9 = + Union + Threshold + Union + Get l3 + Negate + Get l16 + Project (#0..=#2, #6) + Map (case when #5 then #3 else (#3 - 1) end) + Join on=(#0 = #4) type=differential + ArrangeBy keys=[[#0]] + Get l9 + ArrangeBy keys=[[#0]] + Union + Get l8 + Project (#0, #2) + Map (null) + Join on=(#0 = #1) type=differential + ArrangeBy keys=[[#0]] + Union + Negate + Distinct project=[#0] + Project (#0) + Get l8 + Get l4 + Get l6 + cte l10 = + Distinct project=[#0, #1] + Project (#0, #4) + 
Filter (#0 != #4) + Join on=(#1 = #5 AND #2 = #6 AND #7 = (#3 + 1)) type=differential + ArrangeBy keys=[[#1, #2, (#3 + 1)]] + Get l9 + ArrangeBy keys=[[#1..=#3]] + Get l9 + cte l11 = Project (#1) - Get l17 - cte l19 = - ArrangeBy keys=[[#0]] - Get l18 - cte l20 = - Reduce group_by=[#0] aggregates=[count(*)] - Project (#0) - Join on=(#0 = #1) type=differential - Get l19 - Get l14 - cte l21 = - Union - Get l20 - Project (#0, #2) - Map (0) + Get l10 + cte l12 = + Distinct project=[#0] + Union + Project (#0) + Filter (#3 = 1) + Get l9 + Get l11 + cte l13 = + CrossJoin type=differential + ArrangeBy keys=[[]] + Get l2 + ArrangeBy keys=[[]] + Get l10 + cte l14 = + ArrangeBy keys=[[#0]] + Get l11 + cte l15 = + Reduce aggregates=[count(distinct #0)] + Project (#0) Join on=(#0 = #1) type=differential + ArrangeBy keys=[[#0]] + Get l1 ArrangeBy keys=[[#0]] Union Negate - Project (#0) - Get l20 - Get l18 + Distinct project=[#0] + Project (#0) + Filter (#3 = 1) + Join on=(#1 = #2) type=differential + ArrangeBy keys=[[#1]] + Project (#0, #2) + Filter (#0 = #1) + Get l13 + ArrangeBy keys=[[#0]] + Reduce group_by=[#0] aggregates=[count(*)] + Project (#0) + Join on=(#0 = #1) type=differential + ArrangeBy keys=[[#0]] + Distinct project=[#0] + Project (#2) + Get l13 + Get l14 + Get l2 + cte l16 = + Get l3 + cte l17 = + Reduce group_by=[#0, #1] aggregates=[count(*)] + Project (#0, #3) + Join on=(#1 = #2) type=differential + ArrangeBy keys=[[#1]] + Get l22 + ArrangeBy keys=[[#0]] + Get l10 + cte l18 = + Distinct project=[#0] + Project (#1) + Get l17 + cte l19 = + ArrangeBy keys=[[#0]] + Get l18 + cte l20 = + Reduce group_by=[#0] aggregates=[count(*)] + Project (#0) + Join on=(#0 = #1) type=differential Get l19 - cte l22 = - Distinct project=[#0, #1] + Get l14 + cte l21 = Union - Project (#0, #1) - Filter (#3 = 1) - Join on=(#1 = #2) type=differential - ArrangeBy keys=[[#1]] - Get l8 - ArrangeBy keys=[[#0]] - Reduce group_by=[#0] aggregates=[count(*)] - Get l9 - Project (#0, #1) - Join on=(#1 = #3 AND #2 = #4) type=differential - ArrangeBy keys=[[#1, #2]] - Get l17 - ArrangeBy keys=[[#0, #1]] - Union - Get l21 - Map (error("more than one record produced in subquery")) - Project (#0) - Filter (#1 > 1) - Reduce group_by=[#0] aggregates=[count(*)] - Project (#0) - Get l21 - Return - With - cte l23 = - Reduce aggregates=[count(*)] - Project () - Get l22 - Return - CrossJoin type=differential - ArrangeBy keys=[[]] - Union - Get l15 + Get l20 + Project (#0, #2) Map (0) - Union - Negate - Project () - Get l15 - Constant - - () - ArrangeBy keys=[[]] + Join on=(#0 = #1) type=differential + ArrangeBy keys=[[#0]] + Union + Negate + Project (#0) + Get l20 + Get l18 + Get l19 + cte l22 = + Distinct project=[#0, #1] Union - Get l23 - Map (0) - Union - Negate - Project () - Get l23 - Constant - - () + Project (#0, #1) + Filter (#3 = 1) + Join on=(#1 = #2) type=differential + ArrangeBy keys=[[#1]] + Get l10 + ArrangeBy keys=[[#0]] + Reduce group_by=[#0] aggregates=[count(*)] + Get l11 + Project (#0, #1) + Join on=(#1 = #3 AND #2 = #4) type=differential + ArrangeBy keys=[[#1, #2]] + Get l17 + ArrangeBy keys=[[#0, #1]] + Union + Get l21 + Map (error("more than one record produced in subquery")) + Project (#0) + Filter (#1 > 1) + Reduce group_by=[#0] aggregates=[count(*)] + Project (#0) + Get l21 + Return + With + cte l23 = + Reduce aggregates=[count(*)] + Project () + Get l22 + Return + CrossJoin type=differential + ArrangeBy keys=[[]] + Union + Get l15 + Map (0) + Union + Negate + Project () + Get l15 + Constant + - () + 
ArrangeBy keys=[[]] + Union + Get l23 + Map (0) + Union + Negate + Project () + Get l23 + Constant + - () Source materialize.public.input diff --git a/test/sqllogictest/advent-of-code/2023/aoc_1223.slt b/test/sqllogictest/advent-of-code/2023/aoc_1223.slt index bb5273ffc1c26..550a8dbbbe4bf 100644 --- a/test/sqllogictest/advent-of-code/2023/aoc_1223.slt +++ b/test/sqllogictest/advent-of-code/2023/aoc_1223.slt @@ -291,7 +291,7 @@ SELECT * FROM longest ORDER BY d DESC; ---- Explained Query: Finish order_by=[#2 desc nulls_first] output=[#0..=#3] - With Mutually Recursive + With cte l0 = Project (#0, #2) Filter ("#" != substr(#1, #2, 1)) @@ -363,46 +363,6 @@ Explained Query: Project (#0, #1) Get l3 cte l5 = - ArrangeBy keys=[[#0, #1]] - Get l3 - cte l6 = - Project (#0..=#4, #7, #8) - Filter ((#0 != #7) OR (#1 != #8)) - Join on=(#3 = #5 AND #4 = #6) type=differential - ArrangeBy keys=[[#3, #4]] - Get l9 - Get l5 - cte l7 = - Distinct project=[#0, #1] - Project (#3, #4) - Get l6 - cte l8 = - ArrangeBy keys=[[#0, #1]] - Get l4 - cte l9 = - Project (#0, #1, #4, #2, #3) - Reduce group_by=[#0, #1, #3, #4] aggregates=[min(#2)] - Union - Project (#0, #1, #6, #2, #3) - Map (1) - Join on=(#0 = #4 AND #1 = #5) type=differential - Get l5 - Get l8 - Project (#0, #1, #9, #5, #6) - Map ((#2 + 1)) - Join on=(#3 = #7 AND #4 = #8) type=differential - ArrangeBy keys=[[#3, #4]] - Get l6 - ArrangeBy keys=[[#0, #1]] - Union - Negate - Project (#0, #1) - Join on=(#0 = #2 AND #1 = #3) type=differential - ArrangeBy keys=[[#0, #1]] - Get l7 - Get l8 - Get l7 - cte l10 = Union Negate Distinct project=[#0, #1] @@ -411,26 +371,26 @@ Explained Query: FlatMap wrap2(1, 2, 12, 20, 130, 126, 141, 140) Get l4 Get l4 - cte l11 = + cte l6 = Distinct project=[#0, #1] - Get l10 - cte l12 = + Get l5 + cte l7 = Filter ((#2 < #0) OR ((#0 = #2) AND (#3 < #1))) CrossJoin type=differential ArrangeBy keys=[[]] - Get l11 + Get l6 ArrangeBy keys=[[]] Get l4 - cte l13 = + cte l8 = Distinct project=[#0, #1] Project (#2, #3) - Get l12 - cte l14 = + Get l7 + cte l9 = Reduce group_by=[#0, #1] aggregates=[count(*)] Project (#0, #1) Join on=(#2 = #4 AND #3 = #5) type=differential ArrangeBy keys=[[#2, #3]] - Get l12 + Get l7 ArrangeBy keys=[[#0, #1]] Union Negate @@ -438,53 +398,77 @@ Explained Query: Project (#0, #1) Filter (#0 = #2) AND (#1 = #3) FlatMap wrap2(1, 2, 12, 20, 130, 126, 141, 140) - Get l13 - Get l13 - cte l15 = + Get l8 + Get l8 + cte l10 = ArrangeBy keys=[[#0, #1]] - Get l11 - cte l16 = - Union - Get l14 - Project (#0, #1, #4) - Map (0) - Join on=(#0 = #2 AND #1 = #3) type=differential - ArrangeBy keys=[[#0, #1]] - Union - Negate - Project (#0, #1) - Get l14 - Get l11 - Get l15 - cte l17 = - Project (#0, #1, #3, #2) - Reduce group_by=[#0, #1, #3] aggregates=[max(#2)] - Union - Project (#7, #8, #19, #20) - Filter (1 != ((#3 >> #18) % 2)) - Map (bigint_to_integer(#17), (#2 + #6), (#3 + (1 << #18))) - Join on=(#0 = #4 AND #1 = #5 AND #7 = #9 = #11 = #13 = #15 AND #8 = #10 = #12 = #14 = #16) type=delta - ArrangeBy keys=[[#0, #1]] - Get l17 - ArrangeBy keys=[[#0, #1], [#3, #4]] - Get l9 - Get l8 - ArrangeBy keys=[[#0, #1]] - Get l10 - Get l15 + Get l3 + cte l11 = + ArrangeBy keys=[[#0, #1]] + Get l4 + Return + With Mutually Recursive + cte l12 = + Project (#0..=#4, #7, #8) + Filter ((#0 != #7) OR (#1 != #8)) + Join on=(#3 = #5 AND #4 = #6) type=differential + ArrangeBy keys=[[#3, #4]] + Get l14 + Get l10 + cte l13 = + Distinct project=[#0, #1] + Project (#3, #4) + Get l12 + cte l14 = + Project (#0, #1, #4, #2, #3) + Reduce group_by=[#0, 
#1, #3, #4] aggregates=[min(#2)] + Union + Project (#0, #1, #6, #2, #3) + Map (1) + Join on=(#0 = #4 AND #1 = #5) type=differential + Get l10 + Get l11 + Project (#0, #1, #9, #5, #6) + Map ((#2 + 1)) + Join on=(#3 = #7 AND #4 = #8) type=differential + ArrangeBy keys=[[#3, #4]] + Get l12 ArrangeBy keys=[[#0, #1]] Union - Get l16 - Map (error("more than one record produced in subquery")) + Negate Project (#0, #1) - Filter (#2 > 1) - Reduce group_by=[#0, #1] aggregates=[count(*)] + Join on=(#0 = #2 AND #1 = #3) type=differential + ArrangeBy keys=[[#0, #1]] + Get l13 + Get l11 + Get l13 + cte l15 = + Project (#0, #1, #3, #2) + Reduce group_by=[#0, #1, #3] aggregates=[max(#2)] + Union + Project (#7, #8, #15, #16) + Filter (1 != ((#3 >> #14) % 2)) + Map (bigint_to_integer(#13), (#2 + #6), (#3 + (1 << #14))) + Join on=(#0 = #4 AND #1 = #5 AND #7 = #9 = #11 AND #8 = #10 = #12) type=delta + ArrangeBy keys=[[#0, #1]] + Get l15 + ArrangeBy keys=[[#0, #1], [#3, #4]] + Get l14 + ArrangeBy keys=[[#0, #1]] + Get l5 + ArrangeBy keys=[[#0, #1]] + Union + Get l9 + Map (0) + Union + Negate Project (#0, #1) - Get l16 - Constant - - (12, 20, 0, 0) - Return - Get l17 + Get l9 + Get l6 + Constant + - (12, 20, 0, 0) + Return + Get l15 Source materialize.public.input diff --git a/test/sqllogictest/advent-of-code/2023/aoc_1225.slt b/test/sqllogictest/advent-of-code/2023/aoc_1225.slt index 5b8982ca74d4e..dc130828329cf 100644 --- a/test/sqllogictest/advent-of-code/2023/aoc_1225.slt +++ b/test/sqllogictest/advent-of-code/2023/aoc_1225.slt @@ -128,7 +128,7 @@ EXPLAIN WITH MUTUALLY RECURSIVE (RETURN AT RECURSION LIMIT 50) SELECT * FROM part1; ---- Explained Query: - With Mutually Recursive + With cte l0 = Project (#3, #4) Map (regexp_split_to_array[" ", case_insensitive=false](#0), btrim(array_index(#2, 1), ":"), btrim(array_index(#2, integer_to_bigint(#1)), ",")) @@ -144,137 +144,139 @@ Explained Query: Get l0 Project (#1) Get l0 - cte l2 = - Project (#1) - Get l7 - cte l3 = - Reduce aggregates=[sum(#0), count(#0)] - Get l2 - cte l4 = - Project (#2) - Map ((#0 / bigint_to_numeric(case when (#1 = 0) then null else #1 end))) + Return + With Mutually Recursive + cte l2 = + Project (#1) + Get l7 + cte l3 = + Reduce aggregates=[sum(#0), count(#0)] + Get l2 + cte l4 = + Project (#2) + Map ((#0 / bigint_to_numeric(case when (#1 = 0) then null else #1 end))) + Union + Get l3 + Map (null, 0) + Union + Negate + Project () + Get l3 + Constant + - () + cte l5 = + Reduce aggregates=[sum((#0 * #0)), sum(#0), count(#0)] + Get l2 + cte l6 = + Project (#3) + Map (sqrtnumeric(case when ((#0) IS NULL OR (#1) IS NULL OR (case when (#2 = 0) then null else #2 end) IS NULL OR (case when (0 = (#2 - 1)) then null else (#2 - 1) end) IS NULL) then null else greatest(((#0 - ((#1 * #1) / bigint_to_numeric(case when (#2 = 0) then null else #2 end))) / bigint_to_numeric(case when (0 = (#2 - 1)) then null else (#2 - 1) end)), 0) end)) + Union + Get l5 + Map (null, null, 0) + Union + Negate + Project () + Get l5 + Constant + - () + cte [recursion_limit=50, return_at_limit] l7 = + Union + Threshold + Union + Get l1 + Negate + Get l8 + Reduce group_by=[#0] aggregates=[sum(((#1 - #2) / #3))] + Project (#0, #3..=#5) + Join on=(#1 = #2) type=delta + ArrangeBy keys=[[], [#1]] + Union + Filter (#1) IS NOT NULL + Get l0 + Project (#1, #0) + Filter (#0) IS NOT NULL + Get l0 + ArrangeBy keys=[[#0]] + Filter (#0) IS NOT NULL + Get l7 + ArrangeBy keys=[[]] + Union + Get l4 + Map (null) + Union + Negate + Project () + Get l4 + Constant + - () + ArrangeBy keys=[[]] 
+ Union + Get l6 + Map (null) + Union + Negate + Project () + Get l6 + Constant + - () + cte [recursion_limit=50, return_at_limit] l8 = + Get l1 + Return + With + cte l9 = + Reduce aggregates=[count(*)] + Project () + Filter (#1 < 0) + Get l7 + cte l10 = Union - Get l3 - Map (null, 0) + Get l9 + Map (0) Union Negate Project () - Get l3 + Get l9 Constant - () - cte l5 = - Reduce aggregates=[sum((#0 * #0)), sum(#0), count(#0)] - Get l2 - cte l6 = - Project (#3) - Map (sqrtnumeric(case when ((#0) IS NULL OR (#1) IS NULL OR (case when (#2 = 0) then null else #2 end) IS NULL OR (case when (0 = (#2 - 1)) then null else (#2 - 1) end) IS NULL) then null else greatest(((#0 - ((#1 * #1) / bigint_to_numeric(case when (#2 = 0) then null else #2 end))) / bigint_to_numeric(case when (0 = (#2 - 1)) then null else (#2 - 1) end)), 0) end)) + cte l11 = + Reduce aggregates=[count(*)] + Project () + Filter (#1 > 0) + Get l7 + cte l12 = Union - Get l5 - Map (null, null, 0) + Get l11 + Map (0) Union Negate Project () - Get l5 + Get l11 Constant - () - cte [recursion_limit=50, return_at_limit] l7 = - Union - Threshold - Union - Get l1 - Negate - Get l8 - Reduce group_by=[#0] aggregates=[sum(((#1 - #2) / #3))] - Project (#0, #3..=#5) - Join on=(#1 = #2) type=delta - ArrangeBy keys=[[], [#1]] - Union - Filter (#1) IS NOT NULL - Get l0 - Project (#1, #0) - Filter (#0) IS NOT NULL - Get l0 - ArrangeBy keys=[[#0]] - Filter (#0) IS NOT NULL - Get l7 + Return + Project (#2) + Map ((#0 * #1)) + CrossJoin type=differential ArrangeBy keys=[[]] Union - Get l4 + Get l10 Map (null) Union Negate Project () - Get l4 + Get l10 Constant - () ArrangeBy keys=[[]] Union - Get l6 + Get l12 Map (null) Union Negate Project () - Get l6 + Get l12 Constant - () - cte [recursion_limit=50, return_at_limit] l8 = - Get l1 - Return - With - cte l9 = - Reduce aggregates=[count(*)] - Project () - Filter (#1 < 0) - Get l7 - cte l10 = - Union - Get l9 - Map (0) - Union - Negate - Project () - Get l9 - Constant - - () - cte l11 = - Reduce aggregates=[count(*)] - Project () - Filter (#1 > 0) - Get l7 - cte l12 = - Union - Get l11 - Map (0) - Union - Negate - Project () - Get l11 - Constant - - () - Return - Project (#2) - Map ((#0 * #1)) - CrossJoin type=differential - ArrangeBy keys=[[]] - Union - Get l10 - Map (null) - Union - Negate - Project () - Get l10 - Constant - - () - ArrangeBy keys=[[]] - Union - Get l12 - Map (null) - Union - Negate - Project () - Get l12 - Constant - - () Source materialize.public.input diff --git a/test/sqllogictest/arrays.slt b/test/sqllogictest/arrays.slt index 4b1a15e33e9ec..cd7f56c2b8227 100644 --- a/test/sqllogictest/arrays.slt +++ b/test/sqllogictest/arrays.slt @@ -1450,3 +1450,13 @@ query T SELECT ARRAY[1,3,7,NULL] @> ARRAY[1,3,7,NULL] AS contains; ---- false + +# Make sure we can index into a CAST-ed array. 
+ +statement ok +CREATE TABLE jsons (payload jsonb, random_index int, random_id uuid); + +statement ok +CREATE MATERIALIZED VIEW json_mv AS ( + SELECT * FROM jsons WHERE random_id = CAST(payload->>'my_field' AS uuid[])[random_index] +) diff --git a/test/sqllogictest/autogenerated/all_parts_essential.slt b/test/sqllogictest/autogenerated/all_parts_essential.slt index f4e671109b0cc..0117da6386857 100644 --- a/test/sqllogictest/autogenerated/all_parts_essential.slt +++ b/test/sqllogictest/autogenerated/all_parts_essential.slt @@ -444,8 +444,9 @@ Explained Query: Project (#0, #4) // { arity: 2 } ReadIndex on=orders pk_orders_orderkey=[*** full scan ***] // { arity: 9 } cte l1 = - Filter (null OR ((#0 <= 100) AND (#0 >= 59) AND (#11 >= 1998-03-22))) // { arity: 16 } - ReadIndex on=lineitem pk_lineitem_orderkey_linenumber=[*** full scan ***] // { arity: 16 } + Project (#4, #11) // { arity: 2 } + Filter (null OR ((#0 <= 100) AND (#0 >= 59) AND (#11 >= 1998-03-22))) // { arity: 16 } + ReadIndex on=lineitem pk_lineitem_orderkey_linenumber=[*** full scan ***] // { arity: 16 } Return // { arity: 2 } Union // { arity: 2 } Project (#1, #0) // { arity: 2 } @@ -457,14 +458,13 @@ Explained Query: implementation %1[#0]UKA » %0:l1[#1]Kif ArrangeBy keys=[[#1]] // { arity: 2 } - Project (#4, #11) // { arity: 2 } - Get l1 // { arity: 16 } + Get l1 // { arity: 2 } ArrangeBy keys=[[#0]] // { arity: 1 } Distinct project=[#0] // { arity: 1 } Project (#2) // { arity: 1 } Get l0 // { arity: 5 } - Project (#4) // { arity: 1 } - Get l1 // { arity: 16 } + Project (#0) // { arity: 1 } + Get l1 // { arity: 2 } Project (#4, #1) // { arity: 2 } Filter ((#2 = #3) OR ((#0 <= 100) AND (#0 >= 59) AND (#2 >= 1998-03-22))) // { arity: 5 } Get l0 // { arity: 5 } diff --git a/test/sqllogictest/bytea.slt b/test/sqllogictest/bytea.slt index 456b9c41834f9..7e63db31f2fd1 100644 --- a/test/sqllogictest/bytea.slt +++ b/test/sqllogictest/bytea.slt @@ -75,6 +75,119 @@ SELECT bit_length('DEADBEEF'::text); ---- 64 +query I +SELECT bit_count('\x1234567890'::bytea); +---- +15 + +query I +SELECT bit_count('\x00'::bytea); +---- +0 + +query I +SELECT bit_count('\x0F'::bytea); +---- +4 + +query I +SELECT bit_count('\xFF'::bytea); +---- +8 + +query I +SELECT bit_count('\xF0FF'::bytea); +---- +12 + +query I +SELECT get_byte('\x1234567890'::bytea, 4); +---- +144 + +query I +SELECT get_bit('\x1234567890'::bytea, 30); +---- +1 + +query II +SELECT n, get_bit('\x1234567890'::bytea, n) FROM generate_series(0, 39) as n ORDER BY n DESC; +---- +39 1 +38 0 +37 0 +36 1 +35 0 +34 0 +33 0 +32 0 +31 0 +30 1 +29 1 +28 1 +27 1 +26 0 +25 0 +24 0 +23 0 +22 1 +21 0 +20 1 +19 0 +18 1 +17 1 +16 0 +15 0 +14 0 +13 1 +12 1 +11 0 +10 1 +9 0 +8 0 +7 0 +6 0 +5 0 +4 1 +3 0 +2 0 +1 1 +0 0 + + +query I +SELECT get_bit('\xF00a'::bytea, 13); +---- +0 + +query I +SELECT get_bit('\xF00a'::bytea, 5); +---- +1 + +query II +SELECT n, get_bit('\xF00a'::bytea, n) FROM generate_series(0, 15) as n ORDER BY n DESC; +---- +15 0 +14 0 +13 0 +12 0 +11 1 +10 0 +9 1 +8 0 +7 1 +6 1 +5 1 +4 1 +3 0 +2 0 +1 0 +0 0 + +statement error index 16 out of valid range, 0..15 +SELECT get_bit('\xF00a'::bytea, 16); + statement error SELECT length('deadbeef'::text, 'utf-8') diff --git a/test/sqllogictest/cast.slt b/test/sqllogictest/cast.slt index 6a2492851a78a..ea67690a2103b 100644 --- a/test/sqllogictest/cast.slt +++ b/test/sqllogictest/cast.slt @@ -125,3 +125,16 @@ SELECT date('2000') query error db error: ERROR: function date\(unknown, unknown\) does not exist SELECT date('2000', 'a') + +query T +SELECT 
CAST(5 + 3 AS text); +---- +8 + +query T +SELECT (5 + 3)::text; +---- +8 + +query error db error: ERROR: operator does not exist: integer \+ text +SELECT 5 + 3::text; diff --git a/test/sqllogictest/chbench.slt b/test/sqllogictest/chbench.slt index 3c243e80081fb..f2941fc664332 100644 --- a/test/sqllogictest/chbench.slt +++ b/test/sqllogictest/chbench.slt @@ -448,9 +448,10 @@ Explained Query: Finish order_by=[#0 asc nulls_last] output=[#0, #1] With cte l0 = - Filter (#8 < 2012-01-02 00:00:00) AND (#8 >= 2007-01-02 00:00:00) // { arity: 9 } - Map (date_to_timestamp(#4)) // { arity: 9 } - ReadIndex on=order fk_order_customer=[*** full scan ***] // { arity: 8 } + Project (#0..=#2, #4, #6) // { arity: 5 } + Filter (#8 < 2012-01-02 00:00:00) AND (#8 >= 2007-01-02 00:00:00) // { arity: 9 } + Map (date_to_timestamp(#4)) // { arity: 9 } + ReadIndex on=order fk_order_customer=[*** full scan ***] // { arity: 8 } Return // { arity: 2 } Reduce group_by=[#0] aggregates=[count(*)] // { arity: 2 } Project (#4) // { arity: 1 } @@ -458,8 +459,7 @@ Explained Query: implementation %1[#0..=#3]UKKKKA » %0:l0[#0..=#3]UKKKKiif ArrangeBy keys=[[#0..=#3]] // { arity: 5 } - Project (#0..=#2, #4, #6) // { arity: 5 } - Get l0 // { arity: 9 } + Get l0 // { arity: 5 } ArrangeBy keys=[[#0..=#3]] // { arity: 4 } Distinct project=[#0..=#3] // { arity: 4 } Project (#0..=#3) // { arity: 4 } @@ -468,8 +468,8 @@ Explained Query: implementation %0:l0[#2, #1, #0]UKKKiif » %1:orderline[#2, #1, #0]KKKAiif ArrangeBy keys=[[#2, #1, #0]] // { arity: 4 } - Project (#0..=#2, #4) // { arity: 4 } - Get l0 // { arity: 9 } + Project (#0..=#3) // { arity: 4 } + Get l0 // { arity: 5 } ArrangeBy keys=[[#2, #1, #0]] // { arity: 10 } ReadIndex on=orderline fk_orderline_order=[differential join] // { arity: 10 } @@ -1618,8 +1618,9 @@ Explained Query: Finish order_by=[#0 asc nulls_last] output=[#0..=#2] With cte l0 = - Map (substr(char_to_text(#11), 1, 1)) // { arity: 23 } - ReadIndex on=customer fk_customer_district=[*** full scan ***] // { arity: 22 } + Project (#0..=#2, #9, #16, #22) // { arity: 6 } + Map (substr(char_to_text(#11), 1, 1)) // { arity: 23 } + ReadIndex on=customer fk_customer_district=[*** full scan ***] // { arity: 22 } cte l1 = Project (#0..=#4) // { arity: 5 } Filter (#4 > (#5 / bigint_to_numeric(case when (#6 = 0) then null else #6 end))) // { arity: 7 } @@ -1627,14 +1628,14 @@ Explained Query: implementation %1[×]UA » %0:l0[×]ef ArrangeBy keys=[[]] // { arity: 5 } - Project (#0..=#2, #9, #16) // { arity: 5 } - Filter ((#22 = "1") OR (#22 = "2") OR (#22 = "3") OR (#22 = "4") OR (#22 = "5") OR (#22 = "6") OR (#22 = "7")) // { arity: 23 } - Get l0 // { arity: 23 } + Project (#0..=#4) // { arity: 5 } + Filter ((#5 = "1") OR (#5 = "2") OR (#5 = "3") OR (#5 = "4") OR (#5 = "5") OR (#5 = "6") OR (#5 = "7")) // { arity: 6 } + Get l0 // { arity: 6 } ArrangeBy keys=[[]] // { arity: 2 } Reduce aggregates=[sum(#0), count(*)] // { arity: 2 } - Project (#16) // { arity: 1 } - Filter (#16 > 0) AND ((#22 = "1") OR (#22 = "2") OR (#22 = "3") OR (#22 = "4") OR (#22 = "5") OR (#22 = "6") OR (#22 = "7")) // { arity: 23 } - Get l0 // { arity: 23 } + Project (#4) // { arity: 1 } + Filter (#4 > 0) AND ((#5 = "1") OR (#5 = "2") OR (#5 = "3") OR (#5 = "4") OR (#5 = "5") OR (#5 = "6") OR (#5 = "7")) // { arity: 6 } + Get l0 // { arity: 6 } cte l2 = Project (#0..=#2) // { arity: 3 } Get l1 // { arity: 5 } diff --git a/test/sqllogictest/distinct_arrangements.slt b/test/sqllogictest/distinct_arrangements.slt index 00f8118eaceb2..06ee4fea36a57 100644 --- 
a/test/sqllogictest/distinct_arrangements.slt +++ b/test/sqllogictest/distinct_arrangements.slt @@ -379,7 +379,6 @@ Arrange ReduceMinsMaxes Arrange ReduceMinsMaxes Arrange ReduceMinsMaxes Arrange recursive err -Arrange recursive err ArrangeAccumulable [val: empty] ArrangeBy[[Column(0)]] ArrangeBy[[Column(0)]] @@ -408,7 +407,6 @@ Arranged MinsMaxesHierarchical input Arranged MinsMaxesHierarchical input Arranged MinsMaxesHierarchical input Distinct recursive err -Distinct recursive err DistinctBy DistinctByErrorCheck ReduceAccumulable @@ -984,7 +982,7 @@ AccumulableErrorCheck 9 Arrange␠ReduceMinsMaxes 3 Arrange␠export␠iterative 2 Arrange␠export␠iterative␠err 2 -Arrange␠recursive␠err 4 +Arrange␠recursive␠err 3 ArrangeAccumulable␠[val:␠empty] 9 ArrangeBy[[CallBinary␠{␠func:␠JsonbGetString␠{␠stringify:␠true␠},␠expr1:␠Column(1),␠expr2:␠Literal(Ok(Row{[String("id")]}),␠ColumnType␠{␠scalar_type:␠String,␠nullable:␠false␠})␠}]] 2 ArrangeBy[[CallBinary␠{␠func:␠JsonbGetString␠{␠stringify:␠true␠},␠expr1:␠Column(2),␠expr2:␠Literal(Ok(Row{[String("id")]}),␠ColumnType␠{␠scalar_type:␠String,␠nullable:␠false␠})␠}]] 1 @@ -1031,7 +1029,7 @@ Arranged␠DistinctBy 47 Arranged␠MinsMaxesHierarchical␠input 14 Arranged␠ReduceInaccumulable 3 Arranged␠TopK␠input 68 -Distinct␠recursive␠err 4 +Distinct␠recursive␠err 3 DistinctBy 47 DistinctByErrorCheck 47 ReduceAccumulable 9 diff --git a/test/sqllogictest/explain/aggregates.slt b/test/sqllogictest/explain/aggregates.slt index a85fd80dc156c..a8845d6989e8f 100644 --- a/test/sqllogictest/explain/aggregates.slt +++ b/test/sqllogictest/explain/aggregates.slt @@ -235,18 +235,18 @@ EXPLAIN SELECT t1.a, array_agg(t1.c), array_agg(t2.c) FROM t t1 INNER JOIN t t2 Explained Query: With cte l0 = - Filter (#2) IS NOT NULL - ReadStorage materialize.public.t + Project (#0, #2) + Filter (#2) IS NOT NULL + ReadStorage materialize.public.t Return Project (#0, #1, #1) Reduce group_by=[#0] aggregates=[array_agg[order_by=[]](row(array[#1]))] Project (#0, #1) Join on=(#1 = #2) type=differential ArrangeBy keys=[[#1]] - Project (#0, #2) - Get l0 + Get l0 ArrangeBy keys=[[#0]] - Project (#2) + Project (#1) Get l0 Source materialize.public.t diff --git a/test/sqllogictest/explain/view.slt b/test/sqllogictest/explain/locally_optimized_plan.slt similarity index 87% rename from test/sqllogictest/explain/view.slt rename to test/sqllogictest/explain/locally_optimized_plan.slt index ce4319aec28a8..4fc5aec67f39c 100644 --- a/test/sqllogictest/explain/view.slt +++ b/test/sqllogictest/explain/locally_optimized_plan.slt @@ -32,19 +32,6 @@ WHERE mode cockroach -# Must explain the "Raw Plan". -query T multiline -EXPLAIN RAW PLAN FOR -VIEW v; ----- -Project (#0, #1, #3) - Filter (#1 = 100) - LeftOuterJoin (integer_to_bigint(#0) = #2) - Get materialize.public.accounts - Get materialize.public.account_details - -EOF - # Must explain the "Locally Optimized Plan". query T multiline EXPLAIN LOCALLY OPTIMIZED PLAN FOR @@ -158,7 +145,7 @@ FROM WHERE balance = 100; -# Ensure that flag whas used during planning. +# Ensure that flag was used during planning. 
query T multiline EXPLAIN LOCALLY OPTIMIZED PLAN FOR VIEW v; @@ -220,3 +207,51 @@ Return Get l1 EOF + +## Constant views +## (Regression tests for https://github.com/MaterializeInc/database-issues/issues/8985 ) + +statement ok +CREATE VIEW v2 AS SELECT 1; + +query T multiline +EXPLAIN LOCALLY OPTIMIZED PLAN FOR +REPLAN VIEW v2 +---- +Constant + - (1) + +EOF + +query T multiline +EXPLAIN LOCALLY OPTIMIZED PLAN FOR +CREATE VIEW v3 AS SELECT 5; +---- +Constant + - (5) + +EOF + +# LOCALLY OPTIMIZED PLAN FOR constant MV +query T multiline +EXPLAIN LOCALLY OPTIMIZED PLAN FOR +CREATE MATERIALIZED VIEW v3 AS SELECT 5; +---- +Constant + - (5) + +Target cluster: quickstart + +EOF + +# LOCALLY OPTIMIZED PLAN FOR constant peek +query T multiline +EXPLAIN LOCALLY OPTIMIZED PLAN FOR +SELECT 5; +---- +Constant + - (5) + +Target cluster: mz_catalog_server + +EOF diff --git a/test/sqllogictest/explain/optimized_plan_as_text.slt b/test/sqllogictest/explain/optimized_plan_as_text.slt index 28bb5111e20d3..3032ebac70573 100644 --- a/test/sqllogictest/explain/optimized_plan_as_text.slt +++ b/test/sqllogictest/explain/optimized_plan_as_text.slt @@ -246,7 +246,7 @@ Explained Query: ReadIndex on=t t_a_idx=[*** full scan ***] Negate Distinct project=[#0{b}] - Project (#1) + Project (#1{b}) ReadStorage materialize.public.mv Source materialize.public.mv @@ -269,7 +269,7 @@ Explained Query: Project (#0{a}) ReadIndex on=t t_a_idx=[*** full scan ***] Negate - Project (#1) + Project (#1{b}) ReadStorage materialize.public.mv Source materialize.public.mv @@ -398,10 +398,10 @@ Explained Query: CrossJoin type=differential ArrangeBy keys=[[]] Distinct project=[#0{b}] - Project (#1) + Project (#1{b}) Get l0 ArrangeBy keys=[[]] - Project (#1) + Project (#1{b}) ReadStorage materialize.public.mv Source materialize.public.mv @@ -421,7 +421,7 @@ SELECT (SELECT iv.a FROM iv WHERE iv.b = t.b LIMIT 1), (SELECT mv.a FROM mv WHER Explained Query: With cte l0 = - Project (#1) + Project (#1{b}) ReadIndex on=t t_a_idx=[*** full scan ***] cte l1 = Distinct project=[#0{b}] @@ -446,7 +446,7 @@ Explained Query: Filter (#1{b}) IS NOT NULL ReadStorage materialize.public.mv Return - Project (#2, #4) + Project (#2{a}, #4{a}) Join on=(#0{b} = #1{b} = #3{b}) type=delta ArrangeBy keys=[[#0{b}]] Get l0 @@ -498,7 +498,7 @@ Explained Query: Get l0 cte l2 = ArrangeBy keys=[[#0{b}]] - Project (#1) + Project (#1{b}) Get l0 cte l3 = Project (#0{a}..=#2{a}) @@ -516,11 +516,11 @@ Explained Query: Get l2 ArrangeBy keys=[[#0{b}]] Distinct project=[#0{b}] - Project (#1) + Project (#1{b}) Get l3 Project () ReadIndex on=t t_a_idx=[*** full scan ***] - Project (#0{a}, #2) + Project (#0{a}, #2{a}) Get l3 Used Indexes: @@ -659,7 +659,7 @@ Explained Query: ArrangeBy keys=[[]] Get l0 cte l2 = - Project (#0{a}, #1{b}, #3) + Project (#0{a}, #1{b}, #3{max}) Filter (#0{a} != #3{max}) Join on=(#0{a} = #2{a}) type=differential ArrangeBy keys=[[#0{a}]] @@ -672,7 +672,7 @@ Explained Query: Get l0 Get l1 Return - Project (#0{a}..=#2{max}, #4) + Project (#0{a}..=#2{max}, #4{max}) Filter (#0{a} != #4{max}) Join on=(#0{a} = #3{a}) type=differential ArrangeBy keys=[[#0{a}]] @@ -735,12 +735,12 @@ Explained Query: ArrangeBy keys=[[#1{b}]] Get l0 Return - Project (#0{a}, #2) + Project (#0{a}, #2{a}) Join on=(#1{b} = #3{b} = #4{b}) type=delta Get l1 Get l1 ArrangeBy keys=[[#0{b}]] - Project (#1) + Project (#1{b}) Get l0 Used Indexes: @@ -936,10 +936,10 @@ WHERE a = 0 GROUP BY a ---- Explained Query: - Project (#1{max_b}, #0) + Project (#1, #0{max_b}) Map (0) Reduce 
aggregates=[max(#0{b})] - Project (#1) + Project (#1{b}) ReadIndex on=materialize.public.t t_a_idx=[lookup value=(0)] Used Indexes: @@ -1623,3 +1623,113 @@ Source materialize.public.t6 Target cluster: no_replicas EOF + +# `count(*)` is planned as `count(true)`. We take care in EXPLAIN to show `count(true)` as `count(*)` to avoid confusing +# users. +query T multiline +EXPLAIN OPTIMIZED PLAN AS TEXT FOR +SELECT count(*) +FROM t5; +---- +Explained Query: + With + cte l0 = + Reduce aggregates=[count(*)] + Project () + ReadStorage materialize.public.t5 + Return + Union + Get l0 + Map (0) + Union + Negate + Project () + Get l0 + Constant + - () + +Source materialize.public.t5 + +Target cluster: no_replicas + +EOF + +query error DISTINCT \* not supported as function args +EXPLAIN OPTIMIZED PLAN AS TEXT FOR +SELECT count(distinct *) +FROM t5; + +# `count(true)` is currently also printed as `count(*)` in EXPLAIN, which I'd say is fine. +query T multiline +EXPLAIN OPTIMIZED PLAN AS TEXT FOR +SELECT count(true) +FROM t5; +---- +Explained Query: + With + cte l0 = + Reduce aggregates=[count(*)] + Project () + ReadStorage materialize.public.t5 + Return + Union + Get l0 + Map (0) + Union + Negate + Project () + Get l0 + Constant + - () + +Source materialize.public.t5 + +Target cluster: no_replicas + +EOF + +# But `count(DISTINCT true)` means an entirely different thing, so EXPLAIN shouldn't conflate it with `count(*)`. +query T multiline +EXPLAIN OPTIMIZED PLAN AS TEXT FOR +SELECT count(DISTINCT true) +FROM t5; +---- +Explained Query: + With + cte l0 = + Reduce aggregates=[count(distinct true)] + Project () + ReadStorage materialize.public.t5 + Return + Union + Get l0 + Map (0) + Union + Negate + Project () + Get l0 + Constant + - () + +Source materialize.public.t5 + +Target cluster: no_replicas + +EOF + +# EXPLAIN statement without an explicit stage or format, so that we test the default +# (Currently defaults to `EXPLAIN OPTIMIZED PLAN AS TEXT FOR`.) 
+query T multiline +EXPLAIN +SELECT a+b FROM t4; +---- +Explained Query: + Project (#3) + Map ((#0 + #1)) + ReadStorage materialize.public.t4 + +Source materialize.public.t4 + +Target cluster: no_replicas + +EOF diff --git a/test/sqllogictest/explain/optimized_plan_as_text_redacted.slt b/test/sqllogictest/explain/optimized_plan_as_text_redacted.slt index a3d6c2c0bc2c7..0516fe3ccd205 100644 --- a/test/sqllogictest/explain/optimized_plan_as_text_redacted.slt +++ b/test/sqllogictest/explain/optimized_plan_as_text_redacted.slt @@ -267,10 +267,10 @@ Explained Query: CrossJoin type=differential ArrangeBy keys=[[]] Distinct project=[#0{b}] - Project (#1) + Project (#1{b}) Get l0 ArrangeBy keys=[[]] - Project (#1) + Project (#1{b}) ReadStorage materialize.public.mv Source materialize.public.mv @@ -290,7 +290,7 @@ SELECT (SELECT iv.a FROM iv WHERE iv.b = t.b LIMIT 1), (SELECT mv.a FROM mv WHER Explained Query: With cte l0 = - Project (#1) + Project (#1{b}) ReadIndex on=t t_a_idx=[*** full scan ***] cte l1 = Distinct project=[#0{b}] @@ -315,7 +315,7 @@ Explained Query: Filter (#1{b}) IS NOT NULL ReadStorage materialize.public.mv Return - Project (#2, #4) + Project (#2{a}, #4{a}) Join on=(#0{b} = #1{b} = #3{b}) type=delta ArrangeBy keys=[[#0{b}]] Get l0 @@ -367,7 +367,7 @@ Explained Query: Get l0 cte l2 = ArrangeBy keys=[[#0{b}]] - Project (#1) + Project (#1{b}) Get l0 cte l3 = Project (#0{a}..=#2{a}) @@ -385,11 +385,11 @@ Explained Query: Get l2 ArrangeBy keys=[[#0{b}]] Distinct project=[#0{b}] - Project (#1) + Project (#1{b}) Get l3 Project () ReadIndex on=t t_a_idx=[*** full scan ***] - Project (#0{a}, #2) + Project (#0{a}, #2{a}) Get l3 Used Indexes: @@ -408,10 +408,10 @@ WHERE a = 0 GROUP BY a ---- Explained Query: - Project (#1{max_b}, #0) + Project (#1, #0{max_b}) Map (█) Reduce aggregates=[max(#0{b})] - Project (#1) + Project (#1{b}) ReadIndex on=materialize.public.t t_a_idx=[lookup value=(█)] Used Indexes: diff --git a/test/sqllogictest/explain/physical_plan_aggregates.slt b/test/sqllogictest/explain/physical_plan_aggregates.slt index f467dc71dfc81..2ad3178087387 100644 --- a/test/sqllogictest/explain/physical_plan_aggregates.slt +++ b/test/sqllogictest/explain/physical_plan_aggregates.slt @@ -457,15 +457,14 @@ Explained Query: raw=true arrangements[0]={ key=[#1], permutation={#0: #1, #1: #0}, thinning=(#0) } types=[integer, text] - Get::Collection l0 - project=(#0, #2) + Get::PassArrangements l0 raw=true ArrangeBy raw=true arrangements[0]={ key=[#0], permutation=id, thinning=() } types=[text] Get::Collection l0 - project=(#2) + project=(#1) raw=true With cte l0 = @@ -473,6 +472,7 @@ Explained Query: raw=true Source materialize.public.t + project=(#0, #2) filter=((#2) IS NOT NULL) Target cluster: quickstart diff --git a/test/sqllogictest/explain/raw_plan_as_text.slt b/test/sqllogictest/explain/raw_plan_as_text.slt index 841f23086a679..e5e013ade808b 100644 --- a/test/sqllogictest/explain/raw_plan_as_text.slt +++ b/test/sqllogictest/explain/raw_plan_as_text.slt @@ -661,3 +661,32 @@ With Mutually Recursive [recursion_limit=5, return_at_limit] Target cluster: mz_catalog_server EOF + +statement ok +CREATE TABLE accounts(id int, balance int); + +statement ok +CREATE TABLE account_details(id bigint, address string); + +statement ok +CREATE OR REPLACE VIEW v AS +SELECT + * +FROM + accounts a + LEFT JOIN account_details ad USING(id) +WHERE + balance = 100; + +# Must explain the "Raw Plan". 
+query T multiline +EXPLAIN RAW PLAN AS TEXT FOR +VIEW v; +---- +Project (#0, #1, #3) + Filter (#1 = 100) + LeftOuterJoin (integer_to_bigint(#0) = #2) + Get materialize.public.accounts + Get materialize.public.account_details + +EOF diff --git a/test/sqllogictest/freshmart.slt b/test/sqllogictest/freshmart.slt new file mode 100644 index 0000000000000..35082915c0639 --- /dev/null +++ b/test/sqllogictest/freshmart.slt @@ -0,0 +1,957 @@ +# Copyright Materialize, Inc. and contributors. All rights reserved. +# +# Use of this software is governed by the Business Source License +# included in the LICENSE file at the root of this repository. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0. + +statement ok +CREATE TABLE materialized_view_refresh_log ( + view_name TEXT, + last_refresh TIMESTAMP DEFAULT now(), + refresh_duration DOUBLE PRECISION DEFAULT 0 +); + +statement ok +CREATE TABLE products ( + product_id INTEGER NOT NULL, + product_name VARCHAR(255) NOT NULL, + base_price NUMERIC(10, 2) NOT NULL, + category_id INTEGER NOT NULL, + supplier_id INTEGER NOT NULL, + available BOOLEAN NOT NULL, + last_update_time TIMESTAMP WITH TIME ZONE DEFAULT NOW() +); + +statement ok +CREATE TABLE categories ( + category_id INTEGER NOT NULL, + category_name VARCHAR(255) NOT NULL, + parent_id INT +); + +statement ok +CREATE TABLE suppliers ( + supplier_id INTEGER NOT NULL, + supplier_name VARCHAR(255) NOT NULL +); + +statement ok +CREATE TABLE sales ( + sale_id INTEGER NOT NULL, + product_id INTEGER NOT NULL, + sale_price NUMERIC(10, 2) NOT NULL, + sale_date TIMESTAMP NOT NULL, + price NUMERIC(10, 2) NOT NULL +); + +statement ok +CREATE TABLE inventory ( + inventory_id INTEGER NOT NULL, + product_id INTEGER NOT NULL, + stock INTEGER NOT NULL, + warehouse_id INTEGER NOT NULL, + restock_date TIMESTAMP NOT NULL +); + +statement ok +CREATE TABLE promotions ( + promotion_id INTEGER NOT NULL, + product_id INTEGER NOT NULL, + promotion_discount NUMERIC(10, 2) NOT NULL, + start_date TIMESTAMP NOT NULL, + end_date TIMESTAMP NOT NULL, + active BOOLEAN NOT NULL, + updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW() +); + +statement ok +CREATE TABLE shopping_cart ( + product_id INT NOT NULL, + product_name TEXT NOT NULL, + category_id INT NOT NULL, + price NUMERIC(10, 2) NOT NULL, + ts TIMESTAMP NOT NULL DEFAULT NOW() +); + +statement ok +CREATE INDEX idx_products_product_name ON products(product_name); + +statement ok +CREATE INDEX idx_sales_product_id ON sales(product_id); + +statement ok +CREATE INDEX idx_sales_sale_date ON sales(sale_date); + +statement ok +CREATE INDEX idx_sales_product_id_sale_date ON sales(product_id, sale_date); + +statement ok +CREATE INDEX idx_promotions_product_id ON promotions(product_id); + +statement ok +CREATE INDEX idx_promotions_active ON promotions(active); + +statement ok +CREATE INDEX idx_promotions_product_id_active ON promotions(product_id, active); + +statement ok +CREATE INDEX idx_inventory_product_id ON inventory(product_id); + +query T multiline +EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS TEXT FOR +WITH recent_prices AS ( + SELECT grp.product_id, AVG(price) AS avg_price + FROM (SELECT DISTINCT product_id FROM sales) grp, + LATERAL ( + SELECT product_id, price + FROM sales + WHERE sales.product_id = grp.product_id + ORDER BY sale_date DESC LIMIT 10 + ) sub + GROUP BY grp.product_id +), +promotion_effect AS ( + SELECT + p.product_id, + 
MIN(pr.promotion_discount) AS promotion_discount + FROM promotions pr + JOIN products p ON pr.product_id = p.product_id + WHERE pr.active = TRUE + GROUP BY p.product_id +), +popularity_score AS ( + SELECT + s.product_id, + RANK() OVER (PARTITION BY p.category_id ORDER BY COUNT(s.sale_id) DESC) AS popularity_rank, + COUNT(s.sale_id) AS sale_count + FROM sales s + JOIN products p ON s.product_id = p.product_id + GROUP BY s.product_id, p.category_id +), +inventory_status AS ( + SELECT + i.product_id, + SUM(i.stock) AS total_stock, + RANK() OVER (ORDER BY SUM(i.stock) DESC) AS stock_rank + FROM inventory i + GROUP BY i.product_id +), +high_demand_products AS ( + SELECT + p.product_id, + AVG(s.sale_price) AS avg_sale_price, + COUNT(s.sale_id) AS total_sales + FROM products p + JOIN sales s ON p.product_id = s.product_id + GROUP BY p.product_id + HAVING COUNT(s.sale_id) > (SELECT AVG(total_sales) FROM (SELECT COUNT(*) AS total_sales FROM sales GROUP BY product_id) subquery) +), +dynamic_pricing AS ( + SELECT + p.product_id, + p.base_price, + CASE + WHEN pop.popularity_rank <= 3 THEN 1.2 + WHEN pop.popularity_rank BETWEEN 4 AND 10 THEN 1.1 + ELSE 0.9 + END AS popularity_adjustment, + rp.avg_price, + COALESCE(1.0 - (pe.promotion_discount / 100), 1) AS promotion_discount, + CASE + WHEN inv.stock_rank <= 3 THEN 1.1 + WHEN inv.stock_rank BETWEEN 4 AND 10 THEN 1.05 + ELSE 1 + END AS stock_adjustment, + CASE + WHEN p.base_price > rp.avg_price THEN 1 + (p.base_price - rp.avg_price) / rp.avg_price + ELSE 1 - (rp.avg_price - p.base_price) / rp.avg_price + END AS demand_multiplier, + hd.avg_sale_price, + CASE + WHEN p.product_name ilike '%cheap%' THEN 0.8 + ELSE 1.0 + END AS additional_discount + FROM products p + LEFT JOIN recent_prices rp ON p.product_id = rp.product_id + LEFT JOIN promotion_effect pe ON p.product_id = pe.product_id + JOIN popularity_score pop ON p.product_id = pop.product_id + LEFT JOIN inventory_status inv ON p.product_id = inv.product_id + LEFT JOIN high_demand_products hd ON p.product_id = hd.product_id +) +SELECT + dp.product_id, + dp.base_price * dp.popularity_adjustment * dp.promotion_discount * dp.stock_adjustment * dp.demand_multiplier * dp.additional_discount AS adjusted_price, + p.last_update_time +FROM dynamic_pricing dp +JOIN products p ON dp.product_id = p.product_id; +---- +Explained Query: + With + cte l0 = + Project (#0{product_id}, #3) + Map ((#1{sum_price} / bigint_to_numeric(case when (#2{count} = 0) then null else #2{count} end))) + Reduce group_by=[#0{product_id}] aggregates=[sum(#1{price}), count(*)] + Project (#0{product_id}, #2{price}) + TopK group_by=[#0{product_id}] order_by=[#1{sale_date} desc nulls_first] limit=10 + Project (#1{product_id}, #3{sale_date}, #4{price}) + ReadIndex on=sales idx_sales_product_id=[*** full scan ***] + cte l1 = + Project (#0{product_id}) + ReadIndex on=products idx_products_product_name=[*** full scan ***] + cte l2 = + ArrangeBy keys=[[#0{product_id}]] + Get l1 + cte l3 = + Reduce group_by=[#0{product_id}] aggregates=[min(#1{promotion_discount})] + Project (#1{product_id}, #2{promotion_discount}) + Join on=(#1{product_id} = #8{product_id}) type=differential + ArrangeBy keys=[[#1{product_id}]] + ReadIndex on=materialize.public.promotions idx_promotions_active=[lookup value=(true)] + Get l2 + cte l4 = + Distinct project=[#0{product_id}] + Get l1 + cte l5 = + ArrangeBy keys=[[#1{product_id}]] + ReadIndex on=sales idx_sales_product_id=[differential join] + cte l6 = + Project (#0{product_id}..=#2{base_price}, #10..=#12) + Map (case when 
(#5) IS NULL then null else #4 end, case when (#8) IS NULL then null else #7{min_promotion_discount} end) + Join on=(#0{product_id} = #3{product_id} = #6{product_id} = #9) type=delta + ArrangeBy keys=[[#0{product_id}]] + Project (#0{product_id}..=#2{base_price}) + ReadIndex on=products idx_products_product_name=[*** full scan ***] + ArrangeBy keys=[[#0{product_id}]] + Union + Map (true) + Get l0 + Map (null, null) + Threshold + Union + Negate + Project (#0{product_id}) + Get l0 + Get l4 + ArrangeBy keys=[[#0{product_id}]] + Union + Map (true) + Get l3 + Map (null, null) + Threshold + Union + Negate + Project (#0{product_id}) + Get l3 + Get l4 + ArrangeBy keys=[[#0]] + Project (#2, #3) + Filter (#2) IS NOT NULL + Map (record_get[0](record_get[1](#1)), record_get[0](#1)) + FlatMap unnest_list(#0{rank}) + Project (#1{rank}) + Reduce group_by=[#1{category_id}] aggregates=[rank[order_by=[#0{product_id} desc nulls_first]](row(list[row(#0{product_id}, #1{category_id}, #2{count})], #2{count}))] + Reduce group_by=[#0{product_id}, #1{category_id}] aggregates=[count(*)] + Project (#1{product_id}, #6{category_id}) + Join on=(#1{product_id} = #5{product_id}) type=differential + Get l5 + ArrangeBy keys=[[#0{product_id}]] + Project (#0{product_id}, #3{category_id}) + ReadIndex on=products idx_products_product_name=[*** full scan ***] + cte l7 = + Project (#2, #3) + Filter (#2) IS NOT NULL + Map (record_get[0](record_get[1](#1)), record_get[0](#1)) + FlatMap unnest_list(#0{rank}) + Reduce aggregates=[rank[order_by=[#0{product_id} desc nulls_first]](row(list[row(#0{product_id}, #1{sum_stock})], #1{sum_stock}))] + Reduce group_by=[#0{product_id}] aggregates=[sum(#1{stock})] + Project (#1{product_id}, #2{stock}) + ReadIndex on=inventory idx_inventory_product_id=[*** full scan ***] + cte l8 = + Project (#0{product_id}) + Filter (bigint_to_numeric(#1{count}) > (#2{sum_count} / bigint_to_numeric(case when (#3{count} = 0) then null else #3{count} end))) + CrossJoin type=differential + ArrangeBy keys=[[]] + Reduce group_by=[#0{product_id}] aggregates=[count(*)] + Project (#0{product_id}) + Join on=(#0{product_id} = #2{product_id}) type=differential + Get l2 + Get l5 + ArrangeBy keys=[[]] + Reduce aggregates=[sum(#0{count}), count(*)] + Project (#1{count}) + Reduce group_by=[#0{product_id}] aggregates=[count(*)] + Project (#1{product_id}) + ReadIndex on=sales idx_sales_product_id=[*** full scan ***] + cte l9 = + Distinct project=[#0{product_id}] + Project (#0{product_id}) + Get l6 + Return + Project (#0{product_id}, #13, #11{last_update_time}) + Map (case when (#8) IS NULL then null else #7 end, (((((#2{base_price} * case when (#3 <= 3) then 1.2 else case when ((#3 <= 10) AND (#3 >= 4)) then 1.1 else 0.9 end end) * coalesce((1 - (#5 / 100)), 1)) * case when (#12 <= 3) then 1.1 else case when ((#12 <= 10) AND (#12 >= 4)) then 1.05 else 1 end end) * case when (#2{base_price} > #4) then (1 + ((#2{base_price} - #4) / #4)) else (1 - ((#4 - #2{base_price}) / #4)) end) * case when ilike["%cheap%"](varchar_to_text(#1{product_name})) then 0.8 else 1 end)) + Join on=(#0{product_id} = #6{product_id} = #9{product_id} = #10{product_id}) type=delta + ArrangeBy keys=[[#0{product_id}]] + Get l6 + ArrangeBy keys=[[#0{product_id}]] + Union + Map (true) + Get l7 + Map (null, null) + Threshold + Union + Negate + Project (#0) + Get l7 + Get l9 + ArrangeBy keys=[[#0{product_id}]] + Union + Get l8 + Threshold + Union + Negate + Get l8 + Get l9 + ArrangeBy keys=[[#0{product_id}]] + Project (#0{product_id}, #6{last_update_time}) + 
ReadIndex on=products idx_products_product_name=[*** full scan ***] + +Used Indexes: + - materialize.public.idx_products_product_name (*** full scan ***) + - materialize.public.idx_sales_product_id (*** full scan ***, differential join) + - materialize.public.idx_promotions_active (lookup) + - materialize.public.idx_inventory_product_id (*** full scan ***) + +Target cluster: quickstart + +EOF + +statement ok +CREATE VIEW dynamic_pricing AS +WITH recent_prices AS ( + SELECT grp.product_id, AVG(price) AS avg_price + FROM (SELECT DISTINCT product_id FROM sales) grp, + LATERAL ( + SELECT product_id, price + FROM sales + WHERE sales.product_id = grp.product_id + ORDER BY sale_date DESC LIMIT 10 + ) sub + GROUP BY grp.product_id +), +promotion_effect AS ( + SELECT + p.product_id, + MIN(pr.promotion_discount) AS promotion_discount + FROM promotions pr + JOIN products p ON pr.product_id = p.product_id + WHERE pr.active = TRUE + GROUP BY p.product_id +), +popularity_score AS ( + SELECT + s.product_id, + RANK() OVER (PARTITION BY p.category_id ORDER BY COUNT(s.sale_id) DESC) AS popularity_rank, + COUNT(s.sale_id) AS sale_count + FROM sales s + JOIN products p ON s.product_id = p.product_id + GROUP BY s.product_id, p.category_id +), +inventory_status AS ( + SELECT + i.product_id, + SUM(i.stock) AS total_stock, + RANK() OVER (ORDER BY SUM(i.stock) DESC) AS stock_rank + FROM inventory i + GROUP BY i.product_id +), +high_demand_products AS ( + SELECT + p.product_id, + AVG(s.sale_price) AS avg_sale_price, + COUNT(s.sale_id) AS total_sales + FROM products p + JOIN sales s ON p.product_id = s.product_id + GROUP BY p.product_id + HAVING COUNT(s.sale_id) > (SELECT AVG(total_sales) FROM (SELECT COUNT(*) AS total_sales FROM sales GROUP BY product_id) subquery) +), +dynamic_pricing AS ( + SELECT + p.product_id, + p.base_price, + CASE + WHEN pop.popularity_rank <= 3 THEN 1.2 + WHEN pop.popularity_rank BETWEEN 4 AND 10 THEN 1.1 + ELSE 0.9 + END AS popularity_adjustment, + rp.avg_price, + COALESCE(1.0 - (pe.promotion_discount / 100), 1) AS promotion_discount, + CASE + WHEN inv.stock_rank <= 3 THEN 1.1 + WHEN inv.stock_rank BETWEEN 4 AND 10 THEN 1.05 + ELSE 1 + END AS stock_adjustment, + CASE + WHEN p.base_price > rp.avg_price THEN 1 + (p.base_price - rp.avg_price) / rp.avg_price + ELSE 1 - (rp.avg_price - p.base_price) / rp.avg_price + END AS demand_multiplier, + hd.avg_sale_price, + CASE + WHEN p.product_name ilike '%cheap%' THEN 0.8 + ELSE 1.0 + END AS additional_discount + FROM products p + LEFT JOIN recent_prices rp ON p.product_id = rp.product_id + LEFT JOIN promotion_effect pe ON p.product_id = pe.product_id + JOIN popularity_score pop ON p.product_id = pop.product_id + LEFT JOIN inventory_status inv ON p.product_id = inv.product_id + LEFT JOIN high_demand_products hd ON p.product_id = hd.product_id +) +SELECT + dp.product_id, + dp.base_price * dp.popularity_adjustment * dp.promotion_discount * dp.stock_adjustment * dp.demand_multiplier * dp.additional_discount AS adjusted_price, + p.last_update_time +FROM dynamic_pricing dp +JOIN products p ON dp.product_id = p.product_id; + +statement ok +CREATE INDEX IF NOT EXISTS dynamic_pricing_product_id_idx ON dynamic_pricing (product_id); + +statement ok +CREATE MATERIALIZED VIEW mv_dynamic_pricing AS +WITH recent_prices AS ( + SELECT grp.product_id, AVG(price) AS avg_price + FROM (SELECT DISTINCT product_id FROM sales) grp, + LATERAL ( + SELECT product_id, price + FROM sales + WHERE sales.product_id = grp.product_id + ORDER BY sale_date DESC LIMIT 10 + ) sub + GROUP 
BY grp.product_id +), +promotion_effect AS ( + SELECT + p.product_id, + MIN(pr.promotion_discount) AS promotion_discount + FROM promotions pr + JOIN products p ON pr.product_id = p.product_id + WHERE pr.active = TRUE + GROUP BY p.product_id +), +popularity_score AS ( + SELECT + s.product_id, + RANK() OVER (PARTITION BY p.category_id ORDER BY COUNT(s.sale_id) DESC) AS popularity_rank, + COUNT(s.sale_id) AS sale_count + FROM sales s + JOIN products p ON s.product_id = p.product_id + GROUP BY s.product_id, p.category_id +), +inventory_status AS ( + SELECT + i.product_id, + SUM(i.stock) AS total_stock, + RANK() OVER (ORDER BY SUM(i.stock) DESC) AS stock_rank + FROM inventory i + GROUP BY i.product_id +), +high_demand_products AS ( + SELECT + p.product_id, + AVG(s.sale_price) AS avg_sale_price, + COUNT(s.sale_id) AS total_sales + FROM products p + JOIN sales s ON p.product_id = s.product_id + GROUP BY p.product_id + HAVING COUNT(s.sale_id) > (SELECT AVG(total_sales) FROM (SELECT COUNT(*) AS total_sales FROM sales GROUP BY product_id) subquery) +), +dynamic_pricing AS ( + SELECT + p.product_id, + p.base_price, + CASE + WHEN pop.popularity_rank <= 3 THEN 1.2 + WHEN pop.popularity_rank BETWEEN 4 AND 10 THEN 1.1 + ELSE 0.9 + END AS popularity_adjustment, + rp.avg_price, + COALESCE(1.0 - (pe.promotion_discount / 100), 1) AS promotion_discount, + CASE + WHEN inv.stock_rank <= 3 THEN 1.1 + WHEN inv.stock_rank BETWEEN 4 AND 10 THEN 1.05 + ELSE 1 + END AS stock_adjustment, + CASE + WHEN p.base_price > rp.avg_price THEN 1 + (p.base_price - rp.avg_price) / rp.avg_price + ELSE 1 - (rp.avg_price - p.base_price) / rp.avg_price + END AS demand_multiplier, + hd.avg_sale_price, + CASE + WHEN p.product_name ilike '%cheap%' THEN 0.8 + ELSE 1.0 + END AS additional_discount + FROM products p + LEFT JOIN recent_prices rp ON p.product_id = rp.product_id + LEFT JOIN promotion_effect pe ON p.product_id = pe.product_id + JOIN popularity_score pop ON p.product_id = pop.product_id + LEFT JOIN inventory_status inv ON p.product_id = inv.product_id + LEFT JOIN high_demand_products hd ON p.product_id = hd.product_id +) +SELECT + dp.product_id, + dp.base_price * dp.popularity_adjustment * dp.promotion_discount * dp.stock_adjustment * dp.demand_multiplier * dp.additional_discount AS adjusted_price, + p.last_update_time +FROM dynamic_pricing dp +JOIN products p ON dp.product_id = p.product_id; + +statement ok +CREATE INDEX idx_product_id ON mv_dynamic_pricing(product_id); + +query T multiline +EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS TEXT FOR +SELECT + sc.product_id, + sc.product_name, + c.category_id, + c.category_name, + dp.adjusted_price AS price, + COALESCE(SUM(i.stock), 0) as available_stock +FROM + shopping_cart sc +JOIN + products p ON sc.product_id = p.product_id +JOIN + categories c ON p.category_id = c.category_id +JOIN + dynamic_pricing dp ON p.product_id = dp.product_id +LEFT JOIN + inventory i ON p.product_id = i.product_id +GROUP BY + sc.product_id, + sc.product_name, + c.category_id, + c.category_name, + dp.adjusted_price; +---- +Explained Query: + With + cte l0 = + Project (#0{product_id}, #1{product_name}, #3{category_id}, #5{category_name}, #7{adjusted_price}) + Join on=(#0{product_id} = #2{product_id} = #6{product_id} AND #3{category_id} = #4{category_id}) type=delta + ArrangeBy keys=[[#0{product_id}]] + Project (#0{product_id}, #1{product_name}) + ReadStorage materialize.public.shopping_cart + ArrangeBy keys=[[#0{product_id}], [#1{category_id}]] + Project (#0{product_id}, #3{category_id}) + 
ReadIndex on=products idx_products_product_name=[*** full scan ***] + ArrangeBy keys=[[#0{category_id}]] + Project (#0{category_id}, #1{category_name}) + ReadStorage materialize.public.categories + ArrangeBy keys=[[#0{product_id}]] + ReadIndex on=dynamic_pricing dynamic_pricing_product_id_idx=[delta join lookup] + cte l1 = + ArrangeBy keys=[[#0{product_id}]] + Get l0 + cte l2 = + Project (#0{product_id}..=#4{adjusted_price}, #7{stock}) + Join on=(#0{product_id} = #6{product_id}) type=differential + Get l1 + ArrangeBy keys=[[#1{product_id}]] + ReadIndex on=inventory idx_inventory_product_id=[differential join] + Return + Project (#0{product_id}..=#4{adjusted_price}, #6) + Map (coalesce(#5{sum_stock}, 0)) + Reduce group_by=[#0{product_id}..=#4{adjusted_price}] aggregates=[sum(#5{stock})] + Union + Map (null) + Union + Negate + Project (#0{product_id}..=#4{adjusted_price}) + Join on=(#0{product_id} = #5{product_id}) type=differential + Get l1 + ArrangeBy keys=[[#0{product_id}]] + Distinct project=[#0{product_id}] + Project (#0{product_id}) + Get l2 + Get l0 + Get l2 + +Source materialize.public.categories +Source materialize.public.shopping_cart + +Used Indexes: + - materialize.public.idx_products_product_name (*** full scan ***) + - materialize.public.idx_inventory_product_id (differential join) + - materialize.public.dynamic_pricing_product_id_idx (delta join lookup) + +Target cluster: quickstart + +EOF + +statement ok +CREATE VIEW dynamic_price_shopping_cart AS SELECT + sc.product_id, + sc.product_name, + c.category_id, + c.category_name, + dp.adjusted_price AS price, + COALESCE(SUM(i.stock), 0) as available_stock +FROM + shopping_cart sc +JOIN + products p ON sc.product_id = p.product_id +JOIN + categories c ON p.category_id = c.category_id +JOIN + dynamic_pricing dp ON p.product_id = dp.product_id +LEFT JOIN + inventory i ON p.product_id = i.product_id +GROUP BY + sc.product_id, + sc.product_name, + c.category_id, + c.category_name, + dp.adjusted_price; + +statement ok +CREATE DEFAULT INDEX IF NOT EXISTS dynamic_price_shopping_cart_idx ON dynamic_price_shopping_cart; + +query T multiline +EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS TEXT FOR +WITH MUTUALLY RECURSIVE + rollup(category_id int, total numeric(20,10), item_count int) AS ( + -- Base: calculate each category's direct total and item count + SELECT + c.category_id, + COALESCE(SUM(d.price), 0)::numeric(20,10), + COUNT(d.price) + FROM categories c + LEFT JOIN dynamic_price_shopping_cart d + ON c.category_id = d.category_id + GROUP BY c.category_id + UNION ALL + -- Recursive: bubble each category's totals upward to its parent + SELECT + c.parent_id, + r.total, + r.item_count + FROM rollup r + JOIN categories c + ON r.category_id = c.category_id + WHERE c.parent_id IS NOT NULL + ), + totals(category_id int, total numeric(20,10), item_count int) AS ( + SELECT + c.category_id, + SUM(r.total)::numeric(20,10) AS total, + SUM(r.item_count) AS item_count + FROM categories c + JOIN rollup r + ON c.category_id = r.category_id + GROUP BY c.category_id + HAVING SUM(r.item_count) > 0 -- only include categories with items + ), + has_subcategories(category_id int, has_subcategory boolean) AS ( + SELECT + a.category_id, + count(*) FILTER (WHERE b.parent_id IS NOT NULL) > 0 AS has_subcategory + FROM categories a + LEFT JOIN categories b ON a.category_id = b.parent_id + GROUP BY a.category_id + ) +SELECT + t.category_id, + c.parent_id, + s.has_subcategory, + c.category_name, + t.total, + t.item_count +FROM totals t +JOIN categories c USING 
(category_id) +JOIN has_subcategories s USING (category_id); +---- +Explained Query: + With + cte l0 = + Project (#0{category_id}) + ReadStorage materialize.public.categories + cte l1 = + ArrangeBy keys=[[#0{category_id}]] + Get l0 + cte l2 = + Project (#0{category_id}, #2{price}) + Join on=(#0{category_id} = #1{category_id}) type=differential + Get l1 + ArrangeBy keys=[[#0{category_id}]] + Project (#2{category_id}, #4{price}) + ReadIndex on=dynamic_price_shopping_cart dynamic_price_shopping_cart_idx=[*** full scan ***] + Return + With Mutually Recursive + cte l3 = + Project (#0{category_id}, #1, #3) + Map (bigint_to_integer(#2{count_price})) + Union + Project (#0{category_id}, #3, #2{count_price}) + Map (adjust_numeric_scale(coalesce(#1{sum_price}, 0))) + Reduce group_by=[#0{category_id}] aggregates=[sum(#1{price}), count(#1{price})] + Union + Map (null) + Union + Negate + Project (#0{category_id}) + Join on=(#0{category_id} = #1{category_id}) type=differential + Get l1 + ArrangeBy keys=[[#0{category_id}]] + Distinct project=[#0{category_id}] + Project (#0{category_id}) + Get l2 + Get l0 + Get l2 + Project (#4{parent_id}, #1, #5) + Map (integer_to_bigint(#2)) + Join on=(#0 = #3{category_id}) type=differential + ArrangeBy keys=[[#0{category_id}]] + Get l3 + ArrangeBy keys=[[#0{category_id}]] + Project (#0{category_id}, #2{parent_id}) + Filter (#2{parent_id}) IS NOT NULL + ReadStorage materialize.public.categories + Return + With + cte l4 = + Project (#0{category_id}) + ReadStorage materialize.public.categories + cte l5 = + ArrangeBy keys=[[#0{category_id}]] + Get l4 + cte l6 = + Project (#0{category_id}) + Join on=(#0{category_id} = #1{parent_id}) type=differential + Get l5 + ArrangeBy keys=[[#0{parent_id}]] + Project (#2{parent_id}) + Filter (#2{parent_id}) IS NOT NULL + ReadStorage materialize.public.categories + Return + Project (#0{category_id}, #5{parent_id}, #9, #4{category_name}, #1{sum}, #8) + Filter (#2{sum} > 0) + Map (bigint_to_integer(#2{sum}), (#7{count} > 0)) + Join on=(#0{category_id} = #3{category_id} = #6{category_id}) type=delta + ArrangeBy keys=[[#0{category_id}]] + Reduce group_by=[#0{category_id}] aggregates=[sum(#1), sum(#2)] + Project (#0{category_id}, #2, #3) + Join on=(#0{category_id} = #1{category_id}) type=differential + Get l5 + ArrangeBy keys=[[#0{category_id}]] + Get l3 + ArrangeBy keys=[[#0{category_id}]] + ReadStorage materialize.public.categories + ArrangeBy keys=[[#0{category_id}]] + Reduce group_by=[#0{category_id}] aggregates=[count((null OR (#1{category_id}) IS NOT NULL))] + Union + Map (null) + Union + Negate + Project (#0{category_id}) + Join on=(#0{category_id} = #1{category_id}) type=differential + Get l5 + ArrangeBy keys=[[#0{category_id}]] + Distinct project=[#0{category_id}] + Get l6 + Get l4 + Project (#0{category_id}, #0{category_id}) + Get l6 + +Source materialize.public.categories + +Used Indexes: + - materialize.public.dynamic_price_shopping_cart_idx (*** full scan ***) + +Target cluster: quickstart + +EOF + +statement ok +CREATE VIEW category_totals AS +WITH MUTUALLY RECURSIVE + rollup(category_id int, total numeric(20,10), item_count int) AS ( + -- Base: calculate each category's direct total and item count + SELECT + c.category_id, + COALESCE(SUM(d.price), 0)::numeric(20,10), + COUNT(d.price) + FROM categories c + LEFT JOIN dynamic_price_shopping_cart d + ON c.category_id = d.category_id + GROUP BY c.category_id + UNION ALL + -- Recursive: bubble each category's totals upward to its parent + SELECT + c.parent_id, + r.total, + r.item_count 
+ FROM rollup r + JOIN categories c + ON r.category_id = c.category_id + WHERE c.parent_id IS NOT NULL + ), + totals(category_id int, total numeric(20,10), item_count int) AS ( + SELECT + c.category_id, + SUM(r.total)::numeric(20,10) AS total, + SUM(r.item_count) AS item_count + FROM categories c + JOIN rollup r + ON c.category_id = r.category_id + GROUP BY c.category_id + HAVING SUM(r.item_count) > 0 -- only include categories with items + ), + has_subcategories(category_id int, has_subcategory boolean) AS ( + SELECT + a.category_id, + count(*) FILTER (WHERE b.parent_id IS NOT NULL) > 0 AS has_subcategory + FROM categories a + LEFT JOIN categories b ON a.category_id = b.parent_id + GROUP BY a.category_id + ) +SELECT + t.category_id, + c.parent_id, + s.has_subcategory, + c.category_name, + t.total, + t.item_count +FROM totals t +JOIN categories c USING (category_id) +JOIN has_subcategories s USING (category_id); + +statement ok +CREATE DEFAULT INDEX IF NOT EXISTS category_totals_category_id_idx ON category_totals; + +statement ok +CREATE INDEX IF NOT EXISTS category_totals_parent_id_idx ON category_totals (parent_id); + +query T multiline +EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS TEXT FOR SELECT EXISTS( SELECT 1 FROM inventory WHERE product_id = 1 ); +---- +Explained Query: + With + cte l0 = + Distinct project=[] + Project () + ReadIndex on=materialize.public.inventory idx_inventory_product_id=[lookup value=(1)] + Return + Union + Map (true) + Get l0 + Map (false) + Union + Negate + Get l0 + Constant + - () + +Used Indexes: + - materialize.public.idx_inventory_product_id (lookup) + +Target cluster: quickstart + +EOF + +query T multiline +EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS TEXT FOR WITH selected_product AS ( + SELECT product_id, product_name, category_id, base_price + FROM products + WHERE product_id != 1 + LIMIT 1 + ) +SELECT product_id, product_name, category_id, base_price + FROM selected_product; +---- +Explained Query: + TopK limit=1 + Project (#0{product_id}, #1{product_name}, #3{category_id}, #2{base_price}) + Filter (#0{product_id} != 1) + ReadIndex on=products idx_products_product_name=[*** full scan ***] + +Used Indexes: + - materialize.public.idx_products_product_name (*** full scan ***) + +Target cluster: quickstart + +EOF +query T multiline +EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS TEXT FOR +SELECT product_id, adjusted_price, last_update_time +FROM dynamic_pricing +WHERE product_id = 5; +---- +Explained Query (fast path): + Project (#0{product_id}..=#2{last_update_time}) + ReadIndex on=materialize.public.dynamic_pricing dynamic_pricing_product_id_idx=[lookup value=(5)] + +Used Indexes: + - materialize.public.dynamic_pricing_product_id_idx (lookup) + +Target cluster: quickstart + +EOF + +query T multiline +EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS TEXT FOR +SELECT product_id, adjusted_price, last_update_time +FROM mv_dynamic_pricing +WHERE product_id = 5; +---- +Explained Query (fast path): + Project (#0{product_id}..=#2{last_update_time}) + ReadIndex on=materialize.public.mv_dynamic_pricing idx_product_id=[lookup value=(5)] + +Used Indexes: + - materialize.public.idx_product_id (lookup) + +Target cluster: quickstart + +EOF + +query T multiline +EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS TEXT FOR +SELECT DISTINCT category_id, category_name +FROM categories +ORDER BY category_name; +---- +Explained Query: + Finish order_by=[#1{category_name} asc nulls_last] output=[#0, #1] + Distinct 
project=[#0{category_id}, #1{category_name}] + Project (#0{category_id}, #1{category_name}) + ReadStorage materialize.public.categories + +Source materialize.public.categories + +Target cluster: quickstart + +EOF + +query T multiline +EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS TEXT FOR +SELECT + category_name, + item_count, + total as subtotal +FROM category_totals +ORDER BY category_name ASC; +---- +Explained Query (fast path): + Finish order_by=[#0 asc nulls_last] output=[#0..=#2] + Project (#3{category_name}, #5{item_count}, #4{total}) + ReadIndex on=materialize.public.category_totals category_totals_category_id_idx=[*** full scan ***] + +Used Indexes: + - materialize.public.category_totals_category_id_idx (*** full scan ***) + +Target cluster: quickstart + +EOF diff --git a/test/sqllogictest/joins.slt b/test/sqllogictest/joins.slt index cda86e031474f..8bc87303bbfd6 100644 --- a/test/sqllogictest/joins.slt +++ b/test/sqllogictest/joins.slt @@ -549,8 +549,9 @@ EXPLAIN WITH(arity, join implementations) SELECT name, id FROM v4362 WHERE name Explained Query: With cte l0 = - Filter (#1 = 1) // { arity: 2 } - ReadStorage materialize.public.t4362 // { arity: 2 } + Project (#0) // { arity: 1 } + Filter (#1 = 1) // { arity: 2 } + ReadStorage materialize.public.t4362 // { arity: 2 } Return // { arity: 2 } Project (#0, #1) // { arity: 2 } Join on=(#0 = #2) type=differential // { arity: 3 } @@ -560,14 +561,13 @@ Explained Query: ReadStorage materialize.public.t4362 // { arity: 2 } ArrangeBy keys=[[#0]] // { arity: 1 } Union // { arity: 1 } - Project (#0) // { arity: 1 } - Get l0 // { arity: 2 } + Get l0 // { arity: 1 } Map (error("more than one record produced in subquery")) // { arity: 1 } Project () // { arity: 0 } Filter (#0 > 1) // { arity: 1 } Reduce aggregates=[count(*)] // { arity: 1 } Project () // { arity: 0 } - Get l0 // { arity: 2 } + Get l0 // { arity: 1 } Source materialize.public.t4362 diff --git a/test/sqllogictest/ldbc_bi.slt b/test/sqllogictest/ldbc_bi.slt index 4df0be1726e7d..9bea856e5a4c4 100644 --- a/test/sqllogictest/ldbc_bi.slt +++ b/test/sqllogictest/ldbc_bi.slt @@ -448,14 +448,14 @@ Explained Query: Filter (#0{creationdate} < 2010-06-11 09:21:46 UTC) // { arity: 13 } ReadIndex on=message message_messageid=[*** full scan ***] // { arity: 13 } Return // { arity: 7 } - Project (#0..=#2, #4, #7, #5{sum}, #8) // { arity: 7 } + Project (#0..=#2, #4{count}, #7, #5{sum}, #8) // { arity: 7 } Map ((#5{sum} / bigint_to_numeric(case when (#6{count} = 0) then null else #6{count} end)), (bigint_to_numeric(#4{count}) / #3)) // { arity: 9 } Reduce group_by=[#1..=#4] aggregates=[count(*), sum(integer_to_bigint(#0{length})), count(integer_to_bigint(#0{length}))] // { arity: 7 } CrossJoin type=differential // { arity: 5 } implementation %1[×]U » %0:message[×]if ArrangeBy keys=[[]] // { arity: 4 } - Project (#8, #13..=#15) // { arity: 4 } + Project (#8{length}, #13..=#15) // { arity: 4 } Filter (#0{creationdate} < 2010-06-11 09:21:46 UTC) AND (#4{content}) IS NOT NULL // { arity: 16 } Map (extract_year_tstz(#0{creationdate}), (#12{parentmessageid}) IS NOT NULL, case when (#8{length} < 40) then 0 else case when (#8{length} < 80) then 1 else case when (#8{length} < 160) then 2 else 3 end end end) // { arity: 16 } ReadIndex on=message message_messageid=[*** full scan ***] // { arity: 13 } @@ -520,7 +520,7 @@ Explained Query: Finish order_by=[#3 desc nulls_first, #0{name} asc nulls_last] limit=100 output=[#0..=#3] With cte l0 = - Project (#5, #6) // { arity: 2 } + Project (#5{id}, 
#6{name}) // { arity: 2 } Join on=(#0{id} = #8{typetagclassid}) type=differential // { arity: 9 } implementation %0:tagclass[#0]KAe » %1:tag[#3]KAe @@ -532,7 +532,7 @@ Explained Query: Filter (#0{id}) IS NOT NULL // { arity: 2 } Get l0 // { arity: 2 } cte l2 = - Project (#1{count}, #3, #4) // { arity: 3 } + Project (#1{name}, #3{count}, #4{count}) // { arity: 3 } Join on=(#0{id} = #2{id}) type=differential // { arity: 5 } implementation %1[#0]UKA » %0:l1[#0]K @@ -540,7 +540,7 @@ Explained Query: Get l1 // { arity: 2 } ArrangeBy keys=[[#0{id}]] // { arity: 3 } Reduce group_by=[#0{id}] aggregates=[count(case when (#2{creationdate} < 2010-09-16 00:00:00 UTC) then #1{messageid} else null end), count(case when (#2{creationdate} >= 2010-09-16 00:00:00 UTC) then #1{messageid} else null end)] // { arity: 3 } - Project (#0{id}, #2{creationdate}, #4) // { arity: 3 } + Project (#0{id}, #2{messageid}, #4{creationdate}) // { arity: 3 } Filter (#4{creationdate} < 2010-12-25 00:00:00 UTC) AND (#4{creationdate} >= 2010-06-08 00:00:00 UTC) // { arity: 17 } Join on=(#0{id} = #3{tagid} AND #2{messageid} = #5{messageid}) type=delta // { arity: 17 } implementation @@ -563,7 +563,7 @@ Explained Query: Negate // { arity: 1 } Project (#0{name}) // { arity: 1 } Get l2 // { arity: 3 } - Project (#1) // { arity: 1 } + Project (#1{name}) // { arity: 1 } Get l0 // { arity: 2 } Get l2 // { arity: 3 } @@ -616,7 +616,7 @@ LIMIT 20 Explained Query: Finish order_by=[#4{count} desc nulls_first, #0{containerforumid} asc nulls_last] limit=20 output=[#0..=#4] Reduce group_by=[#0{containerforumid}, #2{title}, #1{creationdate}, #3{moderatorpersonid}] aggregates=[count(*)] // { arity: 5 } - Project (#10, #13, #15, #16) // { arity: 4 } + Project (#10{containerforumid}, #13{creationdate}, #15{title}, #16{moderatorpersonid}) // { arity: 4 } Filter (#33{name} = "China") AND (#16{moderatorpersonid}) IS NOT NULL AND (#31{partofcountryid}) IS NOT NULL // { arity: 37 } Join on=(#1{messageid} = #36{messageid} AND #10{containerforumid} = #14{id} AND #16{moderatorpersonid} = #18{id} AND #25{locationcityid} = #28{id} AND #31{partofcountryid} = #32{id}) type=delta // { arity: 37 } implementation @@ -638,7 +638,7 @@ Explained Query: ReadIndex on=country country_id=[delta join lookup] // { arity: 4 } ArrangeBy keys=[[#0{messageid}]] // { arity: 1 } Distinct project=[#0{messageid}] // { arity: 1 } - Project (#10) // { arity: 1 } + Project (#10{messageid}) // { arity: 1 } Join on=(#0{id} = #8{typetagclassid} AND #5{id} = #11{tagid}) type=delta // { arity: 12 } implementation %0:tagclass » %1:tag[#3]KA » %2:message_hastag_tag[#2]KA @@ -745,7 +745,7 @@ Explained Query: cte l0 = Project (#0{id}) // { arity: 1 } TopK order_by=[#1{maxnumberofmembers} desc nulls_first, #0{id} asc nulls_last] limit=100 // { arity: 2 } - Project (#0{id}, #2) // { arity: 2 } + Project (#0{id}, #2{maxnumberofmembers}) // { arity: 2 } Filter (#1{creationdate} > 2010-02-12 00:00:00 UTC) // { arity: 3 } ReadIndex on=top100popularforumsq04 top100popularforumsq04_id=[*** full scan ***] // { arity: 3 } cte l1 = @@ -757,7 +757,7 @@ Explained Query: ReadIndex on=person person_id=[differential join] // { arity: 11 } ArrangeBy keys=[[#0{personid}]] // { arity: 1 } Distinct project=[#0{personid}] // { arity: 1 } - Project (#3) // { arity: 1 } + Project (#3{personid}) // { arity: 1 } Join on=(#0{id} = #2{forumid}) type=differential // { arity: 4 } implementation %1:forum_hasmember_person[#1]KA » %0:l0[#0]K @@ -769,7 +769,7 @@ Explained Query: ArrangeBy keys=[[#1{id}]] // { arity: 4 } 
Get l1 // { arity: 4 } cte l3 = - Project (#0{creationdate}..=#3{lastname}, #5) // { arity: 5 } + Project (#0{creationdate}..=#3{lastname}, #5{messageid}) // { arity: 5 } Join on=(#1{id} = #13{creatorpersonid} AND #14{containerforumid} = #17{id}) type=delta // { arity: 18 } implementation %0:l2 » %1:message[#9]KA » %2[#0]UKA @@ -794,7 +794,7 @@ Explained Query: Get l2 // { arity: 4 } ArrangeBy keys=[[#0{id}]] // { arity: 1 } Distinct project=[#0{id}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{id}) // { arity: 1 } Get l3 // { arity: 5 } Get l1 // { arity: 4 } Get l3 // { arity: 5 } @@ -846,7 +846,7 @@ Explained Query: Finish order_by=[#4 desc nulls_first, #0{creatorpersonid} asc nulls_last] limit=100 output=[#0..=#4] With cte l0 = - Project (#1{messageid}, #5, #16) // { arity: 3 } + Project (#1{name}, #5{messageid}, #16{creatorpersonid}) // { arity: 3 } Join on=(#0{id} = #6{tagid} AND #5{messageid} = #8{messageid}) type=delta // { arity: 20 } implementation %0:tag » %1:message_hastag_tag[#2]KA » %2:message[#1]KA @@ -860,28 +860,28 @@ Explained Query: ReadIndex on=message message_messageid=[delta join lookup] // { arity: 13 } cte l1 = Reduce group_by=[#0{parentmessageid}] aggregates=[count(*)] // { arity: 2 } - Project (#12) // { arity: 1 } + Project (#12{parentmessageid}) // { arity: 1 } Filter (#12{parentmessageid}) IS NOT NULL // { arity: 13 } ReadIndex on=message message_messageid=[*** full scan ***] // { arity: 13 } cte l2 = Reduce group_by=[#0{messageid}] aggregates=[count(*)] // { arity: 2 } - Project (#2) // { arity: 1 } + Project (#2{messageid}) // { arity: 1 } ReadIndex on=person_likes_message person_likes_message_personid=[*** full scan ***] // { arity: 3 } cte l3 = Distinct project=[#0{messageid}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{messageid}) // { arity: 1 } Get l0 // { arity: 3 } Return // { arity: 5 } Map (((bigint_to_numeric((1 * #3{count})) + (2 * #1{sum})) + (10 * #2{sum}))) // { arity: 5 } Reduce group_by=[#0{creatorpersonid}] aggregates=[sum(coalesce(case when (#2) IS NULL then null else #1{count} end, 0)), sum(coalesce(case when (#4) IS NULL then null else #3{count} end, 0)), count(*)] // { arity: 4 } - Project (#1{count}, #3{count}, #4, #6, #7) // { arity: 5 } + Project (#1{creatorpersonid}, #3{count}, #4, #6{count}, #7) // { arity: 5 } Join on=(#0{messageid} = #2{parentmessageid} = #5{messageid}) type=delta // { arity: 8 } implementation %0:l0 » %1[#0]K » %2[#0]K %1 » %0:l0[#0]Kef » %2[#0]K %2 » %0:l0[#0]Kef » %1[#0]K ArrangeBy keys=[[#0{messageid}]] // { arity: 2 } - Project (#1{creatorpersonid}, #2) // { arity: 2 } + Project (#1{messageid}, #2{creatorpersonid}) // { arity: 2 } Filter (#0{name} = "Sikh_Empire") // { arity: 3 } Get l0 // { arity: 3 } ArrangeBy keys=[[#0{parentmessageid}]] // { arity: 3 } @@ -973,7 +973,7 @@ Explained Query: Finish order_by=[#1{sum} desc nulls_first, #0{creatorpersonid} asc nulls_last] limit=100 output=[#0, #1] With cte l0 = - Project (#6, #17) // { arity: 2 } + Project (#6{messageid}, #17{creatorpersonid}) // { arity: 2 } Join on=(#0{id} = #7{tagid} AND #6{messageid} = #9{messageid}) type=delta // { arity: 21 } implementation %0:tag » %1:message_hastag_tag[#2]KA » %2:message[#1]KA @@ -989,7 +989,7 @@ Explained Query: ArrangeBy keys=[[#0{messageid}]] // { arity: 2 } Get l0 // { arity: 2 } cte l2 = - Project (#0{messageid}, #1{creatorpersonid}, #3) // { arity: 3 } + Project (#0{messageid}, #1{creatorpersonid}, #3{personid}) // { arity: 3 } Join on=(#0{messageid} = #4{messageid}) 
type=differential // { arity: 5 } implementation %1:person_likes_message[#2]KA » %0:l1[#0]K @@ -1002,7 +1002,7 @@ Explained Query: Map (null) // { arity: 2 } Union // { arity: 1 } Negate // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{creatorpersonid}) // { arity: 1 } Join on=(#0{messageid} = #2{messageid}) type=differential // { arity: 3 } implementation %1[#0]UKA » %0:l1[#0]K @@ -1011,12 +1011,12 @@ Explained Query: Distinct project=[#0{messageid}] // { arity: 1 } Project (#0{messageid}) // { arity: 1 } Get l2 // { arity: 3 } - Project (#1) // { arity: 1 } + Project (#1{creatorpersonid}) // { arity: 1 } Get l0 // { arity: 2 } - Project (#1{personid}, #2) // { arity: 2 } + Project (#1{creatorpersonid}, #2{personid}) // { arity: 2 } Get l2 // { arity: 3 } cte l4 = - Project (#0{creatorpersonid}, #3) // { arity: 2 } + Project (#0{creatorpersonid}, #3{popularityscore}) // { arity: 2 } Join on=(#1{personid} = #2{person2id}) type=differential // { arity: 4 } implementation %1:popularityscoreq06[#0]UKA » %0:l3[#1]K @@ -1079,7 +1079,7 @@ Explained Query: Finish order_by=[#1{sum} desc nulls_first, #0{creatorpersonid} asc nulls_last] limit=100 output=[#0, #1] With cte l0 = - Project (#6, #17) // { arity: 2 } + Project (#6{messageid}, #17{creatorpersonid}) // { arity: 2 } Join on=(#0{id} = #7{tagid} AND #6{messageid} = #9{messageid}) type=delta // { arity: 21 } implementation %0:tag » %1:message_hastag_tag[#2]KA » %2:message[#1]KA @@ -1095,7 +1095,7 @@ Explained Query: ArrangeBy keys=[[#0{messageid}]] // { arity: 2 } Get l0 // { arity: 2 } cte l2 = - Project (#0{messageid}, #1{creatorpersonid}, #3) // { arity: 3 } + Project (#0{messageid}, #1{creatorpersonid}, #3{personid}) // { arity: 3 } Join on=(#0{messageid} = #4{messageid}) type=differential // { arity: 5 } implementation %1:person_likes_message[#2]KA » %0:l1[#0]K @@ -1108,7 +1108,7 @@ Explained Query: Map (null) // { arity: 2 } Union // { arity: 1 } Negate // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{creatorpersonid}) // { arity: 1 } Join on=(#0{messageid} = #2{messageid}) type=differential // { arity: 3 } implementation %1[#0]UKA » %0:l1[#0]K @@ -1117,12 +1117,12 @@ Explained Query: Distinct project=[#0{messageid}] // { arity: 1 } Project (#0{messageid}) // { arity: 1 } Get l2 // { arity: 3 } - Project (#1) // { arity: 1 } + Project (#1{creatorpersonid}) // { arity: 1 } Get l0 // { arity: 2 } - Project (#1{personid}, #2) // { arity: 2 } + Project (#1{creatorpersonid}, #2{personid}) // { arity: 2 } Get l2 // { arity: 3 } cte l4 = - Project (#0{creatorpersonid}, #3) // { arity: 2 } + Project (#0{creatorpersonid}, #3{popularityscore}) // { arity: 2 } Join on=(#1{personid} = #2{person2id}) type=differential // { arity: 4 } implementation %1:popularityscoreq06[#0]UKA » %0:l3[#1]K @@ -1186,7 +1186,7 @@ Explained Query: Finish order_by=[#1{sum} desc nulls_first, #0{creatorpersonid} asc nulls_last] limit=100 output=[#0, #1] With cte l0 = - Project (#1{messageid}, #5, #16) // { arity: 3 } + Project (#1{name}, #5{messageid}, #16{creatorpersonid}) // { arity: 3 } Join on=(#0{id} = #6{tagid} AND #5{messageid} = #8{messageid}) type=delta // { arity: 20 } implementation %0:tag » %1:message_hastag_tag[#2]KA » %2:message[#1]KA @@ -1199,7 +1199,7 @@ Explained Query: ArrangeBy keys=[[#1{messageid}]] // { arity: 13 } ReadIndex on=message message_messageid=[delta join lookup] // { arity: 13 } cte l1 = - Project (#0{name}..=#2{creatorpersonid}, #4) // { arity: 4 } + Project (#0{name}..=#2{creatorpersonid}, #4{personid}) // { arity: 4 
} Join on=(#1{messageid} = #5{messageid}) type=differential // { arity: 6 } implementation %1:person_likes_message[#2]KA » %0:l0[#1]K @@ -1208,32 +1208,32 @@ Explained Query: ArrangeBy keys=[[#2{messageid}]] // { arity: 3 } ReadIndex on=person_likes_message person_likes_message_messageid=[differential join] // { arity: 3 } cte l2 = - Filter (#0{name} = "Bob_Geldof") // { arity: 3 } - Get l0 // { arity: 3 } + Project (#1{messageid}, #2{creatorpersonid}) // { arity: 2 } + Filter (#0{name} = "Bob_Geldof") // { arity: 3 } + Get l0 // { arity: 3 } cte l3 = Distinct project=[#0{creatorpersonid}, #1{personid}] // { arity: 2 } Union // { arity: 2 } Map (null) // { arity: 2 } Union // { arity: 1 } Negate // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{creatorpersonid}) // { arity: 1 } Join on=(#0{messageid} = #2{messageid}) type=differential // { arity: 3 } implementation %1[#0]UKA » %0:l2[#0]Kef ArrangeBy keys=[[#0{messageid}]] // { arity: 2 } - Project (#1{creatorpersonid}, #2) // { arity: 2 } - Get l2 // { arity: 3 } + Get l2 // { arity: 2 } ArrangeBy keys=[[#0{messageid}]] // { arity: 1 } Distinct project=[#0{messageid}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{messageid}) // { arity: 1 } Get l1 // { arity: 4 } - Project (#2) // { arity: 1 } - Get l2 // { arity: 3 } - Project (#2, #3) // { arity: 2 } + Project (#1{creatorpersonid}) // { arity: 1 } + Get l2 // { arity: 2 } + Project (#2{creatorpersonid}, #3{personid}) // { arity: 2 } Filter (#0{name} = "Bob_Geldof") // { arity: 4 } Get l1 // { arity: 4 } cte l4 = - Project (#0{creatorpersonid}, #3) // { arity: 2 } + Project (#0{creatorpersonid}, #3{popularityscore}) // { arity: 2 } Join on=(#1{personid} = #2{person2id}) type=differential // { arity: 4 } implementation %1:popularityscoreq06[#0]UKA » %0:l3[#1]K @@ -1303,7 +1303,7 @@ Explained Query: Finish order_by=[#1{count} desc nulls_first, #0{name} asc nulls_last] limit=100 output=[#0, #1] With cte l0 = - Project (#1) // { arity: 1 } + Project (#1{messageid}) // { arity: 1 } Join on=(#2{tagid} = #3{id}) type=differential // { arity: 8 } implementation %1:tag[#0]KAe » %0:message_hastag_tag[#2]KAe @@ -1312,7 +1312,7 @@ Explained Query: ArrangeBy keys=[[#0{id}]] // { arity: 5 } ReadIndex on=materialize.public.tag tag_name=[lookup value=("Slovenia")] // { arity: 5 } cte l1 = - Project (#2, #18) // { arity: 2 } + Project (#2{messageid}, #18{name}) // { arity: 2 } Join on=(#0{messageid} = #13{parentmessageid} AND #2{messageid} = #15{messageid} AND #16{tagid} = #17{id}) type=delta // { arity: 21 } implementation %0:l0 » %1:message[#12]KA » %2:message_hastag_tag[#1]KA » %3:tag[#0]KA @@ -1333,7 +1333,7 @@ Explained Query: Get l1 // { arity: 2 } Return // { arity: 2 } Reduce group_by=[#0{name}] aggregates=[count(*)] // { arity: 2 } - Project (#1) // { arity: 1 } + Project (#1{name}) // { arity: 1 } Join on=(#0{messageid} = #2{messageid}) type=differential // { arity: 3 } implementation %0:l1[#0]K » %1[#0]K @@ -1428,7 +1428,7 @@ Explained Query: ArrangeBy keys=[[#0{id}]] // { arity: 5 } ReadIndex on=materialize.public.tag tag_name=[lookup value=("Abbas_I_of_Persia")] // { arity: 5 } cte l2 = - Project (#1) // { arity: 1 } + Project (#1{id}) // { arity: 1 } Join on=(#1{id} = #11{personid} AND #12{tagid} = #13{id}) type=delta // { arity: 18 } implementation %0:l0 » %1:person_hasinterest_tag[#0]K » %2:l1[#0]KAe @@ -1436,12 +1436,12 @@ Explained Query: %2:l1 » %1:person_hasinterest_tag[#1]KA » %0:l0[#1]KA Get l0 // { arity: 11 } ArrangeBy keys=[[#0{personid}], [#1{tagid}]] // 
{ arity: 2 } - Project (#1{tagid}, #2) // { arity: 2 } + Project (#1{personid}, #2{tagid}) // { arity: 2 } ReadIndex on=person_hasinterest_tag person_hasinterest_tag_tagid=[*** full scan ***] // { arity: 3 } Get l1 // { arity: 5 } cte l3 = Reduce group_by=[#0{creatorpersonid}] aggregates=[count(*)] // { arity: 2 } - Project (#17) // { arity: 1 } + Project (#17{creatorpersonid}) // { arity: 1 } Filter (#8{creationdate} < 2010-06-28 00:00:00 UTC) AND (2010-06-14 00:00:00 UTC < #8{creationdate}) // { arity: 32 } Join on=(#0{id} = #7{tagid} AND #6{messageid} = #9{messageid} AND #17{creatorpersonid} = #22{id}) type=delta // { arity: 32 } implementation @@ -1459,7 +1459,7 @@ Explained Query: ArrangeBy keys=[[#0{creatorpersonid}]] // { arity: 2 } Get l3 // { arity: 2 } cte l5 = - Project (#0{id}, #2) // { arity: 2 } + Project (#0{id}, #2{count}) // { arity: 2 } Join on=(#0{id} = #1{creatorpersonid}) type=differential // { arity: 3 } implementation %1:l4[#0]UKA » %0:l2[#0]K @@ -1473,7 +1473,7 @@ Explained Query: Project (#3, #4) // { arity: 2 } Map (coalesce(#0{id}, #1{creatorpersonid}), (integer_to_bigint(case when (#0{id}) IS NULL then 0 else 100 end) + coalesce(#2{count}, 0))) // { arity: 5 } Union // { arity: 3 } - Project (#2{count}, #0, #1{creatorpersonid}) // { arity: 3 } + Project (#2, #0{creatorpersonid}, #1{count}) // { arity: 3 } Map (null) // { arity: 3 } Union // { arity: 2 } Negate // { arity: 2 } @@ -1491,7 +1491,7 @@ Explained Query: Negate // { arity: 1 } Get l6 // { arity: 1 } Get l2 // { arity: 1 } - Project (#0{id}, #0{id}, #1{id}) // { arity: 3 } + Project (#0{id}, #0{id}, #1{count}) // { arity: 3 } Get l5 // { arity: 2 } Return // { arity: 5 } Map (coalesce(#2{sum}, 0), (bigint_to_numeric(#1) + #3)) // { arity: 5 } @@ -1506,14 +1506,14 @@ Explained Query: Get l7 // { arity: 2 } ArrangeBy keys=[[#0{person1id}], [case when (#2) IS NULL then null else #1{person2id} end]] // { arity: 3 } Union // { arity: 3 } - Project (#1{person2id}..=#3) // { arity: 3 } + Project (#1{person1id}..=#3) // { arity: 3 } Map (true) // { arity: 4 } ReadIndex on=person_knows_person person_knows_person_person1id=[*** full scan ***] // { arity: 3 } Map (null, null) // { arity: 3 } Threshold // { arity: 1 } Union // { arity: 1 } Negate // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{person1id}) // { arity: 1 } ReadIndex on=person_knows_person person_knows_person_person1id=[*** full scan ***] // { arity: 3 } Distinct project=[#0] // { arity: 1 } Union // { arity: 1 } @@ -1535,7 +1535,7 @@ Explained Query: Get l7 // { arity: 2 } Distinct project=[#0{person2id}] // { arity: 1 } Union // { arity: 1 } - Project (#2) // { arity: 1 } + Project (#2{person2id}) // { arity: 1 } ReadIndex on=person_knows_person person_knows_person_person1id=[*** full scan ***] // { arity: 3 } Constant // { arity: 1 } - (null) @@ -1582,7 +1582,7 @@ SELECT Person.id AS "person.id" Explained Query: Finish order_by=[#4{sum_count} desc nulls_first, #0{id} asc nulls_last] limit=100 output=[#0..=#4] Reduce group_by=[#0{id}..=#2{lastname}] aggregates=[count(*), sum(#3{count})] // { arity: 5 } - Project (#1{firstname}..=#3{count}, #25) // { arity: 4 } + Project (#1{id}..=#3{lastname}, #25{count}) // { arity: 4 } Filter (#23{parentmessageid}) IS NULL AND (#11{creationdate} <= 2012-11-24 00:00:00 UTC) AND (#11{creationdate} >= 2012-08-29 00:00:00 UTC) // { arity: 26 } Join on=(#1{id} = #20{creatorpersonid} AND #12{messageid} = #24{rootpostid}) type=delta // { arity: 26 } implementation @@ -1595,7 +1595,7 @@ Explained Query: 
ReadIndex on=message message_messageid=[delta join lookup] message_creatorpersonid=[delta join lookup] // { arity: 13 } ArrangeBy keys=[[#0{rootpostid}]] // { arity: 2 } Reduce group_by=[#0{rootpostid}] aggregates=[count(*)] // { arity: 2 } - Project (#2) // { arity: 1 } + Project (#2{rootpostid}) // { arity: 1 } Filter (#0{creationdate} <= 2012-11-24 00:00:00 UTC) AND (#0{creationdate} >= 2012-08-29 00:00:00 UTC) // { arity: 13 } ReadIndex on=message message_messageid=[*** full scan ***] // { arity: 13 } @@ -1700,9 +1700,9 @@ Explained Query: cte l2 = Distinct project=[#0{person2id}] // { arity: 1 } Union // { arity: 1 } - Project (#2) // { arity: 1 } + Project (#2{person2id}) // { arity: 1 } Get l1 // { arity: 4 } - Project (#6) // { arity: 1 } + Project (#6{person2id}) // { arity: 1 } Join on=(#2{person2id} = #5{person1id}) type=differential // { arity: 7 } implementation %0:l1[#2]KAe » %1:l0[#1]KAe @@ -1711,7 +1711,7 @@ Explained Query: Get l0 // { arity: 3 } Return // { arity: 3 } Reduce group_by=[#0{person2id}, #1{name}] aggregates=[count(*)] // { arity: 3 } - Project (#0{person2id}, #9) // { arity: 2 } + Project (#0{person2id}, #9{name}) // { arity: 2 } Join on=(#0{person2id} = #1{id} = #2{creatorpersonid} AND #3{messageid} = #4{messageid} = #6{messageid} AND #7{tagid} = #8{id}) type=delta // { arity: 12 } implementation %0 » %1[#0]UKA » %2[#0]K » %3[#0]UKA » %4:message_hastag_tag[#1]KA » %5:tag[#0]KA @@ -1725,7 +1725,7 @@ Explained Query: Threshold // { arity: 1 } Union // { arity: 1 } Distinct project=[#0{person2id}] // { arity: 1 } - Project (#6) // { arity: 1 } + Project (#6{person2id}) // { arity: 1 } Join on=(#0{person2id} = #2{person1id} AND #3{person2id} = #5{person1id}) type=delta // { arity: 7 } implementation %0:l2 » %1:person_knows_person[#1]KA » %2:l0[#1]KA @@ -1740,7 +1740,7 @@ Explained Query: Get l2 // { arity: 1 } ArrangeBy keys=[[#0{id}]] // { arity: 1 } Distinct project=[#0{id}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{id}) // { arity: 1 } Filter (#16{name} = "Italy") AND (#1{id}) IS NOT NULL AND (#14{partofcountryid}) IS NOT NULL // { arity: 19 } Join on=(#8{locationcityid} = #11{id} AND #14{partofcountryid} = #15{id}) type=delta // { arity: 19 } implementation @@ -1755,11 +1755,11 @@ Explained Query: ReadIndex on=country country_id=[delta join lookup] // { arity: 4 } ArrangeBy keys=[[#0{creatorpersonid}], [#1{messageid}]] // { arity: 2 } Distinct project=[#1{creatorpersonid}, #0{messageid}] // { arity: 2 } - Project (#1{creatorpersonid}, #9) // { arity: 2 } + Project (#1{messageid}, #9{creatorpersonid}) // { arity: 2 } ReadIndex on=message message_messageid=[*** full scan ***] // { arity: 13 } ArrangeBy keys=[[#0{messageid}]] // { arity: 1 } Distinct project=[#0{messageid}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{messageid}) // { arity: 1 } Join on=(#2{tagid} = #3{id} AND #6{typetagclassid} = #7{id}) type=delta // { arity: 12 } implementation %0:message_hastag_tag » %1:tag[#0]KA » %2:tagclass[#0]KAe @@ -1835,7 +1835,7 @@ SELECT count(*) Explained Query: With cte l0 = - Project (#1{person2id}, #21) // { arity: 2 } + Project (#1{id}, #21{person2id}) // { arity: 2 } Filter (#16{name} = "India") AND (#19{creationdate} <= 2013-01-10 00:00:00 UTC) AND (2012-09-28 00:00:00 UTC <= #19{creationdate}) AND (#14{partofcountryid}) IS NOT NULL // { arity: 22 } Join on=(#1{id} = #20{person1id} AND #8{locationcityid} = #11{id} AND #14{partofcountryid} = #15{id}) type=delta // { arity: 22 } implementation @@ -1932,7 +1932,7 @@ Explained 
Query: ArrangeBy keys=[[#1{id}]] // { arity: 11 } ReadIndex on=person person_id=[differential join] // { arity: 11 } cte l1 = - Project (#1{messageid}, #12) // { arity: 2 } + Project (#1{id}, #12{messageid}) // { arity: 2 } Filter (#19{length} < 120) AND (#11{creationdate} > 2012-06-03 00:00:00 UTC) AND (#15{content}) IS NOT NULL // { arity: 25 } Join on=(#1{id} = #20{creatorpersonid}) type=differential // { arity: 25 } implementation @@ -1942,13 +1942,13 @@ Explained Query: ReadIndex on=materialize.public.message message_rootpostlanguage=[lookup values=[("es"); ("pt"); ("ta")]] // { arity: 14 } Return // { arity: 2 } Reduce group_by=[#0{count_messageid}] aggregates=[count(*)] // { arity: 2 } - Project (#1) // { arity: 1 } + Project (#1{count_messageid}) // { arity: 1 } Reduce group_by=[#0{id}] aggregates=[count(#1{messageid})] // { arity: 2 } Union // { arity: 2 } Map (null) // { arity: 2 } Union // { arity: 1 } Negate // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{id}) // { arity: 1 } Join on=(#1{id} = #11{id}) type=differential // { arity: 12 } implementation %1[#0]UKA » %0:l0[#1]KA @@ -1957,7 +1957,7 @@ Explained Query: Distinct project=[#0{id}] // { arity: 1 } Project (#0{id}) // { arity: 1 } Get l1 // { arity: 2 } - Project (#1) // { arity: 1 } + Project (#1{id}) // { arity: 1 } ReadIndex on=person person_id=[*** full scan ***] // { arity: 11 } Get l1 // { arity: 2 } @@ -2003,7 +2003,7 @@ Explained Query: ArrangeBy keys=[[]] // { arity: 11 } ReadIndex on=person person_id=[*** full scan ***] // { arity: 11 } ArrangeBy keys=[[]] // { arity: 6 } - Project (#0{creationdate}, #1{messageid}, #3{content}, #4{length}, #8, #9) // { arity: 6 } + Project (#0{creationdate}, #1{messageid}, #3{rootpostlanguage}, #4{content}, #8{length}, #9{creatorpersonid}) // { arity: 6 } ReadIndex on=message message_messageid=[*** full scan ***] // { arity: 13 } cte l1 = Project (#0{creationdate}..=#11{messageid}) // { arity: 12 } @@ -2011,7 +2011,7 @@ Explained Query: implementation %1[#0]UKA » %0:l0[#12]Kiif ArrangeBy keys=[[#12{rootpostlanguage}]] // { arity: 13 } - Project (#0{creationdate}..=#10{email}, #12{rootpostlanguage}, #13) // { arity: 13 } + Project (#0{creationdate}..=#10{email}, #12{messageid}, #13{rootpostlanguage}) // { arity: 13 } Filter (#15{length} < 120) AND (#11{creationdate} > 2012-06-03 00:00:00 UTC) AND (#14{content}) IS NOT NULL AND (#1{id} = #16{creatorpersonid}) // { arity: 17 } Get l0 // { arity: 17 } ArrangeBy keys=[[#0{rootpostlanguage}]] // { arity: 1 } @@ -2020,16 +2020,16 @@ Explained Query: Filter (#0{rootpostlanguage} = varchar_to_text(#1)) // { arity: 2 } FlatMap unnest_array({"es", "ta", "pt"}) // { arity: 2 } Distinct project=[#0{rootpostlanguage}] // { arity: 1 } - Project (#13) // { arity: 1 } + Project (#13{rootpostlanguage}) // { arity: 1 } Get l0 // { arity: 17 } Return // { arity: 2 } Reduce group_by=[#0{count_messageid}] aggregates=[count(*)] // { arity: 2 } - Project (#1) // { arity: 1 } + Project (#1{count_messageid}) // { arity: 1 } Reduce group_by=[#0{id}] aggregates=[count(#1{messageid})] // { arity: 2 } Union // { arity: 2 } - Project (#1{messageid}, #11) // { arity: 2 } + Project (#1{id}, #11{messageid}) // { arity: 2 } Get l1 // { arity: 12 } - Project (#1, #22) // { arity: 2 } + Project (#1{id}, #22) // { arity: 2 } Map (null) // { arity: 23 } Join on=(#0{creationdate} = #11{creationdate} AND #1{id} = #12{id} AND #2{firstname} = #13{firstname} AND #3{lastname} = #14{lastname} AND #4{gender} = #15{gender} AND #5{birthday} = #16{birthday} AND 
#6{locationip} = #17{locationip} AND #7{browserused} = #18{browserused} AND #8{locationcityid} = #19{locationcityid} AND #9{speaks} = #20{speaks} AND #10{email} = #21{email}) type=differential // { arity: 22 } implementation @@ -2097,7 +2097,7 @@ Explained Query: Finish order_by=[#3 desc nulls_first, #0{id} asc nulls_last] limit=100 output=[#0..=#3] With cte l0 = - Project (#0{id}, #2{partofcontinentid}..=#6{creationdate}, #8{firstname}..=#15{email}, #17, #18) // { arity: 16 } + Project (#0{id}, #2{url}..=#6{url}, #8{creationdate}..=#15{browserused}, #17{speaks}, #18{email}) // { arity: 16 } Filter (#1{name} = "India") AND (#8{creationdate} < 2012-11-09 00:00:00 UTC) AND (#0{id}) IS NOT NULL // { arity: 19 } Join on=(#0{id} = #7{partofcountryid} AND #4{id} = #16{locationcityid}) type=delta // { arity: 19 } implementation @@ -2111,7 +2111,7 @@ Explained Query: ArrangeBy keys=[[#8{locationcityid}]] // { arity: 11 } ReadIndex on=person person_locationcityid=[delta join lookup] // { arity: 11 } cte l1 = - Project (#0{id}..=#15{email}, #17) // { arity: 17 } + Project (#0{id}..=#15{email}, #17{messageid}) // { arity: 17 } Filter (#16{creationdate} <= 2012-11-09 00:00:00 UTC) AND (#16{creationdate} >= #6{creationdate}) // { arity: 29 } Join on=(#7{id} = #25{creatorpersonid}) type=differential // { arity: 29 } implementation @@ -2126,9 +2126,9 @@ Explained Query: Filter (bigint_to_numeric(#2{count_messageid}) < ((24155 - ((12 * extract_year_tstz(#1{creationdate})) + extract_month_tstz(#1{creationdate}))) + 1)) // { arity: 3 } Reduce group_by=[#1{id}, #0{creationdate}] aggregates=[count(#2{messageid})] // { arity: 3 } Union // { arity: 3 } - Project (#6, #7, #16) // { arity: 3 } + Project (#6{creationdate}, #7{id}, #16{messageid}) // { arity: 3 } Get l1 // { arity: 17 } - Project (#6, #7, #32) // { arity: 3 } + Project (#6{creationdate}, #7{id}, #32) // { arity: 3 } Map (null) // { arity: 33 } Join on=(#0{id} = #16{id} AND #1{url} = #17{url} AND #2{partofcontinentid} = #18{partofcontinentid} AND #3{id} = #19{id} AND #4{name} = #20{name} AND #5{url} = #21{url} AND #6{creationdate} = #22{creationdate} AND #7{id} = #23{id} AND #8{firstname} = #24{firstname} AND #9{lastname} = #25{lastname} AND #10{gender} = #26{gender} AND #11{birthday} = #27{birthday} AND #12{locationip} = #28{locationip} AND #13{browserused} = #29{browserused} AND #14{speaks} = #30{speaks} AND #15{email} = #31{email}) type=differential // { arity: 32 } implementation @@ -2152,7 +2152,7 @@ Explained Query: ArrangeBy keys=[[#0{id}]] // { arity: 1 } Get l3 // { arity: 1 } cte l5 = - Project (#1{creatorpersonid}, #23) // { arity: 2 } + Project (#1{id}, #23{creatorpersonid}) // { arity: 2 } Filter (#0{creationdate} < 2012-11-09 00:00:00 UTC) // { arity: 28 } Join on=(#1{id} = #12{personid} AND #13{messageid} = #15{messageid} AND #23{creatorpersonid} = #27{id}) type=delta // { arity: 28 } implementation @@ -2182,14 +2182,14 @@ Explained Query: Distinct project=[#0{id}] // { arity: 1 } Get l3 // { arity: 1 } cte l8 = - Project (#0{id}, #2{sum}, #3) // { arity: 3 } + Project (#0{id}, #2{count}, #3{sum}) // { arity: 3 } Join on=(#0{id} = #1{creatorpersonid}) type=differential // { arity: 4 } implementation %1[#0]UKA » %0:l4[#0]K Get l4 // { arity: 1 } ArrangeBy keys=[[#0{creatorpersonid}]] // { arity: 3 } Reduce group_by=[#0{creatorpersonid}] aggregates=[count(*), sum(case when #1 then 1 else 0 end)] // { arity: 3 } - Project (#1, #3) // { arity: 2 } + Project (#1{creatorpersonid}, #3) // { arity: 2 } Join on=(#0{id} = #2{id}) 
type=differential // { arity: 4 } implementation %0:l5[#0]K » %1[#0]K @@ -2310,7 +2310,7 @@ Explained Query: ArrangeBy keys=[[#1{id}], [#8{locationcityid}]] // { arity: 11 } ReadIndex on=person person_id=[delta join lookup] person_locationcityid=[delta join lookup] // { arity: 11 } cte l3 = - Project (#4, #5, #9, #21) // { arity: 4 } + Project (#4{id}, #5{name}, #9{id}, #21{person2id}) // { arity: 4 } Filter (#1{name} = "Philippines") AND (#38{name} = "Taiwan") AND (#0{id}) IS NOT NULL AND (#36{partofcountryid}) IS NOT NULL // { arity: 41 } Join on=(#0{id} = #7{partofcountryid} AND #4{id} = #16{locationcityid} AND #9{id} = #20{person1id} AND #21{person2id} = #23{id} AND #30{locationcityid} = #33{id} AND #36{partofcountryid} = #37{id}) type=delta // { arity: 41 } implementation @@ -2332,10 +2332,10 @@ Explained Query: cte l4 = Map (case when #2 then #1{person2id} else #0{id} end, case when #2 then #0{id} else #1{person2id} end) // { arity: 5 } Union // { arity: 3 } - Project (#2..=#4) // { arity: 3 } + Project (#2{id}..=#4) // { arity: 3 } Map (false) // { arity: 5 } Get l3 // { arity: 4 } - Project (#3, #2, #4) // { arity: 3 } + Project (#3{person2id}, #2{id}, #4) // { arity: 3 } Map (true) // { arity: 5 } Get l3 // { arity: 4 } cte l5 = @@ -2354,7 +2354,7 @@ Explained Query: Get l5 // { arity: 2 } ArrangeBy keys=[[#0{creatorpersonid}, #1{creatorpersonid}]] // { arity: 2 } Distinct project=[#1{creatorpersonid}, #0{creatorpersonid}] // { arity: 2 } - Project (#9, #22) // { arity: 2 } + Project (#9{creatorpersonid}, #22{creatorpersonid}) // { arity: 2 } Join on=(#1{messageid} = #25{parentmessageid}) type=differential // { arity: 26 } implementation %0:l6[#1]KA » %1:message[#12]KA @@ -2390,7 +2390,7 @@ Explained Query: Get l9 // { arity: 2 } ArrangeBy keys=[[#0{personid}, #1{creatorpersonid}]] // { arity: 2 } Distinct project=[#1{personid}, #0{creatorpersonid}] // { arity: 2 } - Project (#9, #14) // { arity: 2 } + Project (#9{creatorpersonid}, #14{personid}) // { arity: 2 } Join on=(#1{messageid} = #15{messageid}) type=differential // { arity: 16 } implementation %0:l6[#1]KA » %1:person_likes_message[#2]KA @@ -2398,7 +2398,7 @@ Explained Query: ArrangeBy keys=[[#2{messageid}]] // { arity: 3 } ReadIndex on=person_likes_message person_likes_message_messageid=[differential join] // { arity: 3 } cte l11 = - Project (#0{id}..=#3{person2id}, #6) // { arity: 5 } + Project (#0{id}..=#3{person2id}, #6{sum}) // { arity: 5 } Join on=(#2{id} = #4 AND #3{person2id} = #5) type=differential // { arity: 7 } implementation %1[#0, #1]UKKA » %0:l3[#2, #3]KK @@ -2422,17 +2422,17 @@ Explained Query: Get l10 // { arity: 2 } Get l9 // { arity: 2 } Return // { arity: 4 } - Project (#0{id}, #1{person2id}, #3{sum}, #4) // { arity: 4 } + Project (#0{id}, #1{person2id}, #3{name}, #4{sum}) // { arity: 4 } TopK group_by=[#2{id}] order_by=[#4{sum} desc nulls_first, #0{id} asc nulls_last, #1{person2id} asc nulls_last] limit=1 // { arity: 5 } Union // { arity: 5 } Map (null) // { arity: 5 } Union // { arity: 4 } Negate // { arity: 4 } - Project (#2{id}, #3{name}, #0{id}, #1{person2id}) // { arity: 4 } + Project (#2{id}, #3{person2id}, #0{id}, #1{name}) // { arity: 4 } Get l11 // { arity: 5 } - Project (#2{id}, #3{name}, #0{id}, #1{person2id}) // { arity: 4 } + Project (#2{id}, #3{person2id}, #0{id}, #1{name}) // { arity: 4 } Get l3 // { arity: 4 } - Project (#2{id}, #3{name}, #0{id}, #1{person2id}, #4{sum}) // { arity: 5 } + Project (#2{id}, #3{person2id}, #0{id}, #1{name}, #4{sum}) // { arity: 5 } Get l11 // { arity: 5 } 
Used Indexes: @@ -2517,9 +2517,9 @@ SELECT coalesce(w, -1) FROM results ORDER BY w ASC LIMIT 20 ---- Explained Query: Finish order_by=[#1{min} asc nulls_last] limit=20 output=[#2] - With Mutually Recursive + With cte l0 = - Project (#1{person2id}, #2) // { arity: 2 } + Project (#1{person1id}, #2{person2id}) // { arity: 2 } ReadIndex on=person_knows_person person_knows_person_person1id=[*** full scan ***] // { arity: 3 } cte l1 = ArrangeBy keys=[[greatest(#0{person1id}, #1{person2id}), least(#0{person1id}, #1{person2id})]] // { arity: 2 } @@ -2527,7 +2527,7 @@ Explained Query: cte l2 = ArrangeBy keys=[[#0{id}]] // { arity: 1 } Distinct project=[#0{id}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{id}) // { arity: 1 } Filter (#0{creationdate} <= 2012-11-10 00:00:00 UTC) AND (#0{creationdate} >= 2012-11-06 00:00:00 UTC) AND (#1{id}) IS NOT NULL // { arity: 4 } ReadIndex on=forum forum_id=[*** full scan ***] // { arity: 4 } cte l3 = @@ -2537,64 +2537,66 @@ Explained Query: Get l1 // { arity: 2 } ArrangeBy keys=[[#1, #0]] // { arity: 3 } Reduce group_by=[least(#0{person1id}, #1{person2id}), greatest(#0{person1id}, #1{person2id})] aggregates=[sum(case when (#2{parentmessageid}) IS NULL then 10 else 5 end)] // { arity: 3 } - Project (#1{person2id}, #2{parentmessageid}, #6) // { arity: 3 } + Project (#1{person1id}, #2{person2id}, #6{parentmessageid}) // { arity: 3 } Join on=(#1{person1id} = #4{creatorpersonid} AND #2{person2id} = #7{creatorpersonid} AND #3{messageid} = #9{parentmessageid} AND #5{containerforumid} = #10{id} AND #8{containerforumid} = #11{id}) type=delta // { arity: 12 } implementation - %0:person_knows_person » %1:message[#1]KA » %3:l2[#0]UK » %2:message[#0, #2]KK » %4:l2[#0]UK - %1:message » %3:l2[#0]UK » %0:person_knows_person[#1]KA » %2:message[#0, #2]KK » %4:l2[#0]UK - %2:message » %4:l2[#0]UK » %0:person_knows_person[#2]KA » %1:message[#0, #1]KK » %3:l2[#0]UK - %3:l2 » %1:message[#2]KA » %0:person_knows_person[#1]KA » %2:message[#0, #2]KK » %4:l2[#0]UK - %4:l2 » %2:message[#1]KA » %0:person_knows_person[#2]KA » %1:message[#0, #1]KK » %3:l2[#0]UK + %0:person_knows_person » %1:message[#1]KA » %3:l2[#0]UKA » %2:message[#0, #2]KK » %4:l2[#0]UKA + %1:message » %3:l2[#0]UKA » %0:person_knows_person[#1]KA » %2:message[#0, #2]KK » %4:l2[#0]UKA + %2:message » %4:l2[#0]UKA » %0:person_knows_person[#2]KA » %1:message[#0, #1]KK » %3:l2[#0]UKA + %3:l2 » %1:message[#2]KA » %0:person_knows_person[#1]KA » %2:message[#0, #2]KK » %4:l2[#0]UKA + %4:l2 » %2:message[#1]KA » %0:person_knows_person[#2]KA » %1:message[#0, #1]KK » %3:l2[#0]UKA ArrangeBy keys=[[#1{person1id}], [#2{person2id}]] // { arity: 3 } ReadIndex on=person_knows_person person_knows_person_person1id=[delta join 1st input (full scan)] person_knows_person_person2id=[delta join lookup] // { arity: 3 } ArrangeBy keys=[[#0{messageid}, #1{creatorpersonid}], [#1{creatorpersonid}], [#2{containerforumid}]] // { arity: 4 } - Project (#1{creatorpersonid}, #9, #10, #12) // { arity: 4 } + Project (#1{messageid}, #9{creatorpersonid}, #10{containerforumid}, #12{parentmessageid}) // { arity: 4 } ReadIndex on=message message_messageid=[*** full scan ***] // { arity: 13 } ArrangeBy keys=[[#0{creatorpersonid}, #2{parentmessageid}], [#1{containerforumid}]] // { arity: 3 } - Project (#9, #10, #12) // { arity: 3 } + Project (#9{creatorpersonid}, #10{containerforumid}, #12{parentmessageid}) // { arity: 3 } Filter (#12{parentmessageid}) IS NOT NULL // { arity: 13 } ReadIndex on=message message_messageid=[*** full scan ***] // { arity: 13 
} Get l2 // { arity: 1 } Get l2 // { arity: 1 } - cte l4 = - Project (#2{min}, #0, #1{person2id}) // { arity: 3 } - Map (1450) // { arity: 3 } - Reduce group_by=[#0{person2id}] aggregates=[min(#1)] // { arity: 2 } - Distinct project=[#0{person2id}, #1] // { arity: 2 } - Union // { arity: 2 } - Project (#3, #5) // { arity: 2 } - Map ((#1 + #4)) // { arity: 6 } - Join on=(#0 = #2{person1id}) type=differential // { arity: 5 } - implementation - %0:l4[#0]UK » %1[#0]K - ArrangeBy keys=[[#0]] // { arity: 2 } - Project (#1, #2) // { arity: 2 } - Get l4 // { arity: 3 } - ArrangeBy keys=[[#0{person1id}]] // { arity: 3 } - Project (#0{person1id}, #1{person2id}, #3) // { arity: 3 } - Map ((10 / bigint_to_double((coalesce(#2{sum}, 0) + 10)))) // { arity: 4 } - Union // { arity: 3 } - Map (null) // { arity: 3 } - Union // { arity: 2 } - Negate // { arity: 2 } - Project (#0{person1id}, #1{person2id}) // { arity: 2 } - Join on=(#2 = least(#0{person1id}, #1{person2id}) AND #3 = greatest(#0{person1id}, #1{person2id})) type=differential // { arity: 4 } - implementation - %1[#1, #0]UKK » %0:l1[greatest(#0, #1), least(#0, #1)]KK - Get l1 // { arity: 2 } - ArrangeBy keys=[[#1, #0]] // { arity: 2 } - Distinct project=[#0, #1] // { arity: 2 } - Project (#2, #3) // { arity: 2 } - Get l3 // { arity: 5 } - Get l0 // { arity: 2 } - Project (#0{person1id}, #1{person2id}, #4) // { arity: 3 } - Get l3 // { arity: 5 } - Constant // { arity: 2 } - - (1450, 0) Return // { arity: 3 } - Project (#1{min}, #2{min}, #2{min}) // { arity: 3 } - Filter (#1{person2id} = 15393162796819) AND (#2{min} = #2{min}) // { arity: 3 } - Get l4 // { arity: 3 } + With Mutually Recursive + cte l4 = + Project (#2, #0{person2id}, #1{min}) // { arity: 3 } + Map (1450) // { arity: 3 } + Reduce group_by=[#0{person2id}] aggregates=[min(#1)] // { arity: 2 } + Distinct project=[#0{person2id}, #1] // { arity: 2 } + Union // { arity: 2 } + Project (#3{person2id}, #5) // { arity: 2 } + Map ((#1 + #4)) // { arity: 6 } + Join on=(#0 = #2{person1id}) type=differential // { arity: 5 } + implementation + %0:l4[#0]UK » %1[#0]K + ArrangeBy keys=[[#0]] // { arity: 2 } + Project (#1{person2id}, #2{min}) // { arity: 2 } + Get l4 // { arity: 3 } + ArrangeBy keys=[[#0{person1id}]] // { arity: 3 } + Project (#0{person1id}, #1{person2id}, #3) // { arity: 3 } + Map ((10 / bigint_to_double((coalesce(#2{sum}, 0) + 10)))) // { arity: 4 } + Union // { arity: 3 } + Map (null) // { arity: 3 } + Union // { arity: 2 } + Negate // { arity: 2 } + Project (#0{person1id}, #1{person2id}) // { arity: 2 } + Join on=(#2 = least(#0{person1id}, #1{person2id}) AND #3 = greatest(#0{person1id}, #1{person2id})) type=differential // { arity: 4 } + implementation + %1[#1, #0]UKK » %0:l1[greatest(#0, #1), least(#0, #1)]KK + Get l1 // { arity: 2 } + ArrangeBy keys=[[#1, #0]] // { arity: 2 } + Distinct project=[#0, #1] // { arity: 2 } + Project (#2, #3) // { arity: 2 } + Get l3 // { arity: 5 } + Get l0 // { arity: 2 } + Project (#0{person1id}, #1{person2id}, #4{sum}) // { arity: 3 } + Get l3 // { arity: 5 } + Constant // { arity: 2 } + - (1450, 0) + Return // { arity: 3 } + Project (#1{person2id}, #2{min}, #2{min}) // { arity: 3 } + Filter (#1{person2id} = 15393162796819) AND (#2{min} = #2{min}) // { arity: 3 } + Get l4 // { arity: 3 } Used Indexes: - materialize.public.forum_id (*** full scan ***) @@ -2713,9 +2715,9 @@ EXPLAIN WITH(humanized expressions, arity, join implementations) WITH MUTUALLY R SELECT coalesce(min(w), -1) FROM results ---- Explained Query: - With Mutually Recursive + With 
cte l0 = - Project (#1{person2id}, #2) // { arity: 2 } + Project (#1{person1id}, #2{person2id}) // { arity: 2 } ReadIndex on=person_knows_person person_knows_person_person1id=[*** full scan ***] // { arity: 3 } cte l1 = ArrangeBy keys=[[greatest(#0{person1id}, #1{person2id}), least(#0{person1id}, #1{person2id})]] // { arity: 2 } @@ -2723,7 +2725,7 @@ Explained Query: cte l2 = ArrangeBy keys=[[#0{id}]] // { arity: 1 } Distinct project=[#0{id}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{id}) // { arity: 1 } Filter (#0{creationdate} <= 2012-11-10 00:00:00 UTC) AND (#0{creationdate} >= 2012-11-06 00:00:00 UTC) AND (#1{id}) IS NOT NULL // { arity: 4 } ReadIndex on=forum forum_id=[*** full scan ***] // { arity: 4 } cte l3 = @@ -2733,21 +2735,21 @@ Explained Query: Get l1 // { arity: 2 } ArrangeBy keys=[[#1, #0]] // { arity: 3 } Reduce group_by=[least(#0{person1id}, #1{person2id}), greatest(#0{person1id}, #1{person2id})] aggregates=[sum(case when (#2{parentmessageid}) IS NULL then 10 else 5 end)] // { arity: 3 } - Project (#1{person2id}, #2{parentmessageid}, #6) // { arity: 3 } + Project (#1{person1id}, #2{person2id}, #6{parentmessageid}) // { arity: 3 } Join on=(#1{person1id} = #4{creatorpersonid} AND #2{person2id} = #7{creatorpersonid} AND #3{messageid} = #9{parentmessageid} AND #5{containerforumid} = #10{id} AND #8{containerforumid} = #11{id}) type=delta // { arity: 12 } implementation - %0:person_knows_person » %1:message[#1]KA » %3:l2[#0]UK » %2:message[#0, #2]KK » %4:l2[#0]UK - %1:message » %3:l2[#0]UK » %0:person_knows_person[#1]KA » %2:message[#0, #2]KK » %4:l2[#0]UK - %2:message » %4:l2[#0]UK » %0:person_knows_person[#2]KA » %1:message[#0, #1]KK » %3:l2[#0]UK - %3:l2 » %1:message[#2]KA » %0:person_knows_person[#1]KA » %2:message[#0, #2]KK » %4:l2[#0]UK - %4:l2 » %2:message[#1]KA » %0:person_knows_person[#2]KA » %1:message[#0, #1]KK » %3:l2[#0]UK + %0:person_knows_person » %1:message[#1]KA » %3:l2[#0]UKA » %2:message[#0, #2]KK » %4:l2[#0]UKA + %1:message » %3:l2[#0]UKA » %0:person_knows_person[#1]KA » %2:message[#0, #2]KK » %4:l2[#0]UKA + %2:message » %4:l2[#0]UKA » %0:person_knows_person[#2]KA » %1:message[#0, #1]KK » %3:l2[#0]UKA + %3:l2 » %1:message[#2]KA » %0:person_knows_person[#1]KA » %2:message[#0, #2]KK » %4:l2[#0]UKA + %4:l2 » %2:message[#1]KA » %0:person_knows_person[#2]KA » %1:message[#0, #1]KK » %3:l2[#0]UKA ArrangeBy keys=[[#1{person1id}], [#2{person2id}]] // { arity: 3 } ReadIndex on=person_knows_person person_knows_person_person1id=[delta join 1st input (full scan)] person_knows_person_person2id=[delta join lookup] // { arity: 3 } ArrangeBy keys=[[#0{messageid}, #1{creatorpersonid}], [#1{creatorpersonid}], [#2{containerforumid}]] // { arity: 4 } - Project (#1{creatorpersonid}, #9, #10, #12) // { arity: 4 } + Project (#1{messageid}, #9{creatorpersonid}, #10{containerforumid}, #12{parentmessageid}) // { arity: 4 } ReadIndex on=message message_messageid=[*** full scan ***] // { arity: 13 } ArrangeBy keys=[[#0{creatorpersonid}, #2{parentmessageid}], [#1{containerforumid}]] // { arity: 3 } - Project (#9, #10, #12) // { arity: 3 } + Project (#9{creatorpersonid}, #10{containerforumid}, #12{parentmessageid}) // { arity: 3 } Filter (#12{parentmessageid}) IS NOT NULL // { arity: 13 } ReadIndex on=message message_messageid=[*** full scan ***] // { arity: 13 } Get l2 // { arity: 1 } @@ -2769,222 +2771,224 @@ Explained Query: Project (#2, #3) // { arity: 2 } Get l3 // { arity: 5 } Get l0 // { arity: 2 } - Project (#0{person1id}, #1{person2id}, #4) // { arity: 3 } + 
Project (#0{person1id}, #1{person2id}, #4{sum}) // { arity: 3 } Get l3 // { arity: 5 } - cte l5 = - Project (#1{person2id}, #3) // { arity: 2 } - Join on=(#0 = #2{person1id}) type=differential // { arity: 4 } - implementation - %0:l8[#0]K » %1:l4[#0]K - ArrangeBy keys=[[#0{person2id}]] // { arity: 2 } - Get l8 // { arity: 2 } - ArrangeBy keys=[[#0{person1id}]] // { arity: 2 } - Project (#0{person1id}, #1{person2id}) // { arity: 2 } - Get l4 // { arity: 3 } - cte l6 = - Union // { arity: 2 } - Project (#1, #0{person2id}) // { arity: 2 } - Get l5 // { arity: 2 } - Get l8 // { arity: 2 } - cte l7 = - Distinct project=[] // { arity: 0 } - Project () // { arity: 0 } - Join on=(#0{person2id} = #1{person2id}) type=differential // { arity: 2 } + Return // { arity: 1 } + With Mutually Recursive + cte l5 = + Project (#1, #3{person2id}) // { arity: 2 } + Join on=(#0 = #2{person1id}) type=differential // { arity: 4 } implementation - %0:l6[#0]Kf » %1:l6[#0]Kf - ArrangeBy keys=[[#0{person2id}]] // { arity: 1 } - Project (#0{person2id}) // { arity: 1 } - Filter #1 // { arity: 2 } - Get l6 // { arity: 2 } - ArrangeBy keys=[[#0{person2id}]] // { arity: 1 } - Project (#0{person2id}) // { arity: 1 } - Filter NOT(#1) // { arity: 2 } - Get l6 // { arity: 2 } - cte l8 = - Distinct project=[#0{person2id}, #1] // { arity: 2 } + %0:l8[#0]K » %1:l4[#0]K + ArrangeBy keys=[[#0{person2id}]] // { arity: 2 } + Get l8 // { arity: 2 } + ArrangeBy keys=[[#0{person1id}]] // { arity: 2 } + Project (#0{person1id}, #1{person2id}) // { arity: 2 } + Get l4 // { arity: 3 } + cte l6 = Union // { arity: 2 } - Project (#1, #0{person2id}) // { arity: 2 } - CrossJoin type=differential // { arity: 2 } + Project (#1{person2id}, #0) // { arity: 2 } + Get l5 // { arity: 2 } + Get l8 // { arity: 2 } + cte l7 = + Distinct project=[] // { arity: 0 } + Project () // { arity: 0 } + Join on=(#0{person2id} = #1{person2id}) type=differential // { arity: 2 } implementation - %0:l5[×] » %1[×] - ArrangeBy keys=[[]] // { arity: 2 } - Get l5 // { arity: 2 } - ArrangeBy keys=[[]] // { arity: 0 } - Union // { arity: 0 } - Negate // { arity: 0 } - Get l7 // { arity: 0 } - Constant // { arity: 0 } - - () - Project (#1, #0) // { arity: 2 } - Map (true, -1) // { arity: 2 } - Get l7 // { arity: 0 } - Constant // { arity: 2 } - - (1450, true) - - (15393162796819, false) - cte l9 = - TopK order_by=[#3 asc nulls_last] limit=1000 // { arity: 5 } - Project (#0..=#3, #5) // { arity: 5 } - Filter (#4 = false) // { arity: 6 } + %0:l6[#0]Kf » %1:l6[#0]Kf + ArrangeBy keys=[[#0{person2id}]] // { arity: 1 } + Project (#0{person2id}) // { arity: 1 } + Filter #1 // { arity: 2 } + Get l6 // { arity: 2 } + ArrangeBy keys=[[#0{person2id}]] // { arity: 1 } + Project (#0{person2id}) // { arity: 1 } + Filter NOT(#1) // { arity: 2 } + Get l6 // { arity: 2 } + cte l8 = + Distinct project=[#0{person2id}, #1] // { arity: 2 } + Union // { arity: 2 } + Project (#1{person2id}, #0) // { arity: 2 } + CrossJoin type=differential // { arity: 2 } + implementation + %0:l5[×] » %1[×] + ArrangeBy keys=[[]] // { arity: 2 } + Get l5 // { arity: 2 } + ArrangeBy keys=[[]] // { arity: 0 } + Union // { arity: 0 } + Negate // { arity: 0 } + Get l7 // { arity: 0 } + Constant // { arity: 0 } + - () + Project (#1, #0) // { arity: 2 } + Map (true, -1) // { arity: 2 } + Get l7 // { arity: 0 } + Constant // { arity: 2 } + - (1450, true) + - (15393162796819, false) + cte l9 = + TopK order_by=[#3 asc nulls_last] limit=1000 // { arity: 5 } + Project (#0..=#3, #5) // { arity: 5 } + Filter (#4 = false) // { 
arity: 6 } + Get l17 // { arity: 6 } + cte l10 = + Distinct project=[#0..=#2] // { arity: 3 } + Project (#0..=#2{person2id}) // { arity: 3 } Get l17 // { arity: 6 } - cte l10 = - Distinct project=[#0..=#2] // { arity: 3 } + cte l11 = + ArrangeBy keys=[[#0..=#2]] // { arity: 3 } + Get l10 // { arity: 3 } + cte l12 = Project (#0..=#2) // { arity: 3 } - Get l17 // { arity: 6 } - cte l11 = - ArrangeBy keys=[[#0..=#2]] // { arity: 3 } - Get l10 // { arity: 3 } - cte l12 = - Project (#0..=#2) // { arity: 3 } - Join on=(#0 = #3 AND #1 = #4 AND #2 = #5) type=differential // { arity: 6 } - implementation - %1[#0..=#2]UKKKA » %0:l11[#0..=#2]UKKK - Get l11 // { arity: 3 } - ArrangeBy keys=[[#0..=#2]] // { arity: 3 } - Distinct project=[#0..=#2] // { arity: 3 } - Project (#0..=#2) // { arity: 3 } - Get l9 // { arity: 5 } - cte l13 = - TopK group_by=[#0, #1, #2{person2id}] order_by=[#3 asc nulls_last, #4 desc nulls_first] limit=1 // { arity: 5 } - Union // { arity: 5 } - Project (#3, #4, #1, #7, #8) // { arity: 5 } - Map ((#6 + #2), false) // { arity: 9 } - Join on=(#0{person1id} = #5) type=differential // { arity: 7 } - implementation - %0:l4[#0]K » %1:l9[#2]K - ArrangeBy keys=[[#0{person1id}]] // { arity: 3 } - Get l4 // { arity: 3 } - ArrangeBy keys=[[#2]] // { arity: 4 } - Project (#0..=#3) // { arity: 4 } - Get l9 // { arity: 5 } - Project (#0..=#3, #9) // { arity: 5 } - Map ((#4 OR #8)) // { arity: 10 } - Join on=(#0 = #5 AND #1 = #6 AND #2 = #7) type=differential // { arity: 9 } - implementation - %0:l17[#0..=#2]KKK » %1[#0..=#2]KKK - ArrangeBy keys=[[#0..=#2]] // { arity: 5 } - Project (#0..=#4) // { arity: 5 } - Get l17 // { arity: 6 } - ArrangeBy keys=[[#0..=#2]] // { arity: 4 } - Union // { arity: 4 } - Map (true) // { arity: 4 } - Get l12 // { arity: 3 } - Project (#0..=#2, #6) // { arity: 4 } - Map (false) // { arity: 7 } - Join on=(#0 = #3 AND #1 = #4 AND #2 = #5) type=differential // { arity: 6 } - implementation - %1:l11[#0..=#2]UKKK » %0[#0..=#2]KKK - ArrangeBy keys=[[#0..=#2]] // { arity: 3 } - Union // { arity: 3 } - Negate // { arity: 3 } - Get l12 // { arity: 3 } - Get l10 // { arity: 3 } - Get l11 // { arity: 3 } - cte l14 = - Reduce aggregates=[min((#0 + #1))] // { arity: 1 } - Project (#1, #3) // { arity: 2 } - Join on=(#0{person2id} = #2{person2id}) type=differential // { arity: 4 } + Join on=(#0 = #3 AND #1 = #4 AND #2 = #5) type=differential // { arity: 6 } implementation - %0:l13[#0]Kef » %1:l13[#0]Kef - ArrangeBy keys=[[#0{person2id}]] // { arity: 2 } - Project (#2, #3) // { arity: 2 } - Filter (#0 = false) // { arity: 5 } - Get l13 // { arity: 5 } - ArrangeBy keys=[[#0{person2id}]] // { arity: 2 } - Project (#2, #3) // { arity: 2 } - Filter (#0 = true) // { arity: 5 } - Get l13 // { arity: 5 } - cte l15 = - Project (#1) // { arity: 1 } - Map ((#0{min} / 2)) // { arity: 2 } - Union // { arity: 1 } - Get l14 // { arity: 1 } - Map (null) // { arity: 1 } - Union // { arity: 0 } - Negate // { arity: 0 } - Project () // { arity: 0 } - Get l14 // { arity: 1 } - Constant // { arity: 0 } - - () - cte l16 = - Distinct project=[] // { arity: 0 } - Project () // { arity: 0 } - Filter #1 AND (#0{person2id} = -1) // { arity: 2 } - Get l8 // { arity: 2 } - cte l17 = - Distinct project=[#0..=#5] // { arity: 6 } - Union // { arity: 6 } - Project (#1, #0, #0, #2..=#4) // { arity: 6 } - Map (0, false, 0) // { arity: 5 } - Union // { arity: 2 } - Map (1450, false) // { arity: 2 } - Get l16 // { arity: 0 } - Map (15393162796819, true) // { arity: 2 } - Get l16 // { arity: 0 } - Project 
(#0..=#3, #7, #8) // { arity: 6 } - Map ((#4 OR coalesce((#3 > #6), false)), (#5 + 1)) // { arity: 9 } - CrossJoin type=delta // { arity: 7 } - implementation - %0:l13 » %1[×]U » %2[×]U - %1 » %2[×]U » %0:l13[×] - %2 » %1[×]U » %0:l13[×] - ArrangeBy keys=[[]] // { arity: 5 } - Get l13 // { arity: 5 } - ArrangeBy keys=[[]] // { arity: 1 } - TopK limit=1 // { arity: 1 } - Project (#4) // { arity: 1 } + %1[#0..=#2]UKKKA » %0:l11[#0..=#2]UKKK + Get l11 // { arity: 3 } + ArrangeBy keys=[[#0..=#2]] // { arity: 3 } + Distinct project=[#0..=#2] // { arity: 3 } + Project (#0..=#2) // { arity: 3 } + Get l9 // { arity: 5 } + cte l13 = + TopK group_by=[#0, #1, #2{person2id}] order_by=[#3 asc nulls_last, #4 desc nulls_first] limit=1 // { arity: 5 } + Union // { arity: 5 } + Project (#3, #4, #1{person2id}, #7, #8) // { arity: 5 } + Map ((#6 + #2), false) // { arity: 9 } + Join on=(#0{person1id} = #5) type=differential // { arity: 7 } + implementation + %0:l4[#0]K » %1:l9[#2]K + ArrangeBy keys=[[#0{person1id}]] // { arity: 3 } + Get l4 // { arity: 3 } + ArrangeBy keys=[[#2]] // { arity: 4 } + Project (#0..=#3) // { arity: 4 } Get l9 // { arity: 5 } - ArrangeBy keys=[[]] // { arity: 1 } - Union // { arity: 1 } - Get l15 // { arity: 1 } - Map (null) // { arity: 1 } - Union // { arity: 0 } - Negate // { arity: 0 } - Project () // { arity: 0 } - Get l15 // { arity: 1 } - Constant // { arity: 0 } - - () - Return // { arity: 1 } - With - cte l18 = - Project (#0..=#3) // { arity: 4 } - Join on=(#4 = #5{max}) type=differential // { arity: 6 } - implementation - %1[#0]UK » %0:l17[#4]K - ArrangeBy keys=[[#4]] // { arity: 5 } - Project (#0..=#3, #5) // { arity: 5 } - Get l17 // { arity: 6 } - ArrangeBy keys=[[#0{max}]] // { arity: 1 } - Reduce aggregates=[max(#0)] // { arity: 1 } - Project (#5) // { arity: 1 } - Get l17 // { arity: 6 } - cte l19 = - Reduce aggregates=[min(#0{min})] // { arity: 1 } - Project (#2) // { arity: 1 } - Reduce group_by=[#0, #2] aggregates=[min((#1 + #3))] // { arity: 3 } - Project (#0, #2, #3, #5) // { arity: 4 } - Join on=(#1{person2id} = #4{person2id}) type=differential // { arity: 6 } + Project (#0..=#3, #9) // { arity: 5 } + Map ((#4 OR #8)) // { arity: 10 } + Join on=(#0 = #5 AND #1 = #6 AND #2 = #7) type=differential // { arity: 9 } implementation - %0:l18[#1]Kef » %1:l18[#1]Kef - ArrangeBy keys=[[#1{person2id}]] // { arity: 3 } - Project (#1{person2id}..=#3) // { arity: 3 } - Filter (#0 = false) // { arity: 4 } - Get l18 // { arity: 4 } - ArrangeBy keys=[[#1{person2id}]] // { arity: 3 } - Project (#1{person2id}..=#3) // { arity: 3 } - Filter (#0 = true) // { arity: 4 } - Get l18 // { arity: 4 } + %0:l17[#0..=#2]KKK » %1[#0..=#2]KKK + ArrangeBy keys=[[#0..=#2]] // { arity: 5 } + Project (#0..=#4) // { arity: 5 } + Get l17 // { arity: 6 } + ArrangeBy keys=[[#0..=#2]] // { arity: 4 } + Union // { arity: 4 } + Map (true) // { arity: 4 } + Get l12 // { arity: 3 } + Project (#0..=#2, #6) // { arity: 4 } + Map (false) // { arity: 7 } + Join on=(#0 = #3 AND #1 = #4 AND #2 = #5) type=differential // { arity: 6 } + implementation + %1:l11[#0..=#2]UKKK » %0[#0..=#2]KKK + ArrangeBy keys=[[#0..=#2]] // { arity: 3 } + Union // { arity: 3 } + Negate // { arity: 3 } + Get l12 // { arity: 3 } + Get l10 // { arity: 3 } + Get l11 // { arity: 3 } + cte l14 = + Reduce aggregates=[min((#0 + #1))] // { arity: 1 } + Project (#1, #3) // { arity: 2 } + Join on=(#0{person2id} = #2{person2id}) type=differential // { arity: 4 } + implementation + %0:l13[#0]Kef » %1:l13[#0]Kef + ArrangeBy keys=[[#0{person2id}]] 
// { arity: 2 } + Project (#2{person2id}, #3) // { arity: 2 } + Filter (#0 = false) // { arity: 5 } + Get l13 // { arity: 5 } + ArrangeBy keys=[[#0{person2id}]] // { arity: 2 } + Project (#2{person2id}, #3) // { arity: 2 } + Filter (#0 = true) // { arity: 5 } + Get l13 // { arity: 5 } + cte l15 = + Project (#1) // { arity: 1 } + Map ((#0{min} / 2)) // { arity: 2 } + Union // { arity: 1 } + Get l14 // { arity: 1 } + Map (null) // { arity: 1 } + Union // { arity: 0 } + Negate // { arity: 0 } + Project () // { arity: 0 } + Get l14 // { arity: 1 } + Constant // { arity: 0 } + - () + cte l16 = + Distinct project=[] // { arity: 0 } + Project () // { arity: 0 } + Filter #1 AND (#0{person2id} = -1) // { arity: 2 } + Get l8 // { arity: 2 } + cte l17 = + Distinct project=[#0..=#5] // { arity: 6 } + Union // { arity: 6 } + Project (#1, #0, #0, #2..=#4) // { arity: 6 } + Map (0, false, 0) // { arity: 5 } + Union // { arity: 2 } + Map (1450, false) // { arity: 2 } + Get l16 // { arity: 0 } + Map (15393162796819, true) // { arity: 2 } + Get l16 // { arity: 0 } + Project (#0..=#3, #7, #8) // { arity: 6 } + Map ((#4 OR coalesce((#3 > #6), false)), (#5 + 1)) // { arity: 9 } + CrossJoin type=delta // { arity: 7 } + implementation + %0:l13 » %1[×]U » %2[×]U + %1 » %2[×]U » %0:l13[×] + %2 » %1[×]U » %0:l13[×] + ArrangeBy keys=[[]] // { arity: 5 } + Get l13 // { arity: 5 } + ArrangeBy keys=[[]] // { arity: 1 } + TopK limit=1 // { arity: 1 } + Project (#4) // { arity: 1 } + Get l9 // { arity: 5 } + ArrangeBy keys=[[]] // { arity: 1 } + Union // { arity: 1 } + Get l15 // { arity: 1 } + Map (null) // { arity: 1 } + Union // { arity: 0 } + Negate // { arity: 0 } + Project () // { arity: 0 } + Get l15 // { arity: 1 } + Constant // { arity: 0 } + - () Return // { arity: 1 } - Project (#1) // { arity: 1 } - Map (coalesce(#0{min_min}, -1)) // { arity: 2 } - Union // { arity: 1 } - Get l19 // { arity: 1 } - Map (null) // { arity: 1 } - Union // { arity: 0 } - Negate // { arity: 0 } - Project () // { arity: 0 } - Get l19 // { arity: 1 } - Constant // { arity: 0 } - - () + With + cte l18 = + Project (#0..=#3) // { arity: 4 } + Join on=(#4 = #5{max}) type=differential // { arity: 6 } + implementation + %1[#0]UK » %0:l17[#4]K + ArrangeBy keys=[[#4]] // { arity: 5 } + Project (#0..=#3, #5) // { arity: 5 } + Get l17 // { arity: 6 } + ArrangeBy keys=[[#0{max}]] // { arity: 1 } + Reduce aggregates=[max(#0)] // { arity: 1 } + Project (#5) // { arity: 1 } + Get l17 // { arity: 6 } + cte l19 = + Reduce aggregates=[min(#0{min})] // { arity: 1 } + Project (#2{min}) // { arity: 1 } + Reduce group_by=[#0, #2] aggregates=[min((#1 + #3))] // { arity: 3 } + Project (#0, #2, #3, #5) // { arity: 4 } + Join on=(#1{person2id} = #4{person2id}) type=differential // { arity: 6 } + implementation + %0:l18[#1]Kef » %1:l18[#1]Kef + ArrangeBy keys=[[#1{person2id}]] // { arity: 3 } + Project (#1..=#3) // { arity: 3 } + Filter (#0 = false) // { arity: 4 } + Get l18 // { arity: 4 } + ArrangeBy keys=[[#1{person2id}]] // { arity: 3 } + Project (#1..=#3) // { arity: 3 } + Filter (#0 = true) // { arity: 4 } + Get l18 // { arity: 4 } + Return // { arity: 1 } + Project (#1) // { arity: 1 } + Map (coalesce(#0{min_min}, -1)) // { arity: 2 } + Union // { arity: 1 } + Get l19 // { arity: 1 } + Map (null) // { arity: 1 } + Union // { arity: 0 } + Negate // { arity: 0 } + Project () // { arity: 0 } + Get l19 // { arity: 1 } + Constant // { arity: 0 } + - () Used Indexes: - materialize.public.forum_id (*** full scan ***) @@ -3076,7 +3080,7 @@ Explained Query: cte 
l0 = ArrangeBy keys=[[#0{id}]] // { arity: 1 } Distinct project=[#0{id}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{id}) // { arity: 1 } Filter (#1{id}) IS NOT NULL // { arity: 11 } ReadIndex on=person person_id=[*** full scan ***] // { arity: 11 } cte l1 = @@ -3086,7 +3090,7 @@ Explained Query: ArrangeBy keys=[[#1{name}]] // { arity: 4 } ReadIndex on=tag tag_name=[lookup] // { arity: 4 } cte l3 = - Project (#0{id}, #2) // { arity: 2 } + Project (#0{id}, #2{messageid}) // { arity: 2 } Join on=(#0{id} = #1{creatorpersonid} AND #2{messageid} = #3{messageid}) type=delta // { arity: 4 } implementation %0:l0 » %1[#0]K » %2[#0]UKA @@ -3095,12 +3099,12 @@ Explained Query: Get l0 // { arity: 1 } ArrangeBy keys=[[#0{creatorpersonid}], [#1{messageid}]] // { arity: 2 } Distinct project=[#1{creatorpersonid}, #0{messageid}] // { arity: 2 } - Project (#1{creatorpersonid}, #9) // { arity: 2 } + Project (#1{messageid}, #9{creatorpersonid}) // { arity: 2 } Filter (2012-10-07 00:00:00 = date_to_timestamp(timestamp_with_time_zone_to_date(#0{creationdate}))) // { arity: 13 } ReadIndex on=message message_messageid=[*** full scan ***] // { arity: 13 } ArrangeBy keys=[[#0{messageid}]] // { arity: 1 } Distinct project=[#0{messageid}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{messageid}) // { arity: 1 } Join on=(#2{tagid} = #3{id}) type=differential // { arity: 8 } implementation %1:tag[#0]KAe » %0:l1[#2]KAe @@ -3111,7 +3115,7 @@ Explained Query: ArrangeBy keys=[[#1{person1id}], [#2{person2id}]] // { arity: 3 } ReadIndex on=person_knows_person person_knows_person_person1id=[delta join lookup] person_knows_person_person2id=[delta join lookup] // { arity: 3 } cte l5 = - Project (#0{id}, #1{messageid}, #4) // { arity: 3 } + Project (#0{id}, #1{messageid}, #4{person2id}) // { arity: 3 } Join on=(#0{id} = #3{person1id} AND #4{person2id} = #5{id}) type=delta // { arity: 6 } implementation %0:l3 » %1:l4[#1]KA » %2[#0]UKA @@ -3125,7 +3129,7 @@ Explained Query: Project (#0{id}) // { arity: 1 } Get l3 // { arity: 2 } cte l6 = - Project (#0{id}, #2) // { arity: 2 } + Project (#0{id}, #2{messageid}) // { arity: 2 } Join on=(#0{id} = #1{creatorpersonid} AND #2{messageid} = #3{messageid}) type=delta // { arity: 4 } implementation %0:l0 » %1[#0]K » %2[#0]UKA @@ -3134,12 +3138,12 @@ Explained Query: Get l0 // { arity: 1 } ArrangeBy keys=[[#0{creatorpersonid}], [#1{messageid}]] // { arity: 2 } Distinct project=[#1{creatorpersonid}, #0{messageid}] // { arity: 2 } - Project (#1{creatorpersonid}, #9) // { arity: 2 } + Project (#1{messageid}, #9{creatorpersonid}) // { arity: 2 } Filter (2012-12-14 00:00:00 = date_to_timestamp(timestamp_with_time_zone_to_date(#0{creationdate}))) // { arity: 13 } ReadIndex on=message message_messageid=[*** full scan ***] // { arity: 13 } ArrangeBy keys=[[#0{messageid}]] // { arity: 1 } Distinct project=[#0{messageid}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{messageid}) // { arity: 1 } Join on=(#2{tagid} = #3{id}) type=differential // { arity: 8 } implementation %1:tag[#0]KAe » %0:l1[#2]KAe @@ -3147,7 +3151,7 @@ Explained Query: ArrangeBy keys=[[#0{id}]] // { arity: 5 } ReadIndex on=materialize.public.tag tag_name=[lookup value=("Thailand_Noriega")] // { arity: 5 } cte l7 = - Project (#0{id}, #1{messageid}, #4) // { arity: 3 } + Project (#0{id}, #1{messageid}, #4{person2id}) // { arity: 3 } Join on=(#0{id} = #3{person1id} AND #4{person2id} = #5{id}) type=delta // { arity: 6 } implementation %0:l6 » %1:l4[#1]KA » %2[#0]UKA @@ -3249,7 +3253,7 @@ 
Explained Query: Finish order_by=[#1{count_messageid} desc nulls_first, #0{creatorpersonid} asc nulls_last] limit=10 output=[#0, #1] With cte l0 = - Project (#0{creationdate}, #1{messageid}, #9, #10, #12) // { arity: 5 } + Project (#0{creationdate}, #1{messageid}, #9{creatorpersonid}, #10{containerforumid}, #12{parentmessageid}) // { arity: 5 } Join on=(#1{messageid} = #13{messageid}) type=differential // { arity: 14 } implementation %1[#0]UKA » %0:message[#1]KA @@ -3257,7 +3261,7 @@ Explained Query: ReadIndex on=message message_messageid=[differential join] // { arity: 13 } ArrangeBy keys=[[#0{messageid}]] // { arity: 1 } Distinct project=[#0{messageid}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{messageid}) // { arity: 1 } Join on=(#2{tagid} = #3{id}) type=differential // { arity: 4 } implementation %1[#0]UKA » %0:message_hastag_tag[#2]KA @@ -3272,7 +3276,7 @@ Explained Query: ArrangeBy keys=[[#1{forumid}], [#2{personid}]] // { arity: 3 } ReadIndex on=forum_hasmember_person forum_hasmember_person_forumid=[delta join lookup] forum_hasmember_person_personid=[delta join lookup] // { arity: 3 } cte l2 = - Project (#1{messageid}, #4, #6) // { arity: 3 } + Project (#1{creatorpersonid}, #4{messageid}, #6{containerforumid}) // { arity: 3 } Filter (#2{containerforumid} != #6{containerforumid}) AND (#5{creatorpersonid} != #7{creatorpersonid}) AND ((#0{creationdate} + 12:00:00) < #3{creationdate}) // { arity: 15 } Join on=(#2{containerforumid} = #10{forumid} = #13{forumid} AND #4{messageid} = #8{parentmessageid} AND #5{creatorpersonid} = #14{personid} AND #7{creatorpersonid} = #11{personid}) type=delta // { arity: 15 } implementation @@ -3282,20 +3286,20 @@ Explained Query: %3:l1 » %4:l1[#1]KA » %0:l0[#2]K » %1:l0[#2]K » %2:l0[#0, #1]KK %4:l1 » %3:l1[#1]KA » %0:l0[#2]K » %1:l0[#2]K » %2:l0[#0, #1]KK ArrangeBy keys=[[#2{containerforumid}]] // { arity: 3 } - Project (#0{creationdate}, #2{containerforumid}, #3) // { arity: 3 } + Project (#0{creationdate}, #2{creatorpersonid}, #3{containerforumid}) // { arity: 3 } Get l0 // { arity: 5 } ArrangeBy keys=[[#1{messageid}, #2{creatorpersonid}], [#2{creatorpersonid}]] // { arity: 4 } Project (#0{creationdate}..=#3{containerforumid}) // { arity: 4 } Get l0 // { arity: 5 } ArrangeBy keys=[[#0{creatorpersonid}, #1{parentmessageid}]] // { arity: 2 } - Project (#2, #4) // { arity: 2 } + Project (#2{creatorpersonid}, #4{parentmessageid}) // { arity: 2 } Filter (#4{parentmessageid}) IS NOT NULL // { arity: 5 } Get l0 // { arity: 5 } Get l1 // { arity: 3 } Get l1 // { arity: 3 } cte l3 = Distinct project=[#0{creatorpersonid}, #1{containerforumid}] // { arity: 2 } - Project (#0{creatorpersonid}, #2) // { arity: 2 } + Project (#0{creatorpersonid}, #2{containerforumid}) // { arity: 2 } Get l2 // { arity: 3 } Return // { arity: 2 } Reduce group_by=[#0{creatorpersonid}] aggregates=[count(distinct #1{messageid})] // { arity: 2 } @@ -3316,7 +3320,7 @@ Explained Query: Get l3 // { arity: 2 } ArrangeBy keys=[[#0{personid}, #1{forumid}]] // { arity: 2 } Distinct project=[#1{personid}, #0{forumid}] // { arity: 2 } - Project (#1{personid}, #2) // { arity: 2 } + Project (#1{forumid}, #2{personid}) // { arity: 2 } ReadIndex on=forum_hasmember_person forum_hasmember_person_forumid=[*** full scan ***] // { arity: 3 } Get l3 // { arity: 2 } @@ -3369,21 +3373,21 @@ Explained Query: With cte l0 = ArrangeBy keys=[[#1{person2id}]] // { arity: 2 } - Project (#0{personid}, #9) // { arity: 2 } + Project (#0{personid}, #9{person2id}) // { arity: 2 } Join on=(#0{personid} = 
#8{person1id} AND #1{tagid} = #2{id}) type=delta // { arity: 10 } implementation %0:person_hasinterest_tag » %1:tag[#0]KAe » %2:person_knows_person[#1]KA %1:tag » %0:person_hasinterest_tag[#1]KA » %2:person_knows_person[#1]KA %2:person_knows_person » %0:person_hasinterest_tag[#0]K » %1:tag[#0]KAe ArrangeBy keys=[[#0{personid}], [#1{tagid}]] // { arity: 2 } - Project (#1{tagid}, #2) // { arity: 2 } + Project (#1{personid}, #2{tagid}) // { arity: 2 } ReadIndex on=person_hasinterest_tag person_hasinterest_tag_tagid=[*** full scan ***] // { arity: 3 } ArrangeBy keys=[[#0{id}]] // { arity: 5 } ReadIndex on=materialize.public.tag tag_name=[lookup value=("Fyodor_Dostoyevsky")] // { arity: 5 } ArrangeBy keys=[[#1{person1id}]] // { arity: 3 } ReadIndex on=person_knows_person person_knows_person_person1id=[delta join lookup] // { arity: 3 } cte l1 = - Project (#0{personid}, #2) // { arity: 2 } + Project (#0{personid}, #2{personid}) // { arity: 2 } Filter (#0{personid} != #2{personid}) // { arity: 4 } Join on=(#1{person2id} = #3{person2id}) type=differential // { arity: 4 } implementation @@ -3412,7 +3416,7 @@ Explained Query: Get l2 // { arity: 2 } ArrangeBy keys=[[#0{person2id}, #1{person1id}]] // { arity: 2 } Distinct project=[#1{person2id}, #0{person1id}] // { arity: 2 } - Project (#1{person2id}, #2) // { arity: 2 } + Project (#1{person1id}, #2{person2id}) // { arity: 2 } ReadIndex on=person_knows_person person_knows_person_person1id=[*** full scan ***] // { arity: 3 } Get l2 // { arity: 2 } @@ -3467,7 +3471,7 @@ materialize.public.pathq19: Project (#0{person1id}, #1{person2id}, #3) // { arity: 3 } Map (greatest(f64toi64(roundf64((40 - sqrtf64(bigint_to_double(#2{count}))))), 1)) // { arity: 4 } Reduce group_by=[#0{person1id}, #1{person2id}] aggregates=[count(*)] // { arity: 3 } - Project (#16, #17) // { arity: 2 } + Project (#16{person1id}, #17{person2id}) // { arity: 2 } Filter (#0{creatorpersonid} != #11{creatorpersonid}) AND (#16{person1id} < #17{person2id}) // { arity: 18 } Join on=(#1{parentmessageid} = #3{messageid} AND #16{person1id} = least(#0{creatorpersonid}, #11{creatorpersonid}) AND #17{person2id} = greatest(#0{creatorpersonid}, #11{creatorpersonid})) type=delta // { arity: 18 } implementation @@ -3475,7 +3479,7 @@ materialize.public.pathq19: %1:message » %0:message[#1]KA » %2:person_knows_person[#1, #2]KKAf %2:person_knows_person » %0:message[×] » %1:message[#1]KA ArrangeBy keys=[[], [#1{parentmessageid}]] // { arity: 2 } - Project (#9, #12) // { arity: 2 } + Project (#9{creatorpersonid}, #12{parentmessageid}) // { arity: 2 } Filter (#12{parentmessageid}) IS NOT NULL // { arity: 13 } ReadIndex on=message message_messageid=[*** full scan ***] // { arity: 13 } ArrangeBy keys=[[#1{messageid}]] // { arity: 13 } @@ -3485,7 +3489,7 @@ materialize.public.pathq19: Return // { arity: 3 } Union // { arity: 3 } Get l0 // { arity: 3 } - Project (#1{person1id}, #0{person2id}, #2) // { arity: 3 } + Project (#1{person2id}, #0{person1id}, #2) // { arity: 3 } Get l0 // { arity: 3 } Used Indexes: @@ -3569,7 +3573,7 @@ Explained Query: Project (#1{id}, #1{id}, #12) // { arity: 3 } Map (0) // { arity: 13 } ReadIndex on=materialize.public.person person_locationcityid=[lookup value=(655)] // { arity: 12 } - Project (#0, #4, #6) // { arity: 3 } + Project (#0, #4{dst}, #6) // { arity: 3 } Map ((#2 + bigint_to_double(#5{w}))) // { arity: 7 } Join on=(#1 = #3{src}) type=differential // { arity: 6 } implementation @@ -3591,7 +3595,7 @@ Explained Query: Get l0 // { arity: 3 } ArrangeBy keys=[[#0{id}]] // { 
arity: 1 } Distinct project=[#0{id}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{id}) // { arity: 1 } Filter (#1{id}) IS NOT NULL // { arity: 12 } ReadIndex on=materialize.public.person person_locationcityid=[lookup value=(1138)] // { arity: 12 } Return // { arity: 3 } @@ -3603,7 +3607,7 @@ Explained Query: Get l1 // { arity: 3 } ArrangeBy keys=[[#0{min_min}]] // { arity: 1 } Reduce aggregates=[min(#0{min})] // { arity: 1 } - Project (#2) // { arity: 1 } + Project (#2{min}) // { arity: 1 } Get l1 // { arity: 3 } Used Indexes: @@ -3670,101 +3674,103 @@ FROM paths WHERE w = (SELECT MIN(w) FROM paths) ---- Explained Query: - With Mutually Recursive + With cte l0 = ArrangeBy keys=[[#8{locationcityid}]] // { arity: 11 } ReadIndex on=person person_locationcityid=[lookup] // { arity: 11 } cte l1 = - ArrangeBy keys=[[]] // { arity: 1 } + ArrangeBy keys=[[#0{src}]] // { arity: 3 } + ReadIndex on=pathq19 pathq19_src=[delta join lookup] // { arity: 3 } + Return // { arity: 3 } + With Mutually Recursive + cte l2 = + ArrangeBy keys=[[]] // { arity: 1 } + Union // { arity: 1 } + Project (#1) // { arity: 1 } + Map ((#0 / 2)) // { arity: 2 } + Get l7 // { arity: 1 } + Map (null) // { arity: 1 } + Union // { arity: 0 } + Negate // { arity: 0 } + Project () // { arity: 0 } + Get l7 // { arity: 1 } + Constant // { arity: 0 } + - () + cte l3 = + TopK group_by=[#0{id}, #1{id}] order_by=[#2 asc nulls_last] limit=1 // { arity: 3 } + Union // { arity: 3 } + Project (#1{id}, #1{id}, #12) // { arity: 3 } + Map (0) // { arity: 13 } + ReadIndex on=materialize.public.person person_locationcityid=[lookup value=(655)] // { arity: 12 } + Project (#0, #5{dst}, #7) // { arity: 3 } + Filter coalesce((#2 < #3), true) // { arity: 8 } + Map ((#2 + bigint_to_double(#6{w}))) // { arity: 8 } + Join on=(#1 = #4{src}) type=delta // { arity: 7 } + implementation + %0:l3 » %2:l1[#0]KA » %1:l2[×] + %1:l2 » %0:l3[×] » %2:l1[#0]KA + %2:l1 » %0:l3[#1]K » %1:l2[×] + ArrangeBy keys=[[], [#1{id}]] // { arity: 3 } + Filter (#1{id}) IS NOT NULL // { arity: 3 } + Get l3 // { arity: 3 } + Get l2 // { arity: 1 } + Get l1 // { arity: 3 } + cte l4 = + TopK group_by=[#0{id}, #1{id}] order_by=[#2 asc nulls_last] limit=1 // { arity: 3 } + Union // { arity: 3 } + Project (#1{id}, #1{id}, #12) // { arity: 3 } + Map (0) // { arity: 13 } + ReadIndex on=materialize.public.person person_locationcityid=[lookup value=(1138)] // { arity: 12 } + Project (#0, #5{dst}, #7) // { arity: 3 } + Filter coalesce((#2 < #3), true) // { arity: 8 } + Map ((#2 + bigint_to_double(#6{w}))) // { arity: 8 } + Join on=(#1 = #4{src}) type=delta // { arity: 7 } + implementation + %0:l4 » %2:l1[#0]KA » %1:l2[×] + %1:l2 » %0:l4[×] » %2:l1[#0]KA + %2:l1 » %0:l4[#1]K » %1:l2[×] + ArrangeBy keys=[[], [#1{id}]] // { arity: 3 } + Filter (#1{id}) IS NOT NULL // { arity: 3 } + Get l4 // { arity: 3 } + Get l2 // { arity: 1 } + Get l1 // { arity: 3 } + cte l5 = + Reduce group_by=[#0{id}, #2{id}] aggregates=[min((#1 + #3))] // { arity: 3 } + Project (#0{id}, #2, #3{id}, #5) // { arity: 4 } + Join on=(#1{id} = #4{id}) type=differential // { arity: 6 } + implementation + %0:l3[#1]K » %1:l4[#1]K + ArrangeBy keys=[[#1{id}]] // { arity: 3 } + Filter (#1{id}) IS NOT NULL // { arity: 3 } + Get l3 // { arity: 3 } + ArrangeBy keys=[[#1{id}]] // { arity: 3 } + Filter (#1{id}) IS NOT NULL // { arity: 3 } + Get l4 // { arity: 3 } + cte l6 = + Reduce aggregates=[min(#0{min})] // { arity: 1 } + Project (#2{min}) // { arity: 1 } + Get l5 // { arity: 3 } + cte l7 = Union // { arity: 1 } - 
Project (#1) // { arity: 1 } - Map ((#0 / 2)) // { arity: 2 } - Get l7 // { arity: 1 } + Get l6 // { arity: 1 } Map (null) // { arity: 1 } Union // { arity: 0 } Negate // { arity: 0 } Project () // { arity: 0 } - Get l7 // { arity: 1 } + Get l6 // { arity: 1 } Constant // { arity: 0 } - () - cte l2 = - ArrangeBy keys=[[#0{src}]] // { arity: 3 } - ReadIndex on=pathq19 pathq19_src=[delta join lookup] // { arity: 3 } - cte l3 = - TopK group_by=[#0{id}, #1{id}] order_by=[#2 asc nulls_last] limit=1 // { arity: 3 } - Union // { arity: 3 } - Project (#1{id}, #1{id}, #12) // { arity: 3 } - Map (0) // { arity: 13 } - ReadIndex on=materialize.public.person person_locationcityid=[lookup value=(655)] // { arity: 12 } - Project (#0, #5, #7) // { arity: 3 } - Filter coalesce((#2 < #3), true) // { arity: 8 } - Map ((#2 + bigint_to_double(#6{w}))) // { arity: 8 } - Join on=(#1 = #4{src}) type=delta // { arity: 7 } - implementation - %0:l3 » %2:l2[#0]KA » %1:l1[×] - %1:l1 » %0:l3[×] » %2:l2[#0]KA - %2:l2 » %0:l3[#1]K » %1:l1[×] - ArrangeBy keys=[[], [#1{id}]] // { arity: 3 } - Filter (#1{id}) IS NOT NULL // { arity: 3 } - Get l3 // { arity: 3 } - Get l1 // { arity: 1 } - Get l2 // { arity: 3 } - cte l4 = - TopK group_by=[#0{id}, #1{id}] order_by=[#2 asc nulls_last] limit=1 // { arity: 3 } - Union // { arity: 3 } - Project (#1{id}, #1{id}, #12) // { arity: 3 } - Map (0) // { arity: 13 } - ReadIndex on=materialize.public.person person_locationcityid=[lookup value=(1138)] // { arity: 12 } - Project (#0, #5, #7) // { arity: 3 } - Filter coalesce((#2 < #3), true) // { arity: 8 } - Map ((#2 + bigint_to_double(#6{w}))) // { arity: 8 } - Join on=(#1 = #4{src}) type=delta // { arity: 7 } - implementation - %0:l4 » %2:l2[#0]KA » %1:l1[×] - %1:l1 » %0:l4[×] » %2:l2[#0]KA - %2:l2 » %0:l4[#1]K » %1:l1[×] - ArrangeBy keys=[[], [#1{id}]] // { arity: 3 } - Filter (#1{id}) IS NOT NULL // { arity: 3 } - Get l4 // { arity: 3 } - Get l1 // { arity: 1 } - Get l2 // { arity: 3 } - cte l5 = - Reduce group_by=[#0{id}, #2{id}] aggregates=[min((#1 + #3))] // { arity: 3 } - Project (#0{id}, #2{id}, #3, #5) // { arity: 4 } - Join on=(#1{id} = #4{id}) type=differential // { arity: 6 } - implementation - %0:l3[#1]K » %1:l4[#1]K - ArrangeBy keys=[[#1{id}]] // { arity: 3 } - Filter (#1{id}) IS NOT NULL // { arity: 3 } - Get l3 // { arity: 3 } - ArrangeBy keys=[[#1{id}]] // { arity: 3 } - Filter (#1{id}) IS NOT NULL // { arity: 3 } - Get l4 // { arity: 3 } - cte l6 = - Reduce aggregates=[min(#0{min})] // { arity: 1 } - Project (#2) // { arity: 1 } - Get l5 // { arity: 3 } - cte l7 = - Union // { arity: 1 } - Get l6 // { arity: 1 } - Map (null) // { arity: 1 } - Union // { arity: 0 } - Negate // { arity: 0 } - Project () // { arity: 0 } - Get l6 // { arity: 1 } - Constant // { arity: 0 } - - () - Return // { arity: 3 } - Project (#0{id}..=#2{min}) // { arity: 3 } - Join on=(#2{min} = #3{min_min}) type=differential // { arity: 4 } - implementation - %1[#0]UK » %0:l5[#2]K - ArrangeBy keys=[[#2{min}]] // { arity: 3 } - Get l5 // { arity: 3 } - ArrangeBy keys=[[#0{min_min}]] // { arity: 1 } - Reduce aggregates=[min(#0{min})] // { arity: 1 } - Project (#2) // { arity: 1 } - Get l5 // { arity: 3 } + Return // { arity: 3 } + Project (#0{id}..=#2{min}) // { arity: 3 } + Join on=(#2{min} = #3{min_min}) type=differential // { arity: 4 } + implementation + %1[#0]UK » %0:l5[#2]K + ArrangeBy keys=[[#2{min}]] // { arity: 3 } + Get l5 // { arity: 3 } + ArrangeBy keys=[[#0{min_min}]] // { arity: 1 } + Reduce aggregates=[min(#0{min})] // { arity: 1 } + 
Project (#2{min}) // { arity: 1 } + Get l5 // { arity: 3 } Used Indexes: - materialize.public.person_locationcityid (lookup) @@ -3829,172 +3835,174 @@ SELECT * FROM results WHERE w = (SELECT min(w) FROM results) ORDER BY f, t ---- Explained Query: Finish order_by=[#0{id} asc nulls_last, #1{id} asc nulls_last] output=[#0..=#2] - With Mutually Recursive + With cte l0 = - TopK order_by=[#3 asc nulls_last] limit=1000 // { arity: 5 } - Project (#0..=#3, #5) // { arity: 5 } - Filter (#4 = false) // { arity: 6 } - Get l7 // { arity: 6 } - cte l1 = - Distinct project=[#0..=#2] // { arity: 3 } - Project (#0..=#2) // { arity: 3 } - Get l7 // { arity: 6 } - cte l2 = - Project (#0..=#2) // { arity: 3 } - Join on=(#0 = #3 AND #1 = #4 AND #2 = #5) type=differential // { arity: 6 } - implementation - %1[#0..=#2]UKKKA » %0:l1[#0..=#2]UKKK - ArrangeBy keys=[[#0..=#2]] // { arity: 3 } - Filter (#1) IS NOT NULL AND (#2) IS NOT NULL // { arity: 3 } - Get l1 // { arity: 3 } - ArrangeBy keys=[[#0..=#2]] // { arity: 3 } - Distinct project=[#0..=#2] // { arity: 3 } - Project (#0..=#2) // { arity: 3 } - Filter (#1) IS NOT NULL AND (#2) IS NOT NULL // { arity: 5 } - Get l0 // { arity: 5 } - cte l3 = - TopK group_by=[#0, #1, #2{dst}] order_by=[#3 asc nulls_last, #4 desc nulls_first] limit=1 // { arity: 5 } - Distinct project=[#0..=#4] // { arity: 5 } - Union // { arity: 5 } - Project (#3, #4, #1, #7, #8) // { arity: 5 } - Map ((#6 + bigint_to_double(#2{w})), false) // { arity: 9 } - Join on=(#0{src} = #5) type=differential // { arity: 7 } - implementation - %0:pathq19[#0]KA » %1:l0[#2]K - ArrangeBy keys=[[#0{src}]] // { arity: 3 } - ReadIndex on=pathq19 pathq19_src=[differential join] // { arity: 3 } - ArrangeBy keys=[[#2]] // { arity: 4 } - Project (#0..=#3) // { arity: 4 } - Filter (#2) IS NOT NULL // { arity: 5 } - Get l0 // { arity: 5 } - Project (#0..=#3, #9) // { arity: 5 } - Map ((#4 OR #8)) // { arity: 10 } - Join on=(#0 = #5 AND #1 = #6 AND #2 = #7) type=differential // { arity: 9 } - implementation - %0:l7[#0..=#2]KKK » %1[#0..=#2]KKK - ArrangeBy keys=[[#0..=#2]] // { arity: 5 } - Project (#0..=#4) // { arity: 5 } - Get l7 // { arity: 6 } - ArrangeBy keys=[[#0..=#2]] // { arity: 4 } - Union // { arity: 4 } - Map (true) // { arity: 4 } - Get l2 // { arity: 3 } - Project (#0..=#2, #6) // { arity: 4 } - Map (false) // { arity: 7 } - Join on=(#0 = #3 AND #1 = #4 AND #2 = #5) type=differential // { arity: 6 } - implementation - %1:l1[#0..=#2]UKKK » %0[#0..=#2]KKK - ArrangeBy keys=[[#0..=#2]] // { arity: 3 } - Union // { arity: 3 } - Negate // { arity: 3 } - Get l2 // { arity: 3 } - Get l1 // { arity: 3 } - ArrangeBy keys=[[#0..=#2]] // { arity: 3 } - Get l1 // { arity: 3 } - cte l4 = - Reduce aggregates=[min((#0 + #1))] // { arity: 1 } - Project (#1, #3) // { arity: 2 } - Join on=(#0{dst} = #2{dst}) type=differential // { arity: 4 } - implementation - %0:l3[#0]Kef » %1:l3[#0]Kef - ArrangeBy keys=[[#0{dst}]] // { arity: 2 } - Project (#2, #3) // { arity: 2 } - Filter (#0 = false) AND (#2{dst}) IS NOT NULL // { arity: 5 } - Get l3 // { arity: 5 } - ArrangeBy keys=[[#0{dst}]] // { arity: 2 } - Project (#2, #3) // { arity: 2 } - Filter (#0 = true) AND (#2{dst}) IS NOT NULL // { arity: 5 } - Get l3 // { arity: 5 } - cte l5 = - Project (#1) // { arity: 1 } - Map ((#0{min} / 2)) // { arity: 2 } - Union // { arity: 1 } - Get l4 // { arity: 1 } - Map (null) // { arity: 1 } - Union // { arity: 0 } - Negate // { arity: 0 } - Project () // { arity: 0 } - Get l4 // { arity: 1 } - Constant // { arity: 0 } - - () - cte l6 = 
ArrangeBy keys=[[#8{locationcityid}]] // { arity: 11 } ReadIndex on=person person_locationcityid=[lookup] // { arity: 11 } - cte l7 = - Distinct project=[#0..=#5] // { arity: 6 } - Union // { arity: 6 } - Project (#1{id}, #0, #0, #2{id}..=#4) // { arity: 6 } - Map (0, false, 0) // { arity: 5 } - Union // { arity: 2 } - Project (#1, #12) // { arity: 2 } - Map (false) // { arity: 13 } - ReadIndex on=materialize.public.person person_locationcityid=[lookup value=(655)] // { arity: 12 } - Project (#1, #12) // { arity: 2 } - Map (true) // { arity: 13 } - ReadIndex on=materialize.public.person person_locationcityid=[lookup value=(1138)] // { arity: 12 } - Project (#0..=#3, #7, #8) // { arity: 6 } - Map ((#4 OR coalesce((#3 > #6), false)), (#5 + 1)) // { arity: 9 } - CrossJoin type=delta // { arity: 7 } - implementation - %0:l3 » %1[×]U » %2[×]U - %1 » %2[×]U » %0:l3[×] - %2 » %1[×]U » %0:l3[×] - ArrangeBy keys=[[]] // { arity: 5 } - Get l3 // { arity: 5 } - ArrangeBy keys=[[]] // { arity: 1 } - TopK limit=1 // { arity: 1 } - Project (#4) // { arity: 1 } - Get l0 // { arity: 5 } - ArrangeBy keys=[[]] // { arity: 1 } - Union // { arity: 1 } - Get l5 // { arity: 1 } - Map (null) // { arity: 1 } - Union // { arity: 0 } - Negate // { arity: 0 } - Project () // { arity: 0 } - Get l5 // { arity: 1 } - Constant // { arity: 0 } - - () Return // { arity: 3 } - With - cte l8 = - Project (#0..=#3) // { arity: 4 } - Join on=(#4 = #5{max}) type=differential // { arity: 6 } + With Mutually Recursive + cte l1 = + TopK order_by=[#3 asc nulls_last] limit=1000 // { arity: 5 } + Project (#0..=#3, #5) // { arity: 5 } + Filter (#4 = false) // { arity: 6 } + Get l7 // { arity: 6 } + cte l2 = + Distinct project=[#0..=#2] // { arity: 3 } + Project (#0..=#2{id}) // { arity: 3 } + Get l7 // { arity: 6 } + cte l3 = + Project (#0..=#2) // { arity: 3 } + Join on=(#0 = #3 AND #1 = #4 AND #2 = #5) type=differential // { arity: 6 } implementation - %1[#0]UK » %0:l7[#4]K - ArrangeBy keys=[[#4]] // { arity: 5 } - Project (#0..=#3, #5) // { arity: 5 } - Filter (#2{id}) IS NOT NULL // { arity: 6 } - Get l7 // { arity: 6 } - ArrangeBy keys=[[#0{max}]] // { arity: 1 } - Reduce aggregates=[max(#0)] // { arity: 1 } - Project (#5) // { arity: 1 } - Get l7 // { arity: 6 } - cte l9 = - Reduce group_by=[#0{id}, #2{id}] aggregates=[min((#1 + #3))] // { arity: 3 } - Project (#0{id}, #2{id}, #3, #5) // { arity: 4 } - Join on=(#1{id} = #4{id}) type=differential // { arity: 6 } + %1[#0..=#2]UKKKA » %0:l2[#0..=#2]UKKK + ArrangeBy keys=[[#0..=#2]] // { arity: 3 } + Filter (#1) IS NOT NULL AND (#2) IS NOT NULL // { arity: 3 } + Get l2 // { arity: 3 } + ArrangeBy keys=[[#0..=#2]] // { arity: 3 } + Distinct project=[#0..=#2] // { arity: 3 } + Project (#0..=#2) // { arity: 3 } + Filter (#1) IS NOT NULL AND (#2) IS NOT NULL // { arity: 5 } + Get l1 // { arity: 5 } + cte l4 = + TopK group_by=[#0, #1, #2{dst}] order_by=[#3 asc nulls_last, #4 desc nulls_first] limit=1 // { arity: 5 } + Distinct project=[#0..=#4] // { arity: 5 } + Union // { arity: 5 } + Project (#3, #4, #1{dst}, #7, #8) // { arity: 5 } + Map ((#6 + bigint_to_double(#2{w})), false) // { arity: 9 } + Join on=(#0{src} = #5) type=differential // { arity: 7 } + implementation + %0:pathq19[#0]KA » %1:l1[#2]K + ArrangeBy keys=[[#0{src}]] // { arity: 3 } + ReadIndex on=pathq19 pathq19_src=[differential join] // { arity: 3 } + ArrangeBy keys=[[#2]] // { arity: 4 } + Project (#0..=#3) // { arity: 4 } + Filter (#2) IS NOT NULL // { arity: 5 } + Get l1 // { arity: 5 } + Project (#0..=#3, #9) // { 
arity: 5 } + Map ((#4 OR #8)) // { arity: 10 } + Join on=(#0 = #5 AND #1 = #6 AND #2 = #7) type=differential // { arity: 9 } + implementation + %0:l7[#0..=#2]KKK » %1[#0..=#2]KKK + ArrangeBy keys=[[#0..=#2]] // { arity: 5 } + Project (#0..=#4) // { arity: 5 } + Get l7 // { arity: 6 } + ArrangeBy keys=[[#0..=#2]] // { arity: 4 } + Union // { arity: 4 } + Map (true) // { arity: 4 } + Get l3 // { arity: 3 } + Project (#0..=#2, #6) // { arity: 4 } + Map (false) // { arity: 7 } + Join on=(#0 = #3 AND #1 = #4 AND #2 = #5) type=differential // { arity: 6 } + implementation + %1:l2[#0..=#2]UKKK » %0[#0..=#2]KKK + ArrangeBy keys=[[#0..=#2]] // { arity: 3 } + Union // { arity: 3 } + Negate // { arity: 3 } + Get l3 // { arity: 3 } + Get l2 // { arity: 3 } + ArrangeBy keys=[[#0..=#2]] // { arity: 3 } + Get l2 // { arity: 3 } + cte l5 = + Reduce aggregates=[min((#0 + #1))] // { arity: 1 } + Project (#1, #3) // { arity: 2 } + Join on=(#0{dst} = #2{dst}) type=differential // { arity: 4 } implementation - %0:l8[#1]Kef » %1:l8[#1]Kef - ArrangeBy keys=[[#1{id}]] // { arity: 3 } - Project (#1{id}..=#3) // { arity: 3 } - Filter (#0 = false) // { arity: 4 } - Get l8 // { arity: 4 } - ArrangeBy keys=[[#1{id}]] // { arity: 3 } - Project (#1{id}..=#3) // { arity: 3 } - Filter (#0 = true) // { arity: 4 } - Get l8 // { arity: 4 } + %0:l4[#0]Kef » %1:l4[#0]Kef + ArrangeBy keys=[[#0{dst}]] // { arity: 2 } + Project (#2{dst}, #3) // { arity: 2 } + Filter (#0 = false) AND (#2{dst}) IS NOT NULL // { arity: 5 } + Get l4 // { arity: 5 } + ArrangeBy keys=[[#0{dst}]] // { arity: 2 } + Project (#2{dst}, #3) // { arity: 2 } + Filter (#0 = true) AND (#2{dst}) IS NOT NULL // { arity: 5 } + Get l4 // { arity: 5 } + cte l6 = + Project (#1) // { arity: 1 } + Map ((#0{min} / 2)) // { arity: 2 } + Union // { arity: 1 } + Get l5 // { arity: 1 } + Map (null) // { arity: 1 } + Union // { arity: 0 } + Negate // { arity: 0 } + Project () // { arity: 0 } + Get l5 // { arity: 1 } + Constant // { arity: 0 } + - () + cte l7 = + Distinct project=[#0..=#5] // { arity: 6 } + Union // { arity: 6 } + Project (#1, #0{id}, #0{id}, #2..=#4) // { arity: 6 } + Map (0, false, 0) // { arity: 5 } + Union // { arity: 2 } + Project (#1{id}, #12) // { arity: 2 } + Map (false) // { arity: 13 } + ReadIndex on=materialize.public.person person_locationcityid=[lookup value=(655)] // { arity: 12 } + Project (#1{id}, #12) // { arity: 2 } + Map (true) // { arity: 13 } + ReadIndex on=materialize.public.person person_locationcityid=[lookup value=(1138)] // { arity: 12 } + Project (#0..=#3, #7, #8) // { arity: 6 } + Map ((#4 OR coalesce((#3 > #6), false)), (#5 + 1)) // { arity: 9 } + CrossJoin type=delta // { arity: 7 } + implementation + %0:l4 » %1[×]U » %2[×]U + %1 » %2[×]U » %0:l4[×] + %2 » %1[×]U » %0:l4[×] + ArrangeBy keys=[[]] // { arity: 5 } + Get l4 // { arity: 5 } + ArrangeBy keys=[[]] // { arity: 1 } + TopK limit=1 // { arity: 1 } + Project (#4) // { arity: 1 } + Get l1 // { arity: 5 } + ArrangeBy keys=[[]] // { arity: 1 } + Union // { arity: 1 } + Get l6 // { arity: 1 } + Map (null) // { arity: 1 } + Union // { arity: 0 } + Negate // { arity: 0 } + Project () // { arity: 0 } + Get l6 // { arity: 1 } + Constant // { arity: 0 } + - () Return // { arity: 3 } - Project (#0{id}..=#2{min}) // { arity: 3 } - Join on=(#2{min} = #3{min_min}) type=differential // { arity: 4 } - implementation - %1[#0]UK » %0:l9[#2]K - ArrangeBy keys=[[#2{min}]] // { arity: 3 } - Get l9 // { arity: 3 } - ArrangeBy keys=[[#0{min_min}]] // { arity: 1 } - Reduce aggregates=[min(#0{min})] 
// { arity: 1 } - Project (#2) // { arity: 1 } - Get l9 // { arity: 3 } + With + cte l8 = + Project (#0..=#3) // { arity: 4 } + Join on=(#4 = #5{max}) type=differential // { arity: 6 } + implementation + %1[#0]UK » %0:l7[#4]K + ArrangeBy keys=[[#4]] // { arity: 5 } + Project (#0..=#3, #5) // { arity: 5 } + Filter (#2{id}) IS NOT NULL // { arity: 6 } + Get l7 // { arity: 6 } + ArrangeBy keys=[[#0{max}]] // { arity: 1 } + Reduce aggregates=[max(#0)] // { arity: 1 } + Project (#5) // { arity: 1 } + Get l7 // { arity: 6 } + cte l9 = + Reduce group_by=[#0{id}, #2{id}] aggregates=[min((#1 + #3))] // { arity: 3 } + Project (#0{id}, #2, #3{id}, #5) // { arity: 4 } + Join on=(#1{id} = #4{id}) type=differential // { arity: 6 } + implementation + %0:l8[#1]Kef » %1:l8[#1]Kef + ArrangeBy keys=[[#1{id}]] // { arity: 3 } + Project (#1{id}..=#3) // { arity: 3 } + Filter (#0 = false) // { arity: 4 } + Get l8 // { arity: 4 } + ArrangeBy keys=[[#1{id}]] // { arity: 3 } + Project (#1{id}..=#3) // { arity: 3 } + Filter (#0 = true) // { arity: 4 } + Get l8 // { arity: 4 } + Return // { arity: 3 } + Project (#0{id}..=#2{min}) // { arity: 3 } + Join on=(#2{min} = #3{min_min}) type=differential // { arity: 4 } + implementation + %1[#0]UK » %0:l9[#2]K + ArrangeBy keys=[[#2{min}]] // { arity: 3 } + Get l9 // { arity: 3 } + ArrangeBy keys=[[#0{min_min}]] // { arity: 1 } + Reduce aggregates=[min(#0{min})] // { arity: 1 } + Project (#2{min}) // { arity: 1 } + Get l9 // { arity: 3 } Used Indexes: - materialize.public.person_locationcityid (lookup) @@ -4061,18 +4069,18 @@ Explained Query: Finish order_by=[#0{dst} asc nulls_last] limit=20 output=[#0, #1] With Mutually Recursive cte l0 = - Project (#2{min}, #0, #1{dst}) // { arity: 3 } + Project (#2, #0{dst}, #1{min}) // { arity: 3 } Map (10995116285979) // { arity: 3 } Reduce group_by=[#0{dst}] aggregates=[min(#1)] // { arity: 2 } Distinct project=[#0{dst}, #1] // { arity: 2 } Union // { arity: 2 } - Project (#3, #5) // { arity: 2 } + Project (#3{dst}, #5) // { arity: 2 } Map ((#1 + integer_to_bigint(#4{w}))) // { arity: 6 } Join on=(#0 = #2{src}) type=differential // { arity: 5 } implementation %0:l0[#0]UK » %1:pathq20[#0]KA ArrangeBy keys=[[#0{dst}]] // { arity: 2 } - Project (#1{min}, #2) // { arity: 2 } + Project (#1{dst}, #2{min}) // { arity: 2 } Get l0 // { arity: 3 } ArrangeBy keys=[[#0{src}]] // { arity: 3 } ReadIndex on=pathq20 pathq20_src=[differential join] // { arity: 3 } @@ -4086,11 +4094,11 @@ Explained Query: implementation %1[#0]UKA » %0:l0[#0]UK ArrangeBy keys=[[#0{dst}]] // { arity: 2 } - Project (#1{min}, #2) // { arity: 2 } + Project (#1{dst}, #2{min}) // { arity: 2 } Get l0 // { arity: 3 } ArrangeBy keys=[[#0{personid}]] // { arity: 1 } Distinct project=[#0{personid}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{personid}) // { arity: 1 } Filter (#5{name} = "Balkh_Airlines") // { arity: 8 } Join on=(#2{companyid} = #4{id}) type=differential // { arity: 8 } implementation @@ -4100,7 +4108,7 @@ Explained Query: ArrangeBy keys=[[#0{id}]] // { arity: 4 } ReadIndex on=company company_id=[differential join] // { arity: 4 } cte l2 = - Project (#1) // { arity: 1 } + Project (#1{min}) // { arity: 1 } Get l1 // { arity: 2 } Return // { arity: 2 } Project (#0{dst}, #1{min}) // { arity: 2 } @@ -4172,7 +4180,7 @@ Explained Query: Reduce group_by=[#0{dst}] aggregates=[min(#1)] // { arity: 2 } Distinct project=[#0{dst}, #1] // { arity: 2 } Union // { arity: 2 } - Project (#3, #5) // { arity: 2 } + Project (#3{dst}, #5) // { arity: 2 } Map ((#1 + 
integer_to_bigint(#4{w}))) // { arity: 6 } Join on=(#0 = #2{src}) type=differential // { arity: 5 } implementation @@ -4194,7 +4202,7 @@ Explained Query: Get l0 // { arity: 2 } ArrangeBy keys=[[#0{personid}]] // { arity: 1 } Distinct project=[#0{personid}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{personid}) // { arity: 1 } Filter (#5{name} = "Balkh_Airlines") // { arity: 8 } Join on=(#2{companyid} = #4{id}) type=differential // { arity: 8 } implementation @@ -4204,7 +4212,7 @@ Explained Query: ArrangeBy keys=[[#0{id}]] // { arity: 4 } ReadIndex on=company company_id=[differential join] // { arity: 4 } cte l2 = - Project (#1) // { arity: 1 } + Project (#1{min}) // { arity: 1 } Get l1 // { arity: 2 } Return // { arity: 2 } Project (#0{dst}, #1{min}) // { arity: 2 } @@ -4278,19 +4286,19 @@ Explained Query: Finish order_by=[#0{dst} asc nulls_last] limit=20 output=[#0..=#2] With Mutually Recursive cte l0 = - Project (#3{min}, #0..=#2{min}) // { arity: 4 } + Project (#3, #0{dst}..=#2{min}) // { arity: 4 } Map (10995116285979) // { arity: 4 } Reduce group_by=[#0{dst}, #2{min}] aggregates=[min(#1)] // { arity: 3 } Reduce group_by=[#0{dst}, #2] aggregates=[min(#1)] // { arity: 3 } Distinct project=[#0{dst}..=#2] // { arity: 3 } Union // { arity: 3 } - Project (#4, #6, #7) // { arity: 3 } + Project (#4{dst}, #6, #7) // { arity: 3 } Map ((#1 + integer_to_bigint(#5{w})), (#2 + 1)) // { arity: 8 } Join on=(#0 = #3{src}) type=differential // { arity: 6 } implementation %1:pathq20[#0]KA » %0:l0[#0]K ArrangeBy keys=[[#0{dst}]] // { arity: 3 } - Project (#1{min}..=#3) // { arity: 3 } + Project (#1{dst}..=#3{min}) // { arity: 3 } Get l0 // { arity: 4 } ArrangeBy keys=[[#0{src}]] // { arity: 3 } ReadIndex on=pathq20 pathq20_src=[differential join] // { arity: 3 } @@ -4304,11 +4312,11 @@ Explained Query: implementation %1[#0]UKA » %0:l0[#0]K ArrangeBy keys=[[#0{dst}]] // { arity: 3 } - Project (#1{min}..=#3) // { arity: 3 } + Project (#1{dst}..=#3{min}) // { arity: 3 } Get l0 // { arity: 4 } ArrangeBy keys=[[#0{personid}]] // { arity: 1 } Distinct project=[#0{personid}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{personid}) // { arity: 1 } Filter (#5{name} = "Balkh_Airlines") // { arity: 8 } Join on=(#2{companyid} = #4{id}) type=differential // { arity: 8 } implementation @@ -4318,7 +4326,7 @@ Explained Query: ArrangeBy keys=[[#0{id}]] // { arity: 4 } ReadIndex on=company company_id=[differential join] // { arity: 4 } cte l2 = - Project (#1) // { arity: 1 } + Project (#1{min}) // { arity: 1 } Get l1 // { arity: 3 } Return // { arity: 3 } Project (#0{dst}..=#2{min}) // { arity: 3 } @@ -4427,9 +4435,9 @@ SELECT t, w FROM results WHERE w = (SELECT min(w) FROM results) ORDER BY t LIMIT ---- Explained Query: Finish order_by=[#0{personid} asc nulls_last] limit=20 output=[#0, #1] - With Mutually Recursive + With cte l0 = - Project (#1) // { arity: 1 } + Project (#1{personid}) // { arity: 1 } Filter (#5{name} = "Balkh_Airlines") // { arity: 8 } Join on=(#2{companyid} = #4{id}) type=differential // { arity: 8 } implementation @@ -4444,207 +4452,209 @@ Explained Query: cte l2 = ArrangeBy keys=[[#0{personid}]] // { arity: 1 } Get l0 // { arity: 1 } - cte l3 = - Distinct project=[#0{dst}] // { arity: 1 } - Union // { arity: 1 } - Project (#2) // { arity: 1 } - Join on=(#0 = #1{src}) type=delta // { arity: 4 } + Return // { arity: 2 } + With Mutually Recursive + cte l3 = + Distinct project=[#0{dst}] // { arity: 1 } + Union // { arity: 1 } + Project (#2{dst}) // { arity: 1 } + Join 
on=(#0 = #1{src}) type=delta // { arity: 4 } + implementation + %0:l3 » %1:l1[#0]KA » %2[×] + %1:l1 » %0:l3[#0]UK » %2[×] + %2 » %0:l3[×] » %1:l1[#0]KA + ArrangeBy keys=[[], [#0{dst}]] // { arity: 1 } + Get l3 // { arity: 1 } + Get l1 // { arity: 3 } + ArrangeBy keys=[[]] // { arity: 0 } + Union // { arity: 0 } + Negate // { arity: 0 } + Distinct project=[] // { arity: 0 } + Project () // { arity: 0 } + Join on=(#0{dst} = #1{personid}) type=differential // { arity: 2 } + implementation + %0:l3[#0]UK » %1:l2[#0]K + ArrangeBy keys=[[#0{dst}]] // { arity: 1 } + Get l3 // { arity: 1 } + Get l2 // { arity: 1 } + Constant // { arity: 0 } + - () + Constant // { arity: 1 } + - (10995116285979) + cte l4 = + TopK order_by=[#3 asc nulls_last] limit=1000 // { arity: 5 } + Project (#0..=#3, #5) // { arity: 5 } + Filter (#4 = false) // { arity: 6 } + Get l12 // { arity: 6 } + cte l5 = + Distinct project=[#0..=#2] // { arity: 3 } + Project (#0..=#2{personid}) // { arity: 3 } + Get l12 // { arity: 6 } + cte l6 = + ArrangeBy keys=[[#0..=#2]] // { arity: 3 } + Get l5 // { arity: 3 } + cte l7 = + Project (#0..=#2) // { arity: 3 } + Join on=(#0 = #3 AND #1 = #4 AND #2 = #5) type=differential // { arity: 6 } + implementation + %1[#0..=#2]UKKKA » %0:l6[#0..=#2]UKKK + Get l6 // { arity: 3 } + ArrangeBy keys=[[#0..=#2]] // { arity: 3 } + Distinct project=[#0..=#2] // { arity: 3 } + Project (#0..=#2) // { arity: 3 } + Get l4 // { arity: 5 } + cte l8 = + TopK group_by=[#0, #1, #2{dst}] order_by=[#3 asc nulls_last, #4 desc nulls_first] limit=1 // { arity: 5 } + Union // { arity: 5 } + Project (#3, #4, #1{dst}, #7, #8) // { arity: 5 } + Map ((#6 + integer_to_bigint(#2{w})), false) // { arity: 9 } + Join on=(#0{src} = #5) type=differential // { arity: 7 } + implementation + %0:l1[#0]KA » %1:l4[#2]K + Get l1 // { arity: 3 } + ArrangeBy keys=[[#2]] // { arity: 4 } + Project (#0..=#3) // { arity: 4 } + Get l4 // { arity: 5 } + Project (#0..=#3, #9) // { arity: 5 } + Map ((#4 OR #8)) // { arity: 10 } + Join on=(#0 = #5 AND #1 = #6 AND #2 = #7) type=differential // { arity: 9 } + implementation + %0:l12[#0..=#2]KKK » %1[#0..=#2]KKK + ArrangeBy keys=[[#0..=#2]] // { arity: 5 } + Project (#0..=#4) // { arity: 5 } + Get l12 // { arity: 6 } + ArrangeBy keys=[[#0..=#2]] // { arity: 4 } + Union // { arity: 4 } + Map (true) // { arity: 4 } + Get l7 // { arity: 3 } + Project (#0..=#2, #6) // { arity: 4 } + Map (false) // { arity: 7 } + Join on=(#0 = #3 AND #1 = #4 AND #2 = #5) type=differential // { arity: 6 } + implementation + %1:l6[#0..=#2]UKKK » %0[#0..=#2]KKK + ArrangeBy keys=[[#0..=#2]] // { arity: 3 } + Union // { arity: 3 } + Negate // { arity: 3 } + Get l7 // { arity: 3 } + Get l5 // { arity: 3 } + Get l6 // { arity: 3 } + cte l9 = + Reduce aggregates=[min((#0 + #1))] // { arity: 1 } + Project (#1, #3) // { arity: 2 } + Join on=(#0{dst} = #2{dst}) type=differential // { arity: 4 } implementation - %0:l3 » %1:l1[#0]KA » %2[×] - %1:l1 » %0:l3[#0]UK » %2[×] - %2 » %0:l3[×] » %1:l1[#0]KA - ArrangeBy keys=[[], [#0{dst}]] // { arity: 1 } - Get l3 // { arity: 1 } - Get l1 // { arity: 3 } - ArrangeBy keys=[[]] // { arity: 0 } + %0:l8[#0]Kef » %1:l8[#0]Kef + ArrangeBy keys=[[#0{dst}]] // { arity: 2 } + Project (#2{dst}, #3) // { arity: 2 } + Filter (#0 = false) // { arity: 5 } + Get l8 // { arity: 5 } + ArrangeBy keys=[[#0{dst}]] // { arity: 2 } + Project (#2{dst}, #3) // { arity: 2 } + Filter (#0 = true) // { arity: 5 } + Get l8 // { arity: 5 } + cte l10 = + Project (#1) // { arity: 1 } + Map ((#0{min} / 2)) // { arity: 2 } + 
Union // { arity: 1 } + Get l9 // { arity: 1 } + Map (null) // { arity: 1 } Union // { arity: 0 } Negate // { arity: 0 } - Distinct project=[] // { arity: 0 } - Project () // { arity: 0 } - Join on=(#0{dst} = #1{personid}) type=differential // { arity: 2 } - implementation - %0:l3[#0]UK » %1:l2[#0]K - ArrangeBy keys=[[#0{dst}]] // { arity: 1 } - Get l3 // { arity: 1 } - Get l2 // { arity: 1 } + Project () // { arity: 0 } + Get l9 // { arity: 1 } Constant // { arity: 0 } - () - Constant // { arity: 1 } - - (10995116285979) - cte l4 = - TopK order_by=[#3 asc nulls_last] limit=1000 // { arity: 5 } - Project (#0..=#3, #5) // { arity: 5 } - Filter (#4 = false) // { arity: 6 } - Get l12 // { arity: 6 } - cte l5 = - Distinct project=[#0..=#2] // { arity: 3 } - Project (#0..=#2) // { arity: 3 } - Get l12 // { arity: 6 } - cte l6 = - ArrangeBy keys=[[#0..=#2]] // { arity: 3 } - Get l5 // { arity: 3 } - cte l7 = - Project (#0..=#2) // { arity: 3 } - Join on=(#0 = #3 AND #1 = #4 AND #2 = #5) type=differential // { arity: 6 } - implementation - %1[#0..=#2]UKKKA » %0:l6[#0..=#2]UKKK - Get l6 // { arity: 3 } - ArrangeBy keys=[[#0..=#2]] // { arity: 3 } - Distinct project=[#0..=#2] // { arity: 3 } - Project (#0..=#2) // { arity: 3 } - Get l4 // { arity: 5 } - cte l8 = - TopK group_by=[#0, #1, #2{dst}] order_by=[#3 asc nulls_last, #4 desc nulls_first] limit=1 // { arity: 5 } - Union // { arity: 5 } - Project (#3, #4, #1, #7, #8) // { arity: 5 } - Map ((#6 + integer_to_bigint(#2{w})), false) // { arity: 9 } - Join on=(#0{src} = #5) type=differential // { arity: 7 } - implementation - %0:l1[#0]KA » %1:l4[#2]K - Get l1 // { arity: 3 } - ArrangeBy keys=[[#2]] // { arity: 4 } - Project (#0..=#3) // { arity: 4 } - Get l4 // { arity: 5 } - Project (#0..=#3, #9) // { arity: 5 } - Map ((#4 OR #8)) // { arity: 10 } - Join on=(#0 = #5 AND #1 = #6 AND #2 = #7) type=differential // { arity: 9 } - implementation - %0:l12[#0..=#2]KKK » %1[#0..=#2]KKK - ArrangeBy keys=[[#0..=#2]] // { arity: 5 } - Project (#0..=#4) // { arity: 5 } - Get l12 // { arity: 6 } - ArrangeBy keys=[[#0..=#2]] // { arity: 4 } - Union // { arity: 4 } - Map (true) // { arity: 4 } - Get l7 // { arity: 3 } - Project (#0..=#2, #6) // { arity: 4 } - Map (false) // { arity: 7 } - Join on=(#0 = #3 AND #1 = #4 AND #2 = #5) type=differential // { arity: 6 } + cte l11 = + Distinct project=[] // { arity: 0 } + Project () // { arity: 0 } + Join on=(#0{dst} = #1{personid}) type=differential // { arity: 2 } + implementation + %0:l3[#0]UK » %1:l2[#0]K + ArrangeBy keys=[[#0{dst}]] // { arity: 1 } + Get l3 // { arity: 1 } + Get l2 // { arity: 1 } + cte l12 = + Distinct project=[#0..=#5] // { arity: 6 } + Union // { arity: 6 } + Project (#0..=#2{personid}, #4, #3, #5) // { arity: 6 } + Map (false, 0, 0) // { arity: 6 } + Distinct project=[#0..=#2{personid}] // { arity: 3 } + Union // { arity: 3 } + Project (#1, #0, #0) // { arity: 3 } + Map (10995116285979, false) // { arity: 2 } + Get l11 // { arity: 0 } + Project (#1, #0{personid}, #0{personid}) // { arity: 3 } + Map (true) // { arity: 2 } + CrossJoin type=differential // { arity: 1 } implementation - %1:l6[#0..=#2]UKKK » %0[#0..=#2]KKK - ArrangeBy keys=[[#0..=#2]] // { arity: 3 } - Union // { arity: 3 } - Negate // { arity: 3 } - Get l7 // { arity: 3 } - Get l5 // { arity: 3 } - Get l6 // { arity: 3 } - cte l9 = - Reduce aggregates=[min((#0 + #1))] // { arity: 1 } - Project (#1, #3) // { arity: 2 } - Join on=(#0{dst} = #2{dst}) type=differential // { arity: 4 } - implementation - %0:l8[#0]Kef » %1:l8[#0]Kef - 
ArrangeBy keys=[[#0{dst}]] // { arity: 2 } - Project (#2, #3) // { arity: 2 } - Filter (#0 = false) // { arity: 5 } - Get l8 // { arity: 5 } - ArrangeBy keys=[[#0{dst}]] // { arity: 2 } - Project (#2, #3) // { arity: 2 } - Filter (#0 = true) // { arity: 5 } - Get l8 // { arity: 5 } - cte l10 = - Project (#1) // { arity: 1 } - Map ((#0{min} / 2)) // { arity: 2 } - Union // { arity: 1 } - Get l9 // { arity: 1 } - Map (null) // { arity: 1 } - Union // { arity: 0 } - Negate // { arity: 0 } - Project () // { arity: 0 } - Get l9 // { arity: 1 } - Constant // { arity: 0 } - - () - cte l11 = - Distinct project=[] // { arity: 0 } - Project () // { arity: 0 } - Join on=(#0{dst} = #1{personid}) type=differential // { arity: 2 } - implementation - %0:l3[#0]UK » %1:l2[#0]K - ArrangeBy keys=[[#0{dst}]] // { arity: 1 } - Get l3 // { arity: 1 } - Get l2 // { arity: 1 } - cte l12 = - Distinct project=[#0..=#5] // { arity: 6 } - Union // { arity: 6 } - Project (#0..=#2{personid}, #4, #3, #5) // { arity: 6 } - Map (false, 0, 0) // { arity: 6 } - Distinct project=[#0..=#2{personid}] // { arity: 3 } - Union // { arity: 3 } - Project (#1, #0, #0) // { arity: 3 } - Map (10995116285979, false) // { arity: 2 } - Get l11 // { arity: 0 } - Project (#1{personid}, #0, #0) // { arity: 3 } - Map (true) // { arity: 2 } - CrossJoin type=differential // { arity: 1 } - implementation - %1:l11[×]U » %0:l0[×] - ArrangeBy keys=[[]] // { arity: 1 } - Get l0 // { arity: 1 } - ArrangeBy keys=[[]] // { arity: 0 } - Get l11 // { arity: 0 } - Project (#0..=#3, #7, #8) // { arity: 6 } - Map ((#4 OR coalesce((#3 > #6), false)), (#5 + 1)) // { arity: 9 } - CrossJoin type=delta // { arity: 7 } - implementation - %0:l8 » %1[×]U » %2[×]U - %1 » %2[×]U » %0:l8[×] - %2 » %1[×]U » %0:l8[×] - ArrangeBy keys=[[]] // { arity: 5 } - Get l8 // { arity: 5 } - ArrangeBy keys=[[]] // { arity: 1 } - TopK limit=1 // { arity: 1 } - Project (#4) // { arity: 1 } - Get l4 // { arity: 5 } - ArrangeBy keys=[[]] // { arity: 1 } - Union // { arity: 1 } - Get l10 // { arity: 1 } - Map (null) // { arity: 1 } - Union // { arity: 0 } - Negate // { arity: 0 } - Project () // { arity: 0 } - Get l10 // { arity: 1 } - Constant // { arity: 0 } - - () - Return // { arity: 2 } - With - cte l13 = - Project (#0..=#3) // { arity: 4 } - Join on=(#4 = #5{max}) type=differential // { arity: 6 } - implementation - %1[#0]UK » %0:l12[#4]K - ArrangeBy keys=[[#4]] // { arity: 5 } - Project (#0..=#3, #5) // { arity: 5 } - Get l12 // { arity: 6 } - ArrangeBy keys=[[#0{max}]] // { arity: 1 } - Reduce aggregates=[max(#0)] // { arity: 1 } - Project (#5) // { arity: 1 } - Get l12 // { arity: 6 } - cte l14 = - Project (#1{min}, #2) // { arity: 2 } - Reduce group_by=[#0{personid}, #2{personid}] aggregates=[min((#1 + #3))] // { arity: 3 } - Project (#0{personid}, #2{personid}, #3, #5) // { arity: 4 } - Join on=(#1{personid} = #4{personid}) type=differential // { arity: 6 } - implementation - %0:l13[#1]Kef » %1:l13[#1]Kef - ArrangeBy keys=[[#1{personid}]] // { arity: 3 } - Project (#1{personid}..=#3) // { arity: 3 } - Filter (#0 = false) // { arity: 4 } - Get l13 // { arity: 4 } - ArrangeBy keys=[[#1{personid}]] // { arity: 3 } - Project (#1{personid}..=#3) // { arity: 3 } - Filter (#0 = true) // { arity: 4 } - Get l13 // { arity: 4 } + %1:l11[×]U » %0:l0[×] + ArrangeBy keys=[[]] // { arity: 1 } + Get l0 // { arity: 1 } + ArrangeBy keys=[[]] // { arity: 0 } + Get l11 // { arity: 0 } + Project (#0..=#3, #7, #8) // { arity: 6 } + Map ((#4 OR coalesce((#3 > #6), false)), (#5 + 1)) // { arity: 
9 } + CrossJoin type=delta // { arity: 7 } + implementation + %0:l8 » %1[×]U » %2[×]U + %1 » %2[×]U » %0:l8[×] + %2 » %1[×]U » %0:l8[×] + ArrangeBy keys=[[]] // { arity: 5 } + Get l8 // { arity: 5 } + ArrangeBy keys=[[]] // { arity: 1 } + TopK limit=1 // { arity: 1 } + Project (#4) // { arity: 1 } + Get l4 // { arity: 5 } + ArrangeBy keys=[[]] // { arity: 1 } + Union // { arity: 1 } + Get l10 // { arity: 1 } + Map (null) // { arity: 1 } + Union // { arity: 0 } + Negate // { arity: 0 } + Project () // { arity: 0 } + Get l10 // { arity: 1 } + Constant // { arity: 0 } + - () Return // { arity: 2 } - Project (#0{personid}, #1{min}) // { arity: 2 } - Join on=(#1{min} = #2{min_min}) type=differential // { arity: 3 } - implementation - %1[#0]UK » %0:l14[#1]K - ArrangeBy keys=[[#1{min}]] // { arity: 2 } - Get l14 // { arity: 2 } - ArrangeBy keys=[[#0{min_min}]] // { arity: 1 } - Reduce aggregates=[min(#0{min})] // { arity: 1 } - Project (#1) // { arity: 1 } - Get l14 // { arity: 2 } + With + cte l13 = + Project (#0..=#3) // { arity: 4 } + Join on=(#4 = #5{max}) type=differential // { arity: 6 } + implementation + %1[#0]UK » %0:l12[#4]K + ArrangeBy keys=[[#4]] // { arity: 5 } + Project (#0..=#3, #5) // { arity: 5 } + Get l12 // { arity: 6 } + ArrangeBy keys=[[#0{max}]] // { arity: 1 } + Reduce aggregates=[max(#0)] // { arity: 1 } + Project (#5) // { arity: 1 } + Get l12 // { arity: 6 } + cte l14 = + Project (#1{personid}, #2{min}) // { arity: 2 } + Reduce group_by=[#0{personid}, #2{personid}] aggregates=[min((#1 + #3))] // { arity: 3 } + Project (#0{personid}, #2, #3{personid}, #5) // { arity: 4 } + Join on=(#1{personid} = #4{personid}) type=differential // { arity: 6 } + implementation + %0:l13[#1]Kef » %1:l13[#1]Kef + ArrangeBy keys=[[#1{personid}]] // { arity: 3 } + Project (#1{personid}..=#3) // { arity: 3 } + Filter (#0 = false) // { arity: 4 } + Get l13 // { arity: 4 } + ArrangeBy keys=[[#1{personid}]] // { arity: 3 } + Project (#1{personid}..=#3) // { arity: 3 } + Filter (#0 = true) // { arity: 4 } + Get l13 // { arity: 4 } + Return // { arity: 2 } + Project (#0{personid}, #1{min}) // { arity: 2 } + Join on=(#1{min} = #2{min_min}) type=differential // { arity: 3 } + implementation + %1[#0]UK » %0:l14[#1]K + ArrangeBy keys=[[#1{min}]] // { arity: 2 } + Get l14 // { arity: 2 } + ArrangeBy keys=[[#0{min_min}]] // { arity: 1 } + Reduce aggregates=[min(#0{min})] // { arity: 1 } + Project (#1{min}) // { arity: 1 } + Get l14 // { arity: 2 } Used Indexes: - materialize.public.person_workat_company_companyid (differential join) diff --git a/test/sqllogictest/ldbc_bi_eager.slt b/test/sqllogictest/ldbc_bi_eager.slt index efac3c35ef148..d47e2b0355e3b 100644 --- a/test/sqllogictest/ldbc_bi_eager.slt +++ b/test/sqllogictest/ldbc_bi_eager.slt @@ -455,14 +455,14 @@ Explained Query: Filter (#0{creationdate} < 2010-06-11 09:21:46 UTC) // { arity: 13 } ReadIndex on=message message_messageid=[*** full scan ***] // { arity: 13 } Return // { arity: 7 } - Project (#0..=#2, #4, #7, #5{sum}, #8) // { arity: 7 } + Project (#0..=#2, #4{count}, #7, #5{sum}, #8) // { arity: 7 } Map ((#5{sum} / bigint_to_numeric(case when (#6{count} = 0) then null else #6{count} end)), (bigint_to_numeric(#4{count}) / #3)) // { arity: 9 } Reduce group_by=[#1..=#4] aggregates=[count(*), sum(integer_to_bigint(#0{length})), count(integer_to_bigint(#0{length}))] // { arity: 7 } CrossJoin type=differential // { arity: 5 } implementation %1[×]U » %0:message[×]if ArrangeBy keys=[[]] // { arity: 4 } - Project (#8, #13..=#15) // { arity: 4 } + 
Project (#8{length}, #13..=#15) // { arity: 4 } Filter (#0{creationdate} < 2010-06-11 09:21:46 UTC) AND (#4{content}) IS NOT NULL // { arity: 16 } Map (extract_year_tstz(#0{creationdate}), (#12{parentmessageid}) IS NOT NULL, case when (#8{length} < 40) then 0 else case when (#8{length} < 80) then 1 else case when (#8{length} < 160) then 2 else 3 end end end) // { arity: 16 } ReadIndex on=message message_messageid=[*** full scan ***] // { arity: 13 } @@ -527,7 +527,7 @@ Explained Query: Finish order_by=[#3 desc nulls_first, #0{name} asc nulls_last] limit=100 output=[#0..=#3] With cte l0 = - Project (#5, #6) // { arity: 2 } + Project (#5{id}, #6{name}) // { arity: 2 } Join on=(#0{id} = #8{typetagclassid}) type=differential // { arity: 9 } implementation %0:tagclass[#0]KAe » %1:tag[#3]KAe @@ -539,7 +539,7 @@ Explained Query: Filter (#0{id}) IS NOT NULL // { arity: 2 } Get l0 // { arity: 2 } cte l2 = - Project (#1{count}, #3, #4) // { arity: 3 } + Project (#1{name}, #3{count}, #4{count}) // { arity: 3 } Join on=(#0{id} = #2{id}) type=differential // { arity: 5 } implementation %1[#0]UKA » %0:l1[#0]K @@ -547,7 +547,7 @@ Explained Query: Get l1 // { arity: 2 } ArrangeBy keys=[[#0{id}]] // { arity: 3 } Reduce group_by=[#0{id}] aggregates=[count(case when (#2{creationdate} < 2010-09-16 00:00:00 UTC) then #1{messageid} else null end), count(case when (#2{creationdate} >= 2010-09-16 00:00:00 UTC) then #1{messageid} else null end)] // { arity: 3 } - Project (#0{id}, #2{creationdate}, #4) // { arity: 3 } + Project (#0{id}, #2{messageid}, #4{creationdate}) // { arity: 3 } Filter (#4{creationdate} < 2010-12-25 00:00:00 UTC) AND (#4{creationdate} >= 2010-06-08 00:00:00 UTC) // { arity: 17 } Join on=(#0{id} = #3{tagid} AND #2{messageid} = #5{messageid}) type=delta // { arity: 17 } implementation @@ -570,7 +570,7 @@ Explained Query: Negate // { arity: 1 } Project (#0{name}) // { arity: 1 } Get l2 // { arity: 3 } - Project (#1) // { arity: 1 } + Project (#1{name}) // { arity: 1 } Get l0 // { arity: 2 } Get l2 // { arity: 3 } @@ -623,7 +623,7 @@ LIMIT 20 Explained Query: Finish order_by=[#4{count} desc nulls_first, #0{containerforumid} asc nulls_last] limit=20 output=[#0..=#4] Reduce group_by=[#0{containerforumid}, #2{title}, #1{creationdate}, #3{moderatorpersonid}] aggregates=[count(*)] // { arity: 5 } - Project (#10, #13, #15, #16) // { arity: 4 } + Project (#10{containerforumid}, #13{creationdate}, #15{title}, #16{moderatorpersonid}) // { arity: 4 } Filter (#33{name} = "China") AND (#16{moderatorpersonid}) IS NOT NULL AND (#31{partofcountryid}) IS NOT NULL // { arity: 37 } Join on=(#1{messageid} = #36{messageid} AND #10{containerforumid} = #14{id} AND #16{moderatorpersonid} = #18{id} AND #25{locationcityid} = #28{id} AND #31{partofcountryid} = #32{id}) type=delta // { arity: 37 } implementation @@ -645,7 +645,7 @@ Explained Query: ReadIndex on=country country_id=[delta join lookup] // { arity: 4 } ArrangeBy keys=[[#0{messageid}]] // { arity: 1 } Distinct project=[#0{messageid}] // { arity: 1 } - Project (#10) // { arity: 1 } + Project (#10{messageid}) // { arity: 1 } Join on=(#0{id} = #8{typetagclassid} AND #5{id} = #11{tagid}) type=delta // { arity: 12 } implementation %0:tagclass » %1:tag[#3]KA » %2:message_hastag_tag[#2]KA @@ -752,7 +752,7 @@ Explained Query: cte l0 = Project (#0{id}) // { arity: 1 } TopK order_by=[#1{maxnumberofmembers} desc nulls_first, #0{id} asc nulls_last] limit=100 // { arity: 2 } - Project (#0{id}, #2) // { arity: 2 } + Project (#0{id}, #2{maxnumberofmembers}) // { arity: 2 } 
Filter (#1{creationdate} > 2010-02-12 00:00:00 UTC) // { arity: 3 } ReadIndex on=top100popularforumsq04 top100popularforumsq04_id=[*** full scan ***] // { arity: 3 } cte l1 = @@ -764,7 +764,7 @@ Explained Query: ReadIndex on=person person_id=[differential join] // { arity: 11 } ArrangeBy keys=[[#0{personid}]] // { arity: 1 } Distinct project=[#0{personid}] // { arity: 1 } - Project (#3) // { arity: 1 } + Project (#3{personid}) // { arity: 1 } Join on=(#0{id} = #2{forumid}) type=differential // { arity: 4 } implementation %1:forum_hasmember_person[#1]KA » %0:l0[#0]K @@ -776,7 +776,7 @@ Explained Query: ArrangeBy keys=[[#1{id}]] // { arity: 4 } Get l1 // { arity: 4 } cte l3 = - Project (#0{creationdate}..=#3{lastname}, #5) // { arity: 5 } + Project (#0{creationdate}..=#3{lastname}, #5{messageid}) // { arity: 5 } Join on=(#1{id} = #13{creatorpersonid} AND #14{containerforumid} = #17{id}) type=delta // { arity: 18 } implementation %0:l2 » %1:message[#9]KA » %2[#0]UKA @@ -801,7 +801,7 @@ Explained Query: Get l2 // { arity: 4 } ArrangeBy keys=[[#0{id}]] // { arity: 1 } Distinct project=[#0{id}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{id}) // { arity: 1 } Get l3 // { arity: 5 } Get l1 // { arity: 4 } Get l3 // { arity: 5 } @@ -853,7 +853,7 @@ Explained Query: Finish order_by=[#4 desc nulls_first, #0{creatorpersonid} asc nulls_last] limit=100 output=[#0..=#4] With cte l0 = - Project (#1{messageid}, #5, #16) // { arity: 3 } + Project (#1{name}, #5{messageid}, #16{creatorpersonid}) // { arity: 3 } Join on=(#0{id} = #6{tagid} AND #5{messageid} = #8{messageid}) type=delta // { arity: 20 } implementation %0:tag » %1:message_hastag_tag[#2]KA » %2:message[#1]KA @@ -867,28 +867,28 @@ Explained Query: ReadIndex on=message message_messageid=[delta join lookup] // { arity: 13 } cte l1 = Reduce group_by=[#0{parentmessageid}] aggregates=[count(*)] // { arity: 2 } - Project (#12) // { arity: 1 } + Project (#12{parentmessageid}) // { arity: 1 } Filter (#12{parentmessageid}) IS NOT NULL // { arity: 13 } ReadIndex on=message message_messageid=[*** full scan ***] // { arity: 13 } cte l2 = Reduce group_by=[#0{messageid}] aggregates=[count(*)] // { arity: 2 } - Project (#2) // { arity: 1 } + Project (#2{messageid}) // { arity: 1 } ReadIndex on=person_likes_message person_likes_message_personid=[*** full scan ***] // { arity: 3 } cte l3 = Distinct project=[#0{messageid}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{messageid}) // { arity: 1 } Get l0 // { arity: 3 } Return // { arity: 5 } Map (((bigint_to_numeric((1 * #3{count})) + (2 * #1{sum})) + (10 * #2{sum}))) // { arity: 5 } Reduce group_by=[#0{creatorpersonid}] aggregates=[sum(coalesce(case when (#2) IS NULL then null else #1{count} end, 0)), sum(coalesce(case when (#4) IS NULL then null else #3{count} end, 0)), count(*)] // { arity: 4 } - Project (#1{count}, #3{count}, #4, #6, #7) // { arity: 5 } + Project (#1{creatorpersonid}, #3{count}, #4, #6{count}, #7) // { arity: 5 } Join on=(#0{messageid} = #2{parentmessageid} = #5{messageid}) type=delta // { arity: 8 } implementation %0:l0 » %1[#0]K » %2[#0]K %1 » %0:l0[#0]Kef » %2[#0]K %2 » %0:l0[#0]Kef » %1[#0]K ArrangeBy keys=[[#0{messageid}]] // { arity: 2 } - Project (#1{creatorpersonid}, #2) // { arity: 2 } + Project (#1{messageid}, #2{creatorpersonid}) // { arity: 2 } Filter (#0{name} = "Sikh_Empire") // { arity: 3 } Get l0 // { arity: 3 } ArrangeBy keys=[[#0{parentmessageid}]] // { arity: 3 } @@ -980,7 +980,7 @@ Explained Query: Finish order_by=[#1{sum} desc nulls_first, 
#0{creatorpersonid} asc nulls_last] limit=100 output=[#0, #1] With cte l0 = - Project (#6, #17) // { arity: 2 } + Project (#6{messageid}, #17{creatorpersonid}) // { arity: 2 } Join on=(#0{id} = #7{tagid} AND #6{messageid} = #9{messageid}) type=delta // { arity: 21 } implementation %0:tag » %1:message_hastag_tag[#2]KA » %2:message[#1]KA @@ -996,7 +996,7 @@ Explained Query: ArrangeBy keys=[[#0{messageid}]] // { arity: 2 } Get l0 // { arity: 2 } cte l2 = - Project (#0{messageid}, #1{creatorpersonid}, #3) // { arity: 3 } + Project (#0{messageid}, #1{creatorpersonid}, #3{personid}) // { arity: 3 } Join on=(#0{messageid} = #4{messageid}) type=differential // { arity: 5 } implementation %1:person_likes_message[#2]KA » %0:l1[#0]K @@ -1009,7 +1009,7 @@ Explained Query: Map (null) // { arity: 2 } Union // { arity: 1 } Negate // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{creatorpersonid}) // { arity: 1 } Join on=(#0{messageid} = #2{messageid}) type=differential // { arity: 3 } implementation %1[#0]UKA » %0:l1[#0]K @@ -1018,12 +1018,12 @@ Explained Query: Distinct project=[#0{messageid}] // { arity: 1 } Project (#0{messageid}) // { arity: 1 } Get l2 // { arity: 3 } - Project (#1) // { arity: 1 } + Project (#1{creatorpersonid}) // { arity: 1 } Get l0 // { arity: 2 } - Project (#1{personid}, #2) // { arity: 2 } + Project (#1{creatorpersonid}, #2{personid}) // { arity: 2 } Get l2 // { arity: 3 } cte l4 = - Project (#0{creatorpersonid}, #3) // { arity: 2 } + Project (#0{creatorpersonid}, #3{popularityscore}) // { arity: 2 } Join on=(#1{personid} = #2{person2id}) type=differential // { arity: 4 } implementation %1:popularityscoreq06[#0]UKA » %0:l3[#1]K @@ -1086,7 +1086,7 @@ Explained Query: Finish order_by=[#1{sum} desc nulls_first, #0{creatorpersonid} asc nulls_last] limit=100 output=[#0, #1] With cte l0 = - Project (#6, #17) // { arity: 2 } + Project (#6{messageid}, #17{creatorpersonid}) // { arity: 2 } Join on=(#0{id} = #7{tagid} AND #6{messageid} = #9{messageid}) type=delta // { arity: 21 } implementation %0:tag » %1:message_hastag_tag[#2]KA » %2:message[#1]KA @@ -1102,7 +1102,7 @@ Explained Query: ArrangeBy keys=[[#0{messageid}]] // { arity: 2 } Get l0 // { arity: 2 } cte l2 = - Project (#0{messageid}, #1{creatorpersonid}, #3) // { arity: 3 } + Project (#0{messageid}, #1{creatorpersonid}, #3{personid}) // { arity: 3 } Join on=(#0{messageid} = #4{messageid}) type=differential // { arity: 5 } implementation %1:person_likes_message[#2]KA » %0:l1[#0]K @@ -1115,7 +1115,7 @@ Explained Query: Map (null) // { arity: 2 } Union // { arity: 1 } Negate // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{creatorpersonid}) // { arity: 1 } Join on=(#0{messageid} = #2{messageid}) type=differential // { arity: 3 } implementation %1[#0]UKA » %0:l1[#0]K @@ -1124,12 +1124,12 @@ Explained Query: Distinct project=[#0{messageid}] // { arity: 1 } Project (#0{messageid}) // { arity: 1 } Get l2 // { arity: 3 } - Project (#1) // { arity: 1 } + Project (#1{creatorpersonid}) // { arity: 1 } Get l0 // { arity: 2 } - Project (#1{personid}, #2) // { arity: 2 } + Project (#1{creatorpersonid}, #2{personid}) // { arity: 2 } Get l2 // { arity: 3 } cte l4 = - Project (#0{creatorpersonid}, #3) // { arity: 2 } + Project (#0{creatorpersonid}, #3{popularityscore}) // { arity: 2 } Join on=(#1{personid} = #2{person2id}) type=differential // { arity: 4 } implementation %1:popularityscoreq06[#0]UKA » %0:l3[#1]K @@ -1193,7 +1193,7 @@ Explained Query: Finish order_by=[#1{sum} desc nulls_first, #0{creatorpersonid} asc 
nulls_last] limit=100 output=[#0, #1] With cte l0 = - Project (#1{messageid}, #5, #16) // { arity: 3 } + Project (#1{name}, #5{messageid}, #16{creatorpersonid}) // { arity: 3 } Join on=(#0{id} = #6{tagid} AND #5{messageid} = #8{messageid}) type=delta // { arity: 20 } implementation %0:tag » %1:message_hastag_tag[#2]KA » %2:message[#1]KA @@ -1206,7 +1206,7 @@ Explained Query: ArrangeBy keys=[[#1{messageid}]] // { arity: 13 } ReadIndex on=message message_messageid=[delta join lookup] // { arity: 13 } cte l1 = - Project (#0{name}..=#2{creatorpersonid}, #4) // { arity: 4 } + Project (#0{name}..=#2{creatorpersonid}, #4{personid}) // { arity: 4 } Join on=(#1{messageid} = #5{messageid}) type=differential // { arity: 6 } implementation %1:person_likes_message[#2]KA » %0:l0[#1]K @@ -1215,32 +1215,32 @@ Explained Query: ArrangeBy keys=[[#2{messageid}]] // { arity: 3 } ReadIndex on=person_likes_message person_likes_message_messageid=[differential join] // { arity: 3 } cte l2 = - Filter (#0{name} = "Bob_Geldof") // { arity: 3 } - Get l0 // { arity: 3 } + Project (#1{messageid}, #2{creatorpersonid}) // { arity: 2 } + Filter (#0{name} = "Bob_Geldof") // { arity: 3 } + Get l0 // { arity: 3 } cte l3 = Distinct project=[#0{creatorpersonid}, #1{personid}] // { arity: 2 } Union // { arity: 2 } Map (null) // { arity: 2 } Union // { arity: 1 } Negate // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{creatorpersonid}) // { arity: 1 } Join on=(#0{messageid} = #2{messageid}) type=differential // { arity: 3 } implementation %1[#0]UKA » %0:l2[#0]Kef ArrangeBy keys=[[#0{messageid}]] // { arity: 2 } - Project (#1{creatorpersonid}, #2) // { arity: 2 } - Get l2 // { arity: 3 } + Get l2 // { arity: 2 } ArrangeBy keys=[[#0{messageid}]] // { arity: 1 } Distinct project=[#0{messageid}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{messageid}) // { arity: 1 } Get l1 // { arity: 4 } - Project (#2) // { arity: 1 } - Get l2 // { arity: 3 } - Project (#2, #3) // { arity: 2 } + Project (#1{creatorpersonid}) // { arity: 1 } + Get l2 // { arity: 2 } + Project (#2{creatorpersonid}, #3{personid}) // { arity: 2 } Filter (#0{name} = "Bob_Geldof") // { arity: 4 } Get l1 // { arity: 4 } cte l4 = - Project (#0{creatorpersonid}, #3) // { arity: 2 } + Project (#0{creatorpersonid}, #3{popularityscore}) // { arity: 2 } Join on=(#1{personid} = #2{person2id}) type=differential // { arity: 4 } implementation %1:popularityscoreq06[#0]UKA » %0:l3[#1]K @@ -1310,7 +1310,7 @@ Explained Query: Finish order_by=[#1{count} desc nulls_first, #0{name} asc nulls_last] limit=100 output=[#0, #1] With cte l0 = - Project (#1) // { arity: 1 } + Project (#1{messageid}) // { arity: 1 } Join on=(#2{tagid} = #3{id}) type=differential // { arity: 8 } implementation %1:tag[#0]KAe » %0:message_hastag_tag[#2]KAe @@ -1319,7 +1319,7 @@ Explained Query: ArrangeBy keys=[[#0{id}]] // { arity: 5 } ReadIndex on=materialize.public.tag tag_name=[lookup value=("Slovenia")] // { arity: 5 } cte l1 = - Project (#2, #18) // { arity: 2 } + Project (#2{messageid}, #18{name}) // { arity: 2 } Join on=(#0{messageid} = #13{parentmessageid} AND #2{messageid} = #15{messageid} AND #16{tagid} = #17{id}) type=delta // { arity: 21 } implementation %0:l0 » %1:message[#12]KA » %2:message_hastag_tag[#1]KA » %3:tag[#0]KA @@ -1340,7 +1340,7 @@ Explained Query: Get l1 // { arity: 2 } Return // { arity: 2 } Reduce group_by=[#0{name}] aggregates=[count(*)] // { arity: 2 } - Project (#1) // { arity: 1 } + Project (#1{name}) // { arity: 1 } Join on=(#0{messageid} = 
#2{messageid}) type=differential // { arity: 3 } implementation %0:l1[#0]K » %1[#0]K @@ -1435,7 +1435,7 @@ Explained Query: ArrangeBy keys=[[#0{id}]] // { arity: 5 } ReadIndex on=materialize.public.tag tag_name=[lookup value=("Abbas_I_of_Persia")] // { arity: 5 } cte l2 = - Project (#1) // { arity: 1 } + Project (#1{id}) // { arity: 1 } Join on=(#1{id} = #11{personid} AND #12{tagid} = #13{id}) type=delta // { arity: 18 } implementation %0:l0 » %1:person_hasinterest_tag[#0]K » %2:l1[#0]KAe @@ -1443,12 +1443,12 @@ Explained Query: %2:l1 » %1:person_hasinterest_tag[#1]KA » %0:l0[#1]KA Get l0 // { arity: 11 } ArrangeBy keys=[[#0{personid}], [#1{tagid}]] // { arity: 2 } - Project (#1{tagid}, #2) // { arity: 2 } + Project (#1{personid}, #2{tagid}) // { arity: 2 } ReadIndex on=person_hasinterest_tag person_hasinterest_tag_tagid=[*** full scan ***] // { arity: 3 } Get l1 // { arity: 5 } cte l3 = Reduce group_by=[#0{creatorpersonid}] aggregates=[count(*)] // { arity: 2 } - Project (#17) // { arity: 1 } + Project (#17{creatorpersonid}) // { arity: 1 } Filter (#8{creationdate} < 2010-06-28 00:00:00 UTC) AND (2010-06-14 00:00:00 UTC < #8{creationdate}) // { arity: 32 } Join on=(#0{id} = #7{tagid} AND #6{messageid} = #9{messageid} AND #17{creatorpersonid} = #22{id}) type=delta // { arity: 32 } implementation @@ -1466,7 +1466,7 @@ Explained Query: ArrangeBy keys=[[#0{creatorpersonid}]] // { arity: 2 } Get l3 // { arity: 2 } cte l5 = - Project (#0{id}, #2) // { arity: 2 } + Project (#0{id}, #2{count}) // { arity: 2 } Join on=(#0{id} = #1{creatorpersonid}) type=differential // { arity: 3 } implementation %1:l4[#0]UKA » %0:l2[#0]K @@ -1480,7 +1480,7 @@ Explained Query: Project (#3, #4) // { arity: 2 } Map (coalesce(#0{id}, #1{creatorpersonid}), (integer_to_bigint(case when (#0{id}) IS NULL then 0 else 100 end) + coalesce(#2{count}, 0))) // { arity: 5 } Union // { arity: 3 } - Project (#2{count}, #0, #1{creatorpersonid}) // { arity: 3 } + Project (#2, #0{creatorpersonid}, #1{count}) // { arity: 3 } Map (null) // { arity: 3 } Union // { arity: 2 } Negate // { arity: 2 } @@ -1498,7 +1498,7 @@ Explained Query: Negate // { arity: 1 } Get l6 // { arity: 1 } Get l2 // { arity: 1 } - Project (#0{id}, #0{id}, #1{id}) // { arity: 3 } + Project (#0{id}, #0{id}, #1{count}) // { arity: 3 } Get l5 // { arity: 2 } Return // { arity: 5 } Map (coalesce(#2{sum}, 0), (bigint_to_numeric(#1) + #3)) // { arity: 5 } @@ -1513,14 +1513,14 @@ Explained Query: Get l7 // { arity: 2 } ArrangeBy keys=[[#0{person1id}], [case when (#2) IS NULL then null else #1{person2id} end]] // { arity: 3 } Union // { arity: 3 } - Project (#1{person2id}..=#3) // { arity: 3 } + Project (#1{person1id}..=#3) // { arity: 3 } Map (true) // { arity: 4 } ReadIndex on=person_knows_person person_knows_person_person1id=[*** full scan ***] // { arity: 3 } Map (null, null) // { arity: 3 } Threshold // { arity: 1 } Union // { arity: 1 } Negate // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{person1id}) // { arity: 1 } ReadIndex on=person_knows_person person_knows_person_person1id=[*** full scan ***] // { arity: 3 } Distinct project=[#0] // { arity: 1 } Union // { arity: 1 } @@ -1542,7 +1542,7 @@ Explained Query: Get l7 // { arity: 2 } Distinct project=[#0{person2id}] // { arity: 1 } Union // { arity: 1 } - Project (#2) // { arity: 1 } + Project (#2{person2id}) // { arity: 1 } ReadIndex on=person_knows_person person_knows_person_person1id=[*** full scan ***] // { arity: 3 } Constant // { arity: 1 } - (null) @@ -1589,7 +1589,7 @@ SELECT Person.id AS 
"person.id" Explained Query: Finish order_by=[#4{sum_count} desc nulls_first, #0{id} asc nulls_last] limit=100 output=[#0..=#4] Reduce group_by=[#0{id}..=#2{lastname}] aggregates=[count(*), sum(#3{count})] // { arity: 5 } - Project (#1{firstname}..=#3{count}, #25) // { arity: 4 } + Project (#1{id}..=#3{lastname}, #25{count}) // { arity: 4 } Filter (#23{parentmessageid}) IS NULL AND (#11{creationdate} <= 2012-11-24 00:00:00 UTC) AND (#11{creationdate} >= 2012-08-29 00:00:00 UTC) // { arity: 26 } Join on=(#1{id} = #20{creatorpersonid} AND #12{messageid} = #24{rootpostid}) type=delta // { arity: 26 } implementation @@ -1602,7 +1602,7 @@ Explained Query: ReadIndex on=message message_messageid=[delta join lookup] message_creatorpersonid=[delta join lookup] // { arity: 13 } ArrangeBy keys=[[#0{rootpostid}]] // { arity: 2 } Reduce group_by=[#0{rootpostid}] aggregates=[count(*)] // { arity: 2 } - Project (#2) // { arity: 1 } + Project (#2{rootpostid}) // { arity: 1 } Filter (#0{creationdate} <= 2012-11-24 00:00:00 UTC) AND (#0{creationdate} >= 2012-08-29 00:00:00 UTC) // { arity: 13 } ReadIndex on=message message_messageid=[*** full scan ***] // { arity: 13 } @@ -1707,9 +1707,9 @@ Explained Query: cte l2 = Distinct project=[#0{person2id}] // { arity: 1 } Union // { arity: 1 } - Project (#2) // { arity: 1 } + Project (#2{person2id}) // { arity: 1 } Get l1 // { arity: 4 } - Project (#6) // { arity: 1 } + Project (#6{person2id}) // { arity: 1 } Join on=(#2{person2id} = #5{person1id}) type=differential // { arity: 7 } implementation %0:l1[#2]KAe » %1:l0[#1]KAe @@ -1718,7 +1718,7 @@ Explained Query: Get l0 // { arity: 3 } Return // { arity: 3 } Reduce group_by=[#0{person2id}, #1{name}] aggregates=[count(*)] // { arity: 3 } - Project (#0{person2id}, #9) // { arity: 2 } + Project (#0{person2id}, #9{name}) // { arity: 2 } Join on=(#0{person2id} = #1{id} = #2{creatorpersonid} AND #3{messageid} = #4{messageid} = #6{messageid} AND #7{tagid} = #8{id}) type=delta // { arity: 12 } implementation %0 » %1[#0]UKA » %2[#0]K » %3[#0]UKA » %4:message_hastag_tag[#1]KA » %5:tag[#0]KA @@ -1732,7 +1732,7 @@ Explained Query: Threshold // { arity: 1 } Union // { arity: 1 } Distinct project=[#0{person2id}] // { arity: 1 } - Project (#6) // { arity: 1 } + Project (#6{person2id}) // { arity: 1 } Join on=(#0{person2id} = #2{person1id} AND #3{person2id} = #5{person1id}) type=delta // { arity: 7 } implementation %0:l2 » %1:person_knows_person[#1]KA » %2:l0[#1]KA @@ -1747,7 +1747,7 @@ Explained Query: Get l2 // { arity: 1 } ArrangeBy keys=[[#0{id}]] // { arity: 1 } Distinct project=[#0{id}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{id}) // { arity: 1 } Filter (#16{name} = "Italy") AND (#1{id}) IS NOT NULL AND (#14{partofcountryid}) IS NOT NULL // { arity: 19 } Join on=(#8{locationcityid} = #11{id} AND #14{partofcountryid} = #15{id}) type=delta // { arity: 19 } implementation @@ -1762,11 +1762,11 @@ Explained Query: ReadIndex on=country country_id=[delta join lookup] // { arity: 4 } ArrangeBy keys=[[#0{creatorpersonid}], [#1{messageid}]] // { arity: 2 } Distinct project=[#1{creatorpersonid}, #0{messageid}] // { arity: 2 } - Project (#1{creatorpersonid}, #9) // { arity: 2 } + Project (#1{messageid}, #9{creatorpersonid}) // { arity: 2 } ReadIndex on=message message_messageid=[*** full scan ***] // { arity: 13 } ArrangeBy keys=[[#0{messageid}]] // { arity: 1 } Distinct project=[#0{messageid}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{messageid}) // { arity: 1 } Join on=(#2{tagid} = #3{id} AND 
#6{typetagclassid} = #7{id}) type=delta // { arity: 12 } implementation %0:message_hastag_tag » %1:tag[#0]KA » %2:tagclass[#0]KAe @@ -1842,7 +1842,7 @@ SELECT count(*) Explained Query: With cte l0 = - Project (#1{person2id}, #21) // { arity: 2 } + Project (#1{id}, #21{person2id}) // { arity: 2 } Filter (#16{name} = "India") AND (#19{creationdate} <= 2013-01-10 00:00:00 UTC) AND (2012-09-28 00:00:00 UTC <= #19{creationdate}) AND (#14{partofcountryid}) IS NOT NULL // { arity: 22 } Join on=(#1{id} = #20{person1id} AND #8{locationcityid} = #11{id} AND #14{partofcountryid} = #15{id}) type=delta // { arity: 22 } implementation @@ -1939,7 +1939,7 @@ Explained Query: ArrangeBy keys=[[#1{id}]] // { arity: 11 } ReadIndex on=person person_id=[differential join] // { arity: 11 } cte l1 = - Project (#1{messageid}, #12) // { arity: 2 } + Project (#1{id}, #12{messageid}) // { arity: 2 } Filter (#19{length} < 120) AND (#11{creationdate} > 2012-06-03 00:00:00 UTC) AND (#15{content}) IS NOT NULL // { arity: 25 } Join on=(#1{id} = #20{creatorpersonid}) type=differential // { arity: 25 } implementation @@ -1949,13 +1949,13 @@ Explained Query: ReadIndex on=materialize.public.message message_rootpostlanguage=[lookup values=[("es"); ("pt"); ("ta")]] // { arity: 14 } Return // { arity: 2 } Reduce group_by=[#0{count_messageid}] aggregates=[count(*)] // { arity: 2 } - Project (#1) // { arity: 1 } + Project (#1{count_messageid}) // { arity: 1 } Reduce group_by=[#0{id}] aggregates=[count(#1{messageid})] // { arity: 2 } Union // { arity: 2 } Map (null) // { arity: 2 } Union // { arity: 1 } Negate // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{id}) // { arity: 1 } Join on=(#1{id} = #11{id}) type=differential // { arity: 12 } implementation %1[#0]UKA » %0:l0[#1]KA @@ -1964,7 +1964,7 @@ Explained Query: Distinct project=[#0{id}] // { arity: 1 } Project (#0{id}) // { arity: 1 } Get l1 // { arity: 2 } - Project (#1) // { arity: 1 } + Project (#1{id}) // { arity: 1 } ReadIndex on=person person_id=[*** full scan ***] // { arity: 11 } Get l1 // { arity: 2 } @@ -2010,7 +2010,7 @@ Explained Query: ArrangeBy keys=[[]] // { arity: 11 } ReadIndex on=person person_id=[*** full scan ***] // { arity: 11 } ArrangeBy keys=[[]] // { arity: 6 } - Project (#0{creationdate}, #1{messageid}, #3{content}, #4{length}, #8, #9) // { arity: 6 } + Project (#0{creationdate}, #1{messageid}, #3{rootpostlanguage}, #4{content}, #8{length}, #9{creatorpersonid}) // { arity: 6 } ReadIndex on=message message_messageid=[*** full scan ***] // { arity: 13 } cte l1 = Project (#0{creationdate}..=#11{messageid}) // { arity: 12 } @@ -2018,7 +2018,7 @@ Explained Query: implementation %1[#0]UKA » %0:l0[#12]Kiif ArrangeBy keys=[[#12{rootpostlanguage}]] // { arity: 13 } - Project (#0{creationdate}..=#10{email}, #12{rootpostlanguage}, #13) // { arity: 13 } + Project (#0{creationdate}..=#10{email}, #12{messageid}, #13{rootpostlanguage}) // { arity: 13 } Filter (#15{length} < 120) AND (#11{creationdate} > 2012-06-03 00:00:00 UTC) AND (#14{content}) IS NOT NULL AND (#1{id} = #16{creatorpersonid}) // { arity: 17 } Get l0 // { arity: 17 } ArrangeBy keys=[[#0{rootpostlanguage}]] // { arity: 1 } @@ -2027,16 +2027,16 @@ Explained Query: Filter (#0{rootpostlanguage} = varchar_to_text(#1)) // { arity: 2 } FlatMap unnest_array({"es", "ta", "pt"}) // { arity: 2 } Distinct project=[#0{rootpostlanguage}] // { arity: 1 } - Project (#13) // { arity: 1 } + Project (#13{rootpostlanguage}) // { arity: 1 } Get l0 // { arity: 17 } Return // { arity: 2 } Reduce 
group_by=[#0{count_messageid}] aggregates=[count(*)] // { arity: 2 } - Project (#1) // { arity: 1 } + Project (#1{count_messageid}) // { arity: 1 } Reduce group_by=[#0{id}] aggregates=[count(#1{messageid})] // { arity: 2 } Union // { arity: 2 } - Project (#1{messageid}, #11) // { arity: 2 } + Project (#1{id}, #11{messageid}) // { arity: 2 } Get l1 // { arity: 12 } - Project (#1, #22) // { arity: 2 } + Project (#1{id}, #22) // { arity: 2 } Map (null) // { arity: 23 } Join on=(#0{creationdate} = #11{creationdate} AND #1{id} = #12{id} AND #2{firstname} = #13{firstname} AND #3{lastname} = #14{lastname} AND #4{gender} = #15{gender} AND #5{birthday} = #16{birthday} AND #6{locationip} = #17{locationip} AND #7{browserused} = #18{browserused} AND #8{locationcityid} = #19{locationcityid} AND #9{speaks} = #20{speaks} AND #10{email} = #21{email}) type=differential // { arity: 22 } implementation @@ -2104,7 +2104,7 @@ Explained Query: Finish order_by=[#3 desc nulls_first, #0{id} asc nulls_last] limit=100 output=[#0..=#3] With cte l0 = - Project (#0{id}, #2{partofcontinentid}..=#6{creationdate}, #8{firstname}..=#15{email}, #17, #18) // { arity: 16 } + Project (#0{id}, #2{url}..=#6{url}, #8{creationdate}..=#15{browserused}, #17{speaks}, #18{email}) // { arity: 16 } Filter (#1{name} = "India") AND (#8{creationdate} < 2012-11-09 00:00:00 UTC) AND (#0{id}) IS NOT NULL // { arity: 19 } Join on=(#0{id} = #7{partofcountryid} AND #4{id} = #16{locationcityid}) type=delta // { arity: 19 } implementation @@ -2118,7 +2118,7 @@ Explained Query: ArrangeBy keys=[[#8{locationcityid}]] // { arity: 11 } ReadIndex on=person person_locationcityid=[delta join lookup] // { arity: 11 } cte l1 = - Project (#0{id}..=#15{email}, #17) // { arity: 17 } + Project (#0{id}..=#15{email}, #17{messageid}) // { arity: 17 } Filter (#16{creationdate} <= 2012-11-09 00:00:00 UTC) AND (#16{creationdate} >= #6{creationdate}) // { arity: 29 } Join on=(#7{id} = #25{creatorpersonid}) type=differential // { arity: 29 } implementation @@ -2133,9 +2133,9 @@ Explained Query: Filter (bigint_to_numeric(#2{count_messageid}) < ((24155 - ((12 * extract_year_tstz(#1{creationdate})) + extract_month_tstz(#1{creationdate}))) + 1)) // { arity: 3 } Reduce group_by=[#1{id}, #0{creationdate}] aggregates=[count(#2{messageid})] // { arity: 3 } Union // { arity: 3 } - Project (#6, #7, #16) // { arity: 3 } + Project (#6{creationdate}, #7{id}, #16{messageid}) // { arity: 3 } Get l1 // { arity: 17 } - Project (#6, #7, #32) // { arity: 3 } + Project (#6{creationdate}, #7{id}, #32) // { arity: 3 } Map (null) // { arity: 33 } Join on=(#0{id} = #16{id} AND #1{url} = #17{url} AND #2{partofcontinentid} = #18{partofcontinentid} AND #3{id} = #19{id} AND #4{name} = #20{name} AND #5{url} = #21{url} AND #6{creationdate} = #22{creationdate} AND #7{id} = #23{id} AND #8{firstname} = #24{firstname} AND #9{lastname} = #25{lastname} AND #10{gender} = #26{gender} AND #11{birthday} = #27{birthday} AND #12{locationip} = #28{locationip} AND #13{browserused} = #29{browserused} AND #14{speaks} = #30{speaks} AND #15{email} = #31{email}) type=differential // { arity: 32 } implementation @@ -2159,7 +2159,7 @@ Explained Query: ArrangeBy keys=[[#0{id}]] // { arity: 1 } Get l3 // { arity: 1 } cte l5 = - Project (#1{creatorpersonid}, #23) // { arity: 2 } + Project (#1{id}, #23{creatorpersonid}) // { arity: 2 } Filter (#0{creationdate} < 2012-11-09 00:00:00 UTC) // { arity: 28 } Join on=(#1{id} = #12{personid} AND #13{messageid} = #15{messageid} AND #23{creatorpersonid} = #27{id}) type=delta // { 
arity: 28 } implementation @@ -2189,14 +2189,14 @@ Explained Query: Distinct project=[#0{id}] // { arity: 1 } Get l3 // { arity: 1 } cte l8 = - Project (#0{id}, #2{sum}, #3) // { arity: 3 } + Project (#0{id}, #2{count}, #3{sum}) // { arity: 3 } Join on=(#0{id} = #1{creatorpersonid}) type=differential // { arity: 4 } implementation %1[#0]UKA » %0:l4[#0]K Get l4 // { arity: 1 } ArrangeBy keys=[[#0{creatorpersonid}]] // { arity: 3 } Reduce group_by=[#0{creatorpersonid}] aggregates=[count(*), sum(case when #1 then 1 else 0 end)] // { arity: 3 } - Project (#1, #3) // { arity: 2 } + Project (#1{creatorpersonid}, #3) // { arity: 2 } Join on=(#0{id} = #2{id}) type=differential // { arity: 4 } implementation %0:l5[#0]K » %1[#0]K @@ -2317,7 +2317,7 @@ Explained Query: ArrangeBy keys=[[#1{id}], [#8{locationcityid}]] // { arity: 11 } ReadIndex on=person person_id=[delta join lookup] person_locationcityid=[delta join lookup] // { arity: 11 } cte l3 = - Project (#4, #5, #9, #21) // { arity: 4 } + Project (#4{id}, #5{name}, #9{id}, #21{person2id}) // { arity: 4 } Filter (#1{name} = "Philippines") AND (#38{name} = "Taiwan") AND (#0{id}) IS NOT NULL AND (#36{partofcountryid}) IS NOT NULL // { arity: 41 } Join on=(#0{id} = #7{partofcountryid} AND #4{id} = #16{locationcityid} AND #9{id} = #20{person1id} AND #21{person2id} = #23{id} AND #30{locationcityid} = #33{id} AND #36{partofcountryid} = #37{id}) type=delta // { arity: 41 } implementation @@ -2339,10 +2339,10 @@ Explained Query: cte l4 = Map (case when #2 then #1{person2id} else #0{id} end, case when #2 then #0{id} else #1{person2id} end) // { arity: 5 } Union // { arity: 3 } - Project (#2..=#4) // { arity: 3 } + Project (#2{id}..=#4) // { arity: 3 } Map (false) // { arity: 5 } Get l3 // { arity: 4 } - Project (#3, #2, #4) // { arity: 3 } + Project (#3{person2id}, #2{id}, #4) // { arity: 3 } Map (true) // { arity: 5 } Get l3 // { arity: 4 } cte l5 = @@ -2361,7 +2361,7 @@ Explained Query: Get l5 // { arity: 2 } ArrangeBy keys=[[#0{creatorpersonid}, #1{creatorpersonid}]] // { arity: 2 } Distinct project=[#1{creatorpersonid}, #0{creatorpersonid}] // { arity: 2 } - Project (#9, #22) // { arity: 2 } + Project (#9{creatorpersonid}, #22{creatorpersonid}) // { arity: 2 } Join on=(#1{messageid} = #25{parentmessageid}) type=differential // { arity: 26 } implementation %0:l6[#1]KA » %1:message[#12]KA @@ -2397,7 +2397,7 @@ Explained Query: Get l9 // { arity: 2 } ArrangeBy keys=[[#0{personid}, #1{creatorpersonid}]] // { arity: 2 } Distinct project=[#1{personid}, #0{creatorpersonid}] // { arity: 2 } - Project (#9, #14) // { arity: 2 } + Project (#9{creatorpersonid}, #14{personid}) // { arity: 2 } Join on=(#1{messageid} = #15{messageid}) type=differential // { arity: 16 } implementation %0:l6[#1]KA » %1:person_likes_message[#2]KA @@ -2405,7 +2405,7 @@ Explained Query: ArrangeBy keys=[[#2{messageid}]] // { arity: 3 } ReadIndex on=person_likes_message person_likes_message_messageid=[differential join] // { arity: 3 } cte l11 = - Project (#0{id}..=#3{person2id}, #6) // { arity: 5 } + Project (#0{id}..=#3{person2id}, #6{sum}) // { arity: 5 } Join on=(#2{id} = #4 AND #3{person2id} = #5) type=differential // { arity: 7 } implementation %1[#0, #1]UKKA » %0:l3[#2, #3]KK @@ -2429,17 +2429,17 @@ Explained Query: Get l10 // { arity: 2 } Get l9 // { arity: 2 } Return // { arity: 4 } - Project (#0{id}, #1{person2id}, #3{sum}, #4) // { arity: 4 } + Project (#0{id}, #1{person2id}, #3{name}, #4{sum}) // { arity: 4 } TopK group_by=[#2{id}] order_by=[#4{sum} desc nulls_first, #0{id} asc 
nulls_last, #1{person2id} asc nulls_last] limit=1 // { arity: 5 } Union // { arity: 5 } Map (null) // { arity: 5 } Union // { arity: 4 } Negate // { arity: 4 } - Project (#2{id}, #3{name}, #0{id}, #1{person2id}) // { arity: 4 } + Project (#2{id}, #3{person2id}, #0{id}, #1{name}) // { arity: 4 } Get l11 // { arity: 5 } - Project (#2{id}, #3{name}, #0{id}, #1{person2id}) // { arity: 4 } + Project (#2{id}, #3{person2id}, #0{id}, #1{name}) // { arity: 4 } Get l3 // { arity: 4 } - Project (#2{id}, #3{name}, #0{id}, #1{person2id}, #4{sum}) // { arity: 5 } + Project (#2{id}, #3{person2id}, #0{id}, #1{name}, #4{sum}) // { arity: 5 } Get l11 // { arity: 5 } Used Indexes: @@ -2524,9 +2524,9 @@ SELECT coalesce(w, -1) FROM results ORDER BY w ASC LIMIT 20 ---- Explained Query: Finish order_by=[#1{min} asc nulls_last] limit=20 output=[#2] - With Mutually Recursive + With cte l0 = - Project (#1{person2id}, #2) // { arity: 2 } + Project (#1{person1id}, #2{person2id}) // { arity: 2 } ReadIndex on=person_knows_person person_knows_person_person1id=[*** full scan ***] // { arity: 3 } cte l1 = ArrangeBy keys=[[greatest(#0{person1id}, #1{person2id}), least(#0{person1id}, #1{person2id})]] // { arity: 2 } @@ -2534,7 +2534,7 @@ Explained Query: cte l2 = ArrangeBy keys=[[#0{id}]] // { arity: 1 } Distinct project=[#0{id}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{id}) // { arity: 1 } Filter (#0{creationdate} <= 2012-11-10 00:00:00 UTC) AND (#0{creationdate} >= 2012-11-06 00:00:00 UTC) AND (#1{id}) IS NOT NULL // { arity: 4 } ReadIndex on=forum forum_id=[*** full scan ***] // { arity: 4 } cte l3 = @@ -2544,64 +2544,66 @@ Explained Query: Get l1 // { arity: 2 } ArrangeBy keys=[[#1, #0]] // { arity: 3 } Reduce group_by=[least(#0{person1id}, #1{person2id}), greatest(#0{person1id}, #1{person2id})] aggregates=[sum(case when (#2{parentmessageid}) IS NULL then 10 else 5 end)] // { arity: 3 } - Project (#1{person2id}, #2{parentmessageid}, #6) // { arity: 3 } + Project (#1{person1id}, #2{person2id}, #6{parentmessageid}) // { arity: 3 } Join on=(#1{person1id} = #4{creatorpersonid} AND #2{person2id} = #7{creatorpersonid} AND #3{messageid} = #9{parentmessageid} AND #5{containerforumid} = #10{id} AND #8{containerforumid} = #11{id}) type=delta // { arity: 12 } implementation - %0:person_knows_person » %1:message[#1]KA » %3:l2[#0]UK » %2:message[#0, #2]KK » %4:l2[#0]UK - %1:message » %3:l2[#0]UK » %0:person_knows_person[#1]KA » %2:message[#0, #2]KK » %4:l2[#0]UK - %2:message » %4:l2[#0]UK » %0:person_knows_person[#2]KA » %1:message[#0, #1]KK » %3:l2[#0]UK - %3:l2 » %1:message[#2]KA » %0:person_knows_person[#1]KA » %2:message[#0, #2]KK » %4:l2[#0]UK - %4:l2 » %2:message[#1]KA » %0:person_knows_person[#2]KA » %1:message[#0, #1]KK » %3:l2[#0]UK + %0:person_knows_person » %1:message[#1]KA » %3:l2[#0]UKA » %2:message[#0, #2]KK » %4:l2[#0]UKA + %1:message » %3:l2[#0]UKA » %0:person_knows_person[#1]KA » %2:message[#0, #2]KK » %4:l2[#0]UKA + %2:message » %4:l2[#0]UKA » %0:person_knows_person[#2]KA » %1:message[#0, #1]KK » %3:l2[#0]UKA + %3:l2 » %1:message[#2]KA » %0:person_knows_person[#1]KA » %2:message[#0, #2]KK » %4:l2[#0]UKA + %4:l2 » %2:message[#1]KA » %0:person_knows_person[#2]KA » %1:message[#0, #1]KK » %3:l2[#0]UKA ArrangeBy keys=[[#1{person1id}], [#2{person2id}]] // { arity: 3 } ReadIndex on=person_knows_person person_knows_person_person1id=[delta join 1st input (full scan)] person_knows_person_person2id=[delta join lookup] // { arity: 3 } ArrangeBy keys=[[#0{messageid}, #1{creatorpersonid}], 
[#1{creatorpersonid}], [#2{containerforumid}]] // { arity: 4 } - Project (#1{creatorpersonid}, #9, #10, #12) // { arity: 4 } + Project (#1{messageid}, #9{creatorpersonid}, #10{containerforumid}, #12{parentmessageid}) // { arity: 4 } ReadIndex on=message message_messageid=[*** full scan ***] // { arity: 13 } ArrangeBy keys=[[#0{creatorpersonid}, #2{parentmessageid}], [#1{containerforumid}]] // { arity: 3 } - Project (#9, #10, #12) // { arity: 3 } + Project (#9{creatorpersonid}, #10{containerforumid}, #12{parentmessageid}) // { arity: 3 } Filter (#12{parentmessageid}) IS NOT NULL // { arity: 13 } ReadIndex on=message message_messageid=[*** full scan ***] // { arity: 13 } Get l2 // { arity: 1 } Get l2 // { arity: 1 } - cte l4 = - Project (#2{min}, #0, #1{person2id}) // { arity: 3 } - Map (1450) // { arity: 3 } - Reduce group_by=[#0{person2id}] aggregates=[min(#1)] // { arity: 2 } - Distinct project=[#0{person2id}, #1] // { arity: 2 } - Union // { arity: 2 } - Project (#3, #5) // { arity: 2 } - Map ((#1 + #4)) // { arity: 6 } - Join on=(#0 = #2{person1id}) type=differential // { arity: 5 } - implementation - %0:l4[#0]UK » %1[#0]K - ArrangeBy keys=[[#0]] // { arity: 2 } - Project (#1, #2) // { arity: 2 } - Get l4 // { arity: 3 } - ArrangeBy keys=[[#0{person1id}]] // { arity: 3 } - Project (#0{person1id}, #1{person2id}, #3) // { arity: 3 } - Map ((10 / bigint_to_double((coalesce(#2{sum}, 0) + 10)))) // { arity: 4 } - Union // { arity: 3 } - Map (null) // { arity: 3 } - Union // { arity: 2 } - Negate // { arity: 2 } - Project (#0{person1id}, #1{person2id}) // { arity: 2 } - Join on=(#2 = least(#0{person1id}, #1{person2id}) AND #3 = greatest(#0{person1id}, #1{person2id})) type=differential // { arity: 4 } - implementation - %1[#1, #0]UKK » %0:l1[greatest(#0, #1), least(#0, #1)]KK - Get l1 // { arity: 2 } - ArrangeBy keys=[[#1, #0]] // { arity: 2 } - Distinct project=[#0, #1] // { arity: 2 } - Project (#2, #3) // { arity: 2 } - Get l3 // { arity: 5 } - Get l0 // { arity: 2 } - Project (#0{person1id}, #1{person2id}, #4) // { arity: 3 } - Get l3 // { arity: 5 } - Constant // { arity: 2 } - - (1450, 0) Return // { arity: 3 } - Project (#1{min}, #2{min}, #2{min}) // { arity: 3 } - Filter (#1{person2id} = 15393162796819) AND (#2{min} = #2{min}) // { arity: 3 } - Get l4 // { arity: 3 } + With Mutually Recursive + cte l4 = + Project (#2, #0{person2id}, #1{min}) // { arity: 3 } + Map (1450) // { arity: 3 } + Reduce group_by=[#0{person2id}] aggregates=[min(#1)] // { arity: 2 } + Distinct project=[#0{person2id}, #1] // { arity: 2 } + Union // { arity: 2 } + Project (#3{person2id}, #5) // { arity: 2 } + Map ((#1 + #4)) // { arity: 6 } + Join on=(#0 = #2{person1id}) type=differential // { arity: 5 } + implementation + %0:l4[#0]UK » %1[#0]K + ArrangeBy keys=[[#0]] // { arity: 2 } + Project (#1{person2id}, #2{min}) // { arity: 2 } + Get l4 // { arity: 3 } + ArrangeBy keys=[[#0{person1id}]] // { arity: 3 } + Project (#0{person1id}, #1{person2id}, #3) // { arity: 3 } + Map ((10 / bigint_to_double((coalesce(#2{sum}, 0) + 10)))) // { arity: 4 } + Union // { arity: 3 } + Map (null) // { arity: 3 } + Union // { arity: 2 } + Negate // { arity: 2 } + Project (#0{person1id}, #1{person2id}) // { arity: 2 } + Join on=(#2 = least(#0{person1id}, #1{person2id}) AND #3 = greatest(#0{person1id}, #1{person2id})) type=differential // { arity: 4 } + implementation + %1[#1, #0]UKK » %0:l1[greatest(#0, #1), least(#0, #1)]KK + Get l1 // { arity: 2 } + ArrangeBy keys=[[#1, #0]] // { arity: 2 } + Distinct project=[#0, #1] // { arity: 2 
} + Project (#2, #3) // { arity: 2 } + Get l3 // { arity: 5 } + Get l0 // { arity: 2 } + Project (#0{person1id}, #1{person2id}, #4{sum}) // { arity: 3 } + Get l3 // { arity: 5 } + Constant // { arity: 2 } + - (1450, 0) + Return // { arity: 3 } + Project (#1{person2id}, #2{min}, #2{min}) // { arity: 3 } + Filter (#1{person2id} = 15393162796819) AND (#2{min} = #2{min}) // { arity: 3 } + Get l4 // { arity: 3 } Used Indexes: - materialize.public.forum_id (*** full scan ***) @@ -2720,9 +2722,9 @@ EXPLAIN WITH(humanized expressions, arity, join implementations) WITH MUTUALLY R SELECT coalesce(min(w), -1) FROM results ---- Explained Query: - With Mutually Recursive + With cte l0 = - Project (#1{person2id}, #2) // { arity: 2 } + Project (#1{person1id}, #2{person2id}) // { arity: 2 } ReadIndex on=person_knows_person person_knows_person_person1id=[*** full scan ***] // { arity: 3 } cte l1 = ArrangeBy keys=[[greatest(#0{person1id}, #1{person2id}), least(#0{person1id}, #1{person2id})]] // { arity: 2 } @@ -2730,7 +2732,7 @@ Explained Query: cte l2 = ArrangeBy keys=[[#0{id}]] // { arity: 1 } Distinct project=[#0{id}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{id}) // { arity: 1 } Filter (#0{creationdate} <= 2012-11-10 00:00:00 UTC) AND (#0{creationdate} >= 2012-11-06 00:00:00 UTC) AND (#1{id}) IS NOT NULL // { arity: 4 } ReadIndex on=forum forum_id=[*** full scan ***] // { arity: 4 } cte l3 = @@ -2740,21 +2742,21 @@ Explained Query: Get l1 // { arity: 2 } ArrangeBy keys=[[#1, #0]] // { arity: 3 } Reduce group_by=[least(#0{person1id}, #1{person2id}), greatest(#0{person1id}, #1{person2id})] aggregates=[sum(case when (#2{parentmessageid}) IS NULL then 10 else 5 end)] // { arity: 3 } - Project (#1{person2id}, #2{parentmessageid}, #6) // { arity: 3 } + Project (#1{person1id}, #2{person2id}, #6{parentmessageid}) // { arity: 3 } Join on=(#1{person1id} = #4{creatorpersonid} AND #2{person2id} = #7{creatorpersonid} AND #3{messageid} = #9{parentmessageid} AND #5{containerforumid} = #10{id} AND #8{containerforumid} = #11{id}) type=delta // { arity: 12 } implementation - %0:person_knows_person » %1:message[#1]KA » %3:l2[#0]UK » %2:message[#0, #2]KK » %4:l2[#0]UK - %1:message » %3:l2[#0]UK » %0:person_knows_person[#1]KA » %2:message[#0, #2]KK » %4:l2[#0]UK - %2:message » %4:l2[#0]UK » %0:person_knows_person[#2]KA » %1:message[#0, #1]KK » %3:l2[#0]UK - %3:l2 » %1:message[#2]KA » %0:person_knows_person[#1]KA » %2:message[#0, #2]KK » %4:l2[#0]UK - %4:l2 » %2:message[#1]KA » %0:person_knows_person[#2]KA » %1:message[#0, #1]KK » %3:l2[#0]UK + %0:person_knows_person » %1:message[#1]KA » %3:l2[#0]UKA » %2:message[#0, #2]KK » %4:l2[#0]UKA + %1:message » %3:l2[#0]UKA » %0:person_knows_person[#1]KA » %2:message[#0, #2]KK » %4:l2[#0]UKA + %2:message » %4:l2[#0]UKA » %0:person_knows_person[#2]KA » %1:message[#0, #1]KK » %3:l2[#0]UKA + %3:l2 » %1:message[#2]KA » %0:person_knows_person[#1]KA » %2:message[#0, #2]KK » %4:l2[#0]UKA + %4:l2 » %2:message[#1]KA » %0:person_knows_person[#2]KA » %1:message[#0, #1]KK » %3:l2[#0]UKA ArrangeBy keys=[[#1{person1id}], [#2{person2id}]] // { arity: 3 } ReadIndex on=person_knows_person person_knows_person_person1id=[delta join 1st input (full scan)] person_knows_person_person2id=[delta join lookup] // { arity: 3 } ArrangeBy keys=[[#0{messageid}, #1{creatorpersonid}], [#1{creatorpersonid}], [#2{containerforumid}]] // { arity: 4 } - Project (#1{creatorpersonid}, #9, #10, #12) // { arity: 4 } + Project (#1{messageid}, #9{creatorpersonid}, #10{containerforumid}, 
#12{parentmessageid}) // { arity: 4 } ReadIndex on=message message_messageid=[*** full scan ***] // { arity: 13 } ArrangeBy keys=[[#0{creatorpersonid}, #2{parentmessageid}], [#1{containerforumid}]] // { arity: 3 } - Project (#9, #10, #12) // { arity: 3 } + Project (#9{creatorpersonid}, #10{containerforumid}, #12{parentmessageid}) // { arity: 3 } Filter (#12{parentmessageid}) IS NOT NULL // { arity: 13 } ReadIndex on=message message_messageid=[*** full scan ***] // { arity: 13 } Get l2 // { arity: 1 } @@ -2776,222 +2778,224 @@ Explained Query: Project (#2, #3) // { arity: 2 } Get l3 // { arity: 5 } Get l0 // { arity: 2 } - Project (#0{person1id}, #1{person2id}, #4) // { arity: 3 } + Project (#0{person1id}, #1{person2id}, #4{sum}) // { arity: 3 } Get l3 // { arity: 5 } - cte l5 = - Project (#1{person2id}, #3) // { arity: 2 } - Join on=(#0 = #2{person1id}) type=differential // { arity: 4 } - implementation - %0:l8[#0]K » %1:l4[#0]K - ArrangeBy keys=[[#0{person2id}]] // { arity: 2 } - Get l8 // { arity: 2 } - ArrangeBy keys=[[#0{person1id}]] // { arity: 2 } - Project (#0{person1id}, #1{person2id}) // { arity: 2 } - Get l4 // { arity: 3 } - cte l6 = - Union // { arity: 2 } - Project (#1, #0{person2id}) // { arity: 2 } - Get l5 // { arity: 2 } - Get l8 // { arity: 2 } - cte l7 = - Distinct project=[] // { arity: 0 } - Project () // { arity: 0 } - Join on=(#0{person2id} = #1{person2id}) type=differential // { arity: 2 } + Return // { arity: 1 } + With Mutually Recursive + cte l5 = + Project (#1, #3{person2id}) // { arity: 2 } + Join on=(#0 = #2{person1id}) type=differential // { arity: 4 } implementation - %0:l6[#0]Kf » %1:l6[#0]Kf - ArrangeBy keys=[[#0{person2id}]] // { arity: 1 } - Project (#0{person2id}) // { arity: 1 } - Filter #1 // { arity: 2 } - Get l6 // { arity: 2 } - ArrangeBy keys=[[#0{person2id}]] // { arity: 1 } - Project (#0{person2id}) // { arity: 1 } - Filter NOT(#1) // { arity: 2 } - Get l6 // { arity: 2 } - cte l8 = - Distinct project=[#0{person2id}, #1] // { arity: 2 } + %0:l8[#0]K » %1:l4[#0]K + ArrangeBy keys=[[#0{person2id}]] // { arity: 2 } + Get l8 // { arity: 2 } + ArrangeBy keys=[[#0{person1id}]] // { arity: 2 } + Project (#0{person1id}, #1{person2id}) // { arity: 2 } + Get l4 // { arity: 3 } + cte l6 = Union // { arity: 2 } - Project (#1, #0{person2id}) // { arity: 2 } - CrossJoin type=differential // { arity: 2 } + Project (#1{person2id}, #0) // { arity: 2 } + Get l5 // { arity: 2 } + Get l8 // { arity: 2 } + cte l7 = + Distinct project=[] // { arity: 0 } + Project () // { arity: 0 } + Join on=(#0{person2id} = #1{person2id}) type=differential // { arity: 2 } implementation - %0:l5[×] » %1[×] - ArrangeBy keys=[[]] // { arity: 2 } - Get l5 // { arity: 2 } - ArrangeBy keys=[[]] // { arity: 0 } - Union // { arity: 0 } - Negate // { arity: 0 } - Get l7 // { arity: 0 } - Constant // { arity: 0 } - - () - Project (#1, #0) // { arity: 2 } - Map (true, -1) // { arity: 2 } - Get l7 // { arity: 0 } - Constant // { arity: 2 } - - (1450, true) - - (15393162796819, false) - cte l9 = - TopK order_by=[#3 asc nulls_last] limit=1000 // { arity: 5 } - Project (#0..=#3, #5) // { arity: 5 } - Filter (#4 = false) // { arity: 6 } + %0:l6[#0]Kf » %1:l6[#0]Kf + ArrangeBy keys=[[#0{person2id}]] // { arity: 1 } + Project (#0{person2id}) // { arity: 1 } + Filter #1 // { arity: 2 } + Get l6 // { arity: 2 } + ArrangeBy keys=[[#0{person2id}]] // { arity: 1 } + Project (#0{person2id}) // { arity: 1 } + Filter NOT(#1) // { arity: 2 } + Get l6 // { arity: 2 } + cte l8 = + Distinct 
project=[#0{person2id}, #1] // { arity: 2 } + Union // { arity: 2 } + Project (#1{person2id}, #0) // { arity: 2 } + CrossJoin type=differential // { arity: 2 } + implementation + %0:l5[×] » %1[×] + ArrangeBy keys=[[]] // { arity: 2 } + Get l5 // { arity: 2 } + ArrangeBy keys=[[]] // { arity: 0 } + Union // { arity: 0 } + Negate // { arity: 0 } + Get l7 // { arity: 0 } + Constant // { arity: 0 } + - () + Project (#1, #0) // { arity: 2 } + Map (true, -1) // { arity: 2 } + Get l7 // { arity: 0 } + Constant // { arity: 2 } + - (1450, true) + - (15393162796819, false) + cte l9 = + TopK order_by=[#3 asc nulls_last] limit=1000 // { arity: 5 } + Project (#0..=#3, #5) // { arity: 5 } + Filter (#4 = false) // { arity: 6 } + Get l17 // { arity: 6 } + cte l10 = + Distinct project=[#0..=#2] // { arity: 3 } + Project (#0..=#2{person2id}) // { arity: 3 } Get l17 // { arity: 6 } - cte l10 = - Distinct project=[#0..=#2] // { arity: 3 } + cte l11 = + ArrangeBy keys=[[#0..=#2]] // { arity: 3 } + Get l10 // { arity: 3 } + cte l12 = Project (#0..=#2) // { arity: 3 } - Get l17 // { arity: 6 } - cte l11 = - ArrangeBy keys=[[#0..=#2]] // { arity: 3 } - Get l10 // { arity: 3 } - cte l12 = - Project (#0..=#2) // { arity: 3 } - Join on=(#0 = #3 AND #1 = #4 AND #2 = #5) type=differential // { arity: 6 } - implementation - %1[#0..=#2]UKKKA » %0:l11[#0..=#2]UKKK - Get l11 // { arity: 3 } - ArrangeBy keys=[[#0..=#2]] // { arity: 3 } - Distinct project=[#0..=#2] // { arity: 3 } - Project (#0..=#2) // { arity: 3 } - Get l9 // { arity: 5 } - cte l13 = - TopK group_by=[#0, #1, #2{person2id}] order_by=[#3 asc nulls_last, #4 desc nulls_first] limit=1 // { arity: 5 } - Union // { arity: 5 } - Project (#3, #4, #1, #7, #8) // { arity: 5 } - Map ((#6 + #2), false) // { arity: 9 } - Join on=(#0{person1id} = #5) type=differential // { arity: 7 } - implementation - %0:l4[#0]K » %1:l9[#2]K - ArrangeBy keys=[[#0{person1id}]] // { arity: 3 } - Get l4 // { arity: 3 } - ArrangeBy keys=[[#2]] // { arity: 4 } - Project (#0..=#3) // { arity: 4 } - Get l9 // { arity: 5 } - Project (#0..=#3, #9) // { arity: 5 } - Map ((#4 OR #8)) // { arity: 10 } - Join on=(#0 = #5 AND #1 = #6 AND #2 = #7) type=differential // { arity: 9 } - implementation - %0:l17[#0..=#2]KKK » %1[#0..=#2]KKK - ArrangeBy keys=[[#0..=#2]] // { arity: 5 } - Project (#0..=#4) // { arity: 5 } - Get l17 // { arity: 6 } - ArrangeBy keys=[[#0..=#2]] // { arity: 4 } - Union // { arity: 4 } - Map (true) // { arity: 4 } - Get l12 // { arity: 3 } - Project (#0..=#2, #6) // { arity: 4 } - Map (false) // { arity: 7 } - Join on=(#0 = #3 AND #1 = #4 AND #2 = #5) type=differential // { arity: 6 } - implementation - %1:l11[#0..=#2]UKKK » %0[#0..=#2]KKK - ArrangeBy keys=[[#0..=#2]] // { arity: 3 } - Union // { arity: 3 } - Negate // { arity: 3 } - Get l12 // { arity: 3 } - Get l10 // { arity: 3 } - Get l11 // { arity: 3 } - cte l14 = - Reduce aggregates=[min((#0 + #1))] // { arity: 1 } - Project (#1, #3) // { arity: 2 } - Join on=(#0{person2id} = #2{person2id}) type=differential // { arity: 4 } + Join on=(#0 = #3 AND #1 = #4 AND #2 = #5) type=differential // { arity: 6 } implementation - %0:l13[#0]Kef » %1:l13[#0]Kef - ArrangeBy keys=[[#0{person2id}]] // { arity: 2 } - Project (#2, #3) // { arity: 2 } - Filter (#0 = false) // { arity: 5 } - Get l13 // { arity: 5 } - ArrangeBy keys=[[#0{person2id}]] // { arity: 2 } - Project (#2, #3) // { arity: 2 } - Filter (#0 = true) // { arity: 5 } - Get l13 // { arity: 5 } - cte l15 = - Project (#1) // { arity: 1 } - Map ((#0{min} / 2)) // { arity: 2 } - 
Union // { arity: 1 } - Get l14 // { arity: 1 } - Map (null) // { arity: 1 } - Union // { arity: 0 } - Negate // { arity: 0 } - Project () // { arity: 0 } - Get l14 // { arity: 1 } - Constant // { arity: 0 } - - () - cte l16 = - Distinct project=[] // { arity: 0 } - Project () // { arity: 0 } - Filter #1 AND (#0{person2id} = -1) // { arity: 2 } - Get l8 // { arity: 2 } - cte l17 = - Distinct project=[#0..=#5] // { arity: 6 } - Union // { arity: 6 } - Project (#1, #0, #0, #2..=#4) // { arity: 6 } - Map (0, false, 0) // { arity: 5 } - Union // { arity: 2 } - Map (1450, false) // { arity: 2 } - Get l16 // { arity: 0 } - Map (15393162796819, true) // { arity: 2 } - Get l16 // { arity: 0 } - Project (#0..=#3, #7, #8) // { arity: 6 } - Map ((#4 OR coalesce((#3 > #6), false)), (#5 + 1)) // { arity: 9 } - CrossJoin type=delta // { arity: 7 } - implementation - %0:l13 » %1[×]U » %2[×]U - %1 » %2[×]U » %0:l13[×] - %2 » %1[×]U » %0:l13[×] - ArrangeBy keys=[[]] // { arity: 5 } - Get l13 // { arity: 5 } - ArrangeBy keys=[[]] // { arity: 1 } - TopK limit=1 // { arity: 1 } - Project (#4) // { arity: 1 } + %1[#0..=#2]UKKKA » %0:l11[#0..=#2]UKKK + Get l11 // { arity: 3 } + ArrangeBy keys=[[#0..=#2]] // { arity: 3 } + Distinct project=[#0..=#2] // { arity: 3 } + Project (#0..=#2) // { arity: 3 } + Get l9 // { arity: 5 } + cte l13 = + TopK group_by=[#0, #1, #2{person2id}] order_by=[#3 asc nulls_last, #4 desc nulls_first] limit=1 // { arity: 5 } + Union // { arity: 5 } + Project (#3, #4, #1{person2id}, #7, #8) // { arity: 5 } + Map ((#6 + #2), false) // { arity: 9 } + Join on=(#0{person1id} = #5) type=differential // { arity: 7 } + implementation + %0:l4[#0]K » %1:l9[#2]K + ArrangeBy keys=[[#0{person1id}]] // { arity: 3 } + Get l4 // { arity: 3 } + ArrangeBy keys=[[#2]] // { arity: 4 } + Project (#0..=#3) // { arity: 4 } Get l9 // { arity: 5 } - ArrangeBy keys=[[]] // { arity: 1 } - Union // { arity: 1 } - Get l15 // { arity: 1 } - Map (null) // { arity: 1 } - Union // { arity: 0 } - Negate // { arity: 0 } - Project () // { arity: 0 } - Get l15 // { arity: 1 } - Constant // { arity: 0 } - - () - Return // { arity: 1 } - With - cte l18 = - Project (#0..=#3) // { arity: 4 } - Join on=(#4 = #5{max}) type=differential // { arity: 6 } - implementation - %1[#0]UK » %0:l17[#4]K - ArrangeBy keys=[[#4]] // { arity: 5 } - Project (#0..=#3, #5) // { arity: 5 } - Get l17 // { arity: 6 } - ArrangeBy keys=[[#0{max}]] // { arity: 1 } - Reduce aggregates=[max(#0)] // { arity: 1 } - Project (#5) // { arity: 1 } - Get l17 // { arity: 6 } - cte l19 = - Reduce aggregates=[min(#0{min})] // { arity: 1 } - Project (#2) // { arity: 1 } - Reduce group_by=[#0, #2] aggregates=[min((#1 + #3))] // { arity: 3 } - Project (#0, #2, #3, #5) // { arity: 4 } - Join on=(#1{person2id} = #4{person2id}) type=differential // { arity: 6 } + Project (#0..=#3, #9) // { arity: 5 } + Map ((#4 OR #8)) // { arity: 10 } + Join on=(#0 = #5 AND #1 = #6 AND #2 = #7) type=differential // { arity: 9 } implementation - %0:l18[#1]Kef » %1:l18[#1]Kef - ArrangeBy keys=[[#1{person2id}]] // { arity: 3 } - Project (#1{person2id}..=#3) // { arity: 3 } - Filter (#0 = false) // { arity: 4 } - Get l18 // { arity: 4 } - ArrangeBy keys=[[#1{person2id}]] // { arity: 3 } - Project (#1{person2id}..=#3) // { arity: 3 } - Filter (#0 = true) // { arity: 4 } - Get l18 // { arity: 4 } + %0:l17[#0..=#2]KKK » %1[#0..=#2]KKK + ArrangeBy keys=[[#0..=#2]] // { arity: 5 } + Project (#0..=#4) // { arity: 5 } + Get l17 // { arity: 6 } + ArrangeBy keys=[[#0..=#2]] // { arity: 4 } + Union // 
{ arity: 4 } + Map (true) // { arity: 4 } + Get l12 // { arity: 3 } + Project (#0..=#2, #6) // { arity: 4 } + Map (false) // { arity: 7 } + Join on=(#0 = #3 AND #1 = #4 AND #2 = #5) type=differential // { arity: 6 } + implementation + %1:l11[#0..=#2]UKKK » %0[#0..=#2]KKK + ArrangeBy keys=[[#0..=#2]] // { arity: 3 } + Union // { arity: 3 } + Negate // { arity: 3 } + Get l12 // { arity: 3 } + Get l10 // { arity: 3 } + Get l11 // { arity: 3 } + cte l14 = + Reduce aggregates=[min((#0 + #1))] // { arity: 1 } + Project (#1, #3) // { arity: 2 } + Join on=(#0{person2id} = #2{person2id}) type=differential // { arity: 4 } + implementation + %0:l13[#0]Kef » %1:l13[#0]Kef + ArrangeBy keys=[[#0{person2id}]] // { arity: 2 } + Project (#2{person2id}, #3) // { arity: 2 } + Filter (#0 = false) // { arity: 5 } + Get l13 // { arity: 5 } + ArrangeBy keys=[[#0{person2id}]] // { arity: 2 } + Project (#2{person2id}, #3) // { arity: 2 } + Filter (#0 = true) // { arity: 5 } + Get l13 // { arity: 5 } + cte l15 = + Project (#1) // { arity: 1 } + Map ((#0{min} / 2)) // { arity: 2 } + Union // { arity: 1 } + Get l14 // { arity: 1 } + Map (null) // { arity: 1 } + Union // { arity: 0 } + Negate // { arity: 0 } + Project () // { arity: 0 } + Get l14 // { arity: 1 } + Constant // { arity: 0 } + - () + cte l16 = + Distinct project=[] // { arity: 0 } + Project () // { arity: 0 } + Filter #1 AND (#0{person2id} = -1) // { arity: 2 } + Get l8 // { arity: 2 } + cte l17 = + Distinct project=[#0..=#5] // { arity: 6 } + Union // { arity: 6 } + Project (#1, #0, #0, #2..=#4) // { arity: 6 } + Map (0, false, 0) // { arity: 5 } + Union // { arity: 2 } + Map (1450, false) // { arity: 2 } + Get l16 // { arity: 0 } + Map (15393162796819, true) // { arity: 2 } + Get l16 // { arity: 0 } + Project (#0..=#3, #7, #8) // { arity: 6 } + Map ((#4 OR coalesce((#3 > #6), false)), (#5 + 1)) // { arity: 9 } + CrossJoin type=delta // { arity: 7 } + implementation + %0:l13 » %1[×]U » %2[×]U + %1 » %2[×]U » %0:l13[×] + %2 » %1[×]U » %0:l13[×] + ArrangeBy keys=[[]] // { arity: 5 } + Get l13 // { arity: 5 } + ArrangeBy keys=[[]] // { arity: 1 } + TopK limit=1 // { arity: 1 } + Project (#4) // { arity: 1 } + Get l9 // { arity: 5 } + ArrangeBy keys=[[]] // { arity: 1 } + Union // { arity: 1 } + Get l15 // { arity: 1 } + Map (null) // { arity: 1 } + Union // { arity: 0 } + Negate // { arity: 0 } + Project () // { arity: 0 } + Get l15 // { arity: 1 } + Constant // { arity: 0 } + - () Return // { arity: 1 } - Project (#1) // { arity: 1 } - Map (coalesce(#0{min_min}, -1)) // { arity: 2 } - Union // { arity: 1 } - Get l19 // { arity: 1 } - Map (null) // { arity: 1 } - Union // { arity: 0 } - Negate // { arity: 0 } - Project () // { arity: 0 } - Get l19 // { arity: 1 } - Constant // { arity: 0 } - - () + With + cte l18 = + Project (#0..=#3) // { arity: 4 } + Join on=(#4 = #5{max}) type=differential // { arity: 6 } + implementation + %1[#0]UK » %0:l17[#4]K + ArrangeBy keys=[[#4]] // { arity: 5 } + Project (#0..=#3, #5) // { arity: 5 } + Get l17 // { arity: 6 } + ArrangeBy keys=[[#0{max}]] // { arity: 1 } + Reduce aggregates=[max(#0)] // { arity: 1 } + Project (#5) // { arity: 1 } + Get l17 // { arity: 6 } + cte l19 = + Reduce aggregates=[min(#0{min})] // { arity: 1 } + Project (#2{min}) // { arity: 1 } + Reduce group_by=[#0, #2] aggregates=[min((#1 + #3))] // { arity: 3 } + Project (#0, #2, #3, #5) // { arity: 4 } + Join on=(#1{person2id} = #4{person2id}) type=differential // { arity: 6 } + implementation + %0:l18[#1]Kef » %1:l18[#1]Kef + ArrangeBy 
keys=[[#1{person2id}]] // { arity: 3 } + Project (#1..=#3) // { arity: 3 } + Filter (#0 = false) // { arity: 4 } + Get l18 // { arity: 4 } + ArrangeBy keys=[[#1{person2id}]] // { arity: 3 } + Project (#1..=#3) // { arity: 3 } + Filter (#0 = true) // { arity: 4 } + Get l18 // { arity: 4 } + Return // { arity: 1 } + Project (#1) // { arity: 1 } + Map (coalesce(#0{min_min}, -1)) // { arity: 2 } + Union // { arity: 1 } + Get l19 // { arity: 1 } + Map (null) // { arity: 1 } + Union // { arity: 0 } + Negate // { arity: 0 } + Project () // { arity: 0 } + Get l19 // { arity: 1 } + Constant // { arity: 0 } + - () Used Indexes: - materialize.public.forum_id (*** full scan ***) @@ -3083,7 +3087,7 @@ Explained Query: cte l0 = ArrangeBy keys=[[#0{id}]] // { arity: 1 } Distinct project=[#0{id}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{id}) // { arity: 1 } Filter (#1{id}) IS NOT NULL // { arity: 11 } ReadIndex on=person person_id=[*** full scan ***] // { arity: 11 } cte l1 = @@ -3093,7 +3097,7 @@ Explained Query: ArrangeBy keys=[[#1{name}]] // { arity: 4 } ReadIndex on=tag tag_name=[lookup] // { arity: 4 } cte l3 = - Project (#0{id}, #2) // { arity: 2 } + Project (#0{id}, #2{messageid}) // { arity: 2 } Join on=(#0{id} = #1{creatorpersonid} AND #2{messageid} = #3{messageid}) type=delta // { arity: 4 } implementation %0:l0 » %1[#0]K » %2[#0]UKA @@ -3102,12 +3106,12 @@ Explained Query: Get l0 // { arity: 1 } ArrangeBy keys=[[#0{creatorpersonid}], [#1{messageid}]] // { arity: 2 } Distinct project=[#1{creatorpersonid}, #0{messageid}] // { arity: 2 } - Project (#1{creatorpersonid}, #9) // { arity: 2 } + Project (#1{messageid}, #9{creatorpersonid}) // { arity: 2 } Filter (2012-10-07 00:00:00 = date_to_timestamp(timestamp_with_time_zone_to_date(#0{creationdate}))) // { arity: 13 } ReadIndex on=message message_messageid=[*** full scan ***] // { arity: 13 } ArrangeBy keys=[[#0{messageid}]] // { arity: 1 } Distinct project=[#0{messageid}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{messageid}) // { arity: 1 } Join on=(#2{tagid} = #3{id}) type=differential // { arity: 8 } implementation %1:tag[#0]KAe » %0:l1[#2]KAe @@ -3118,7 +3122,7 @@ Explained Query: ArrangeBy keys=[[#1{person1id}], [#2{person2id}]] // { arity: 3 } ReadIndex on=person_knows_person person_knows_person_person1id=[delta join lookup] person_knows_person_person2id=[delta join lookup] // { arity: 3 } cte l5 = - Project (#0{id}, #1{messageid}, #4) // { arity: 3 } + Project (#0{id}, #1{messageid}, #4{person2id}) // { arity: 3 } Join on=(#0{id} = #3{person1id} AND #4{person2id} = #5{id}) type=delta // { arity: 6 } implementation %0:l3 » %1:l4[#1]KA » %2[#0]UKA @@ -3132,7 +3136,7 @@ Explained Query: Project (#0{id}) // { arity: 1 } Get l3 // { arity: 2 } cte l6 = - Project (#0{id}, #2) // { arity: 2 } + Project (#0{id}, #2{messageid}) // { arity: 2 } Join on=(#0{id} = #1{creatorpersonid} AND #2{messageid} = #3{messageid}) type=delta // { arity: 4 } implementation %0:l0 » %1[#0]K » %2[#0]UKA @@ -3141,12 +3145,12 @@ Explained Query: Get l0 // { arity: 1 } ArrangeBy keys=[[#0{creatorpersonid}], [#1{messageid}]] // { arity: 2 } Distinct project=[#1{creatorpersonid}, #0{messageid}] // { arity: 2 } - Project (#1{creatorpersonid}, #9) // { arity: 2 } + Project (#1{messageid}, #9{creatorpersonid}) // { arity: 2 } Filter (2012-12-14 00:00:00 = date_to_timestamp(timestamp_with_time_zone_to_date(#0{creationdate}))) // { arity: 13 } ReadIndex on=message message_messageid=[*** full scan ***] // { arity: 13 } ArrangeBy 
keys=[[#0{messageid}]] // { arity: 1 } Distinct project=[#0{messageid}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{messageid}) // { arity: 1 } Join on=(#2{tagid} = #3{id}) type=differential // { arity: 8 } implementation %1:tag[#0]KAe » %0:l1[#2]KAe @@ -3154,7 +3158,7 @@ Explained Query: ArrangeBy keys=[[#0{id}]] // { arity: 5 } ReadIndex on=materialize.public.tag tag_name=[lookup value=("Thailand_Noriega")] // { arity: 5 } cte l7 = - Project (#0{id}, #1{messageid}, #4) // { arity: 3 } + Project (#0{id}, #1{messageid}, #4{person2id}) // { arity: 3 } Join on=(#0{id} = #3{person1id} AND #4{person2id} = #5{id}) type=delta // { arity: 6 } implementation %0:l6 » %1:l4[#1]KA » %2[#0]UKA @@ -3256,7 +3260,7 @@ Explained Query: Finish order_by=[#1{count_messageid} desc nulls_first, #0{creatorpersonid} asc nulls_last] limit=10 output=[#0, #1] With cte l0 = - Project (#0{creationdate}, #1{messageid}, #9, #10, #12) // { arity: 5 } + Project (#0{creationdate}, #1{messageid}, #9{creatorpersonid}, #10{containerforumid}, #12{parentmessageid}) // { arity: 5 } Join on=(#1{messageid} = #13{messageid}) type=differential // { arity: 14 } implementation %1[#0]UKA » %0:message[#1]KA @@ -3264,7 +3268,7 @@ Explained Query: ReadIndex on=message message_messageid=[differential join] // { arity: 13 } ArrangeBy keys=[[#0{messageid}]] // { arity: 1 } Distinct project=[#0{messageid}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{messageid}) // { arity: 1 } Join on=(#2{tagid} = #3{id}) type=differential // { arity: 4 } implementation %1[#0]UKA » %0:message_hastag_tag[#2]KA @@ -3279,7 +3283,7 @@ Explained Query: ArrangeBy keys=[[#1{forumid}], [#2{personid}]] // { arity: 3 } ReadIndex on=forum_hasmember_person forum_hasmember_person_forumid=[delta join lookup] forum_hasmember_person_personid=[delta join lookup] // { arity: 3 } cte l2 = - Project (#1{messageid}, #4, #6) // { arity: 3 } + Project (#1{creatorpersonid}, #4{messageid}, #6{containerforumid}) // { arity: 3 } Filter (#2{containerforumid} != #6{containerforumid}) AND (#5{creatorpersonid} != #7{creatorpersonid}) AND ((#0{creationdate} + 12:00:00) < #3{creationdate}) // { arity: 15 } Join on=(#2{containerforumid} = #10{forumid} = #13{forumid} AND #4{messageid} = #8{parentmessageid} AND #5{creatorpersonid} = #14{personid} AND #7{creatorpersonid} = #11{personid}) type=delta // { arity: 15 } implementation @@ -3289,20 +3293,20 @@ Explained Query: %3:l1 » %4:l1[#1]KA » %0:l0[#2]K » %1:l0[#2]K » %2:l0[#0, #1]KK %4:l1 » %3:l1[#1]KA » %0:l0[#2]K » %1:l0[#2]K » %2:l0[#0, #1]KK ArrangeBy keys=[[#2{containerforumid}]] // { arity: 3 } - Project (#0{creationdate}, #2{containerforumid}, #3) // { arity: 3 } + Project (#0{creationdate}, #2{creatorpersonid}, #3{containerforumid}) // { arity: 3 } Get l0 // { arity: 5 } ArrangeBy keys=[[#1{messageid}, #2{creatorpersonid}], [#2{creatorpersonid}]] // { arity: 4 } Project (#0{creationdate}..=#3{containerforumid}) // { arity: 4 } Get l0 // { arity: 5 } ArrangeBy keys=[[#0{creatorpersonid}, #1{parentmessageid}]] // { arity: 2 } - Project (#2, #4) // { arity: 2 } + Project (#2{creatorpersonid}, #4{parentmessageid}) // { arity: 2 } Filter (#4{parentmessageid}) IS NOT NULL // { arity: 5 } Get l0 // { arity: 5 } Get l1 // { arity: 3 } Get l1 // { arity: 3 } cte l3 = Distinct project=[#0{creatorpersonid}, #1{containerforumid}] // { arity: 2 } - Project (#0{creatorpersonid}, #2) // { arity: 2 } + Project (#0{creatorpersonid}, #2{containerforumid}) // { arity: 2 } Get l2 // { arity: 3 } Return // { arity: 2 } 
Reduce group_by=[#0{creatorpersonid}] aggregates=[count(distinct #1{messageid})] // { arity: 2 } @@ -3323,7 +3327,7 @@ Explained Query: Get l3 // { arity: 2 } ArrangeBy keys=[[#0{personid}, #1{forumid}]] // { arity: 2 } Distinct project=[#1{personid}, #0{forumid}] // { arity: 2 } - Project (#1{personid}, #2) // { arity: 2 } + Project (#1{forumid}, #2{personid}) // { arity: 2 } ReadIndex on=forum_hasmember_person forum_hasmember_person_forumid=[*** full scan ***] // { arity: 3 } Get l3 // { arity: 2 } @@ -3376,21 +3380,21 @@ Explained Query: With cte l0 = ArrangeBy keys=[[#1{person2id}]] // { arity: 2 } - Project (#0{personid}, #9) // { arity: 2 } + Project (#0{personid}, #9{person2id}) // { arity: 2 } Join on=(#0{personid} = #8{person1id} AND #1{tagid} = #2{id}) type=delta // { arity: 10 } implementation %0:person_hasinterest_tag » %1:tag[#0]KAe » %2:person_knows_person[#1]KA %1:tag » %0:person_hasinterest_tag[#1]KA » %2:person_knows_person[#1]KA %2:person_knows_person » %0:person_hasinterest_tag[#0]K » %1:tag[#0]KAe ArrangeBy keys=[[#0{personid}], [#1{tagid}]] // { arity: 2 } - Project (#1{tagid}, #2) // { arity: 2 } + Project (#1{personid}, #2{tagid}) // { arity: 2 } ReadIndex on=person_hasinterest_tag person_hasinterest_tag_tagid=[*** full scan ***] // { arity: 3 } ArrangeBy keys=[[#0{id}]] // { arity: 5 } ReadIndex on=materialize.public.tag tag_name=[lookup value=("Fyodor_Dostoyevsky")] // { arity: 5 } ArrangeBy keys=[[#1{person1id}]] // { arity: 3 } ReadIndex on=person_knows_person person_knows_person_person1id=[delta join lookup] // { arity: 3 } cte l1 = - Project (#0{personid}, #2) // { arity: 2 } + Project (#0{personid}, #2{personid}) // { arity: 2 } Filter (#0{personid} != #2{personid}) // { arity: 4 } Join on=(#1{person2id} = #3{person2id}) type=differential // { arity: 4 } implementation @@ -3419,7 +3423,7 @@ Explained Query: Get l2 // { arity: 2 } ArrangeBy keys=[[#0{person2id}, #1{person1id}]] // { arity: 2 } Distinct project=[#1{person2id}, #0{person1id}] // { arity: 2 } - Project (#1{person2id}, #2) // { arity: 2 } + Project (#1{person1id}, #2{person2id}) // { arity: 2 } ReadIndex on=person_knows_person person_knows_person_person1id=[*** full scan ***] // { arity: 3 } Get l2 // { arity: 2 } @@ -3474,7 +3478,7 @@ materialize.public.pathq19: Project (#0{person1id}, #1{person2id}, #3) // { arity: 3 } Map (greatest(f64toi64(roundf64((40 - sqrtf64(bigint_to_double(#2{count}))))), 1)) // { arity: 4 } Reduce group_by=[#0{person1id}, #1{person2id}] aggregates=[count(*)] // { arity: 3 } - Project (#16, #17) // { arity: 2 } + Project (#16{person1id}, #17{person2id}) // { arity: 2 } Filter (#0{creatorpersonid} != #11{creatorpersonid}) AND (#16{person1id} < #17{person2id}) // { arity: 18 } Join on=(#1{parentmessageid} = #3{messageid} AND #16{person1id} = least(#0{creatorpersonid}, #11{creatorpersonid}) AND #17{person2id} = greatest(#0{creatorpersonid}, #11{creatorpersonid})) type=delta // { arity: 18 } implementation @@ -3482,7 +3486,7 @@ materialize.public.pathq19: %1:message » %0:message[#1]KA » %2:person_knows_person[#1, #2]KKAf %2:person_knows_person » %0:message[×] » %1:message[#1]KA ArrangeBy keys=[[], [#1{parentmessageid}]] // { arity: 2 } - Project (#9, #12) // { arity: 2 } + Project (#9{creatorpersonid}, #12{parentmessageid}) // { arity: 2 } Filter (#12{parentmessageid}) IS NOT NULL // { arity: 13 } ReadIndex on=message message_messageid=[*** full scan ***] // { arity: 13 } ArrangeBy keys=[[#1{messageid}]] // { arity: 13 } @@ -3492,7 +3496,7 @@ materialize.public.pathq19: 
Return // { arity: 3 } Union // { arity: 3 } Get l0 // { arity: 3 } - Project (#1{person1id}, #0{person2id}, #2) // { arity: 3 } + Project (#1{person2id}, #0{person1id}, #2) // { arity: 3 } Get l0 // { arity: 3 } Used Indexes: @@ -3576,7 +3580,7 @@ Explained Query: Project (#1{id}, #1{id}, #12) // { arity: 3 } Map (0) // { arity: 13 } ReadIndex on=materialize.public.person person_locationcityid=[lookup value=(655)] // { arity: 12 } - Project (#0, #4, #6) // { arity: 3 } + Project (#0, #4{dst}, #6) // { arity: 3 } Map ((#2 + bigint_to_double(#5{w}))) // { arity: 7 } Join on=(#1 = #3{src}) type=differential // { arity: 6 } implementation @@ -3598,7 +3602,7 @@ Explained Query: Get l0 // { arity: 3 } ArrangeBy keys=[[#0{id}]] // { arity: 1 } Distinct project=[#0{id}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{id}) // { arity: 1 } Filter (#1{id}) IS NOT NULL // { arity: 12 } ReadIndex on=materialize.public.person person_locationcityid=[lookup value=(1138)] // { arity: 12 } Return // { arity: 3 } @@ -3610,7 +3614,7 @@ Explained Query: Get l1 // { arity: 3 } ArrangeBy keys=[[#0{min_min}]] // { arity: 1 } Reduce aggregates=[min(#0{min})] // { arity: 1 } - Project (#2) // { arity: 1 } + Project (#2{min}) // { arity: 1 } Get l1 // { arity: 3 } Used Indexes: @@ -3677,101 +3681,103 @@ FROM paths WHERE w = (SELECT MIN(w) FROM paths) ---- Explained Query: - With Mutually Recursive + With cte l0 = ArrangeBy keys=[[#8{locationcityid}]] // { arity: 11 } ReadIndex on=person person_locationcityid=[lookup] // { arity: 11 } cte l1 = - ArrangeBy keys=[[]] // { arity: 1 } + ArrangeBy keys=[[#0{src}]] // { arity: 3 } + ReadIndex on=pathq19 pathq19_src=[delta join lookup] // { arity: 3 } + Return // { arity: 3 } + With Mutually Recursive + cte l2 = + ArrangeBy keys=[[]] // { arity: 1 } + Union // { arity: 1 } + Project (#1) // { arity: 1 } + Map ((#0 / 2)) // { arity: 2 } + Get l7 // { arity: 1 } + Map (null) // { arity: 1 } + Union // { arity: 0 } + Negate // { arity: 0 } + Project () // { arity: 0 } + Get l7 // { arity: 1 } + Constant // { arity: 0 } + - () + cte l3 = + TopK group_by=[#0{id}, #1{id}] order_by=[#2 asc nulls_last] limit=1 // { arity: 3 } + Union // { arity: 3 } + Project (#1{id}, #1{id}, #12) // { arity: 3 } + Map (0) // { arity: 13 } + ReadIndex on=materialize.public.person person_locationcityid=[lookup value=(655)] // { arity: 12 } + Project (#0, #5{dst}, #7) // { arity: 3 } + Filter coalesce((#2 < #3), true) // { arity: 8 } + Map ((#2 + bigint_to_double(#6{w}))) // { arity: 8 } + Join on=(#1 = #4{src}) type=delta // { arity: 7 } + implementation + %0:l3 » %2:l1[#0]KA » %1:l2[×] + %1:l2 » %0:l3[×] » %2:l1[#0]KA + %2:l1 » %0:l3[#1]K » %1:l2[×] + ArrangeBy keys=[[], [#1{id}]] // { arity: 3 } + Filter (#1{id}) IS NOT NULL // { arity: 3 } + Get l3 // { arity: 3 } + Get l2 // { arity: 1 } + Get l1 // { arity: 3 } + cte l4 = + TopK group_by=[#0{id}, #1{id}] order_by=[#2 asc nulls_last] limit=1 // { arity: 3 } + Union // { arity: 3 } + Project (#1{id}, #1{id}, #12) // { arity: 3 } + Map (0) // { arity: 13 } + ReadIndex on=materialize.public.person person_locationcityid=[lookup value=(1138)] // { arity: 12 } + Project (#0, #5{dst}, #7) // { arity: 3 } + Filter coalesce((#2 < #3), true) // { arity: 8 } + Map ((#2 + bigint_to_double(#6{w}))) // { arity: 8 } + Join on=(#1 = #4{src}) type=delta // { arity: 7 } + implementation + %0:l4 » %2:l1[#0]KA » %1:l2[×] + %1:l2 » %0:l4[×] » %2:l1[#0]KA + %2:l1 » %0:l4[#1]K » %1:l2[×] + ArrangeBy keys=[[], [#1{id}]] // { arity: 3 } + Filter (#1{id}) 
IS NOT NULL // { arity: 3 } + Get l4 // { arity: 3 } + Get l2 // { arity: 1 } + Get l1 // { arity: 3 } + cte l5 = + Reduce group_by=[#0{id}, #2{id}] aggregates=[min((#1 + #3))] // { arity: 3 } + Project (#0{id}, #2, #3{id}, #5) // { arity: 4 } + Join on=(#1{id} = #4{id}) type=differential // { arity: 6 } + implementation + %0:l3[#1]K » %1:l4[#1]K + ArrangeBy keys=[[#1{id}]] // { arity: 3 } + Filter (#1{id}) IS NOT NULL // { arity: 3 } + Get l3 // { arity: 3 } + ArrangeBy keys=[[#1{id}]] // { arity: 3 } + Filter (#1{id}) IS NOT NULL // { arity: 3 } + Get l4 // { arity: 3 } + cte l6 = + Reduce aggregates=[min(#0{min})] // { arity: 1 } + Project (#2{min}) // { arity: 1 } + Get l5 // { arity: 3 } + cte l7 = Union // { arity: 1 } - Project (#1) // { arity: 1 } - Map ((#0 / 2)) // { arity: 2 } - Get l7 // { arity: 1 } + Get l6 // { arity: 1 } Map (null) // { arity: 1 } Union // { arity: 0 } Negate // { arity: 0 } Project () // { arity: 0 } - Get l7 // { arity: 1 } + Get l6 // { arity: 1 } Constant // { arity: 0 } - () - cte l2 = - ArrangeBy keys=[[#0{src}]] // { arity: 3 } - ReadIndex on=pathq19 pathq19_src=[delta join lookup] // { arity: 3 } - cte l3 = - TopK group_by=[#0{id}, #1{id}] order_by=[#2 asc nulls_last] limit=1 // { arity: 3 } - Union // { arity: 3 } - Project (#1{id}, #1{id}, #12) // { arity: 3 } - Map (0) // { arity: 13 } - ReadIndex on=materialize.public.person person_locationcityid=[lookup value=(655)] // { arity: 12 } - Project (#0, #5, #7) // { arity: 3 } - Filter coalesce((#2 < #3), true) // { arity: 8 } - Map ((#2 + bigint_to_double(#6{w}))) // { arity: 8 } - Join on=(#1 = #4{src}) type=delta // { arity: 7 } - implementation - %0:l3 » %2:l2[#0]KA » %1:l1[×] - %1:l1 » %0:l3[×] » %2:l2[#0]KA - %2:l2 » %0:l3[#1]K » %1:l1[×] - ArrangeBy keys=[[], [#1{id}]] // { arity: 3 } - Filter (#1{id}) IS NOT NULL // { arity: 3 } - Get l3 // { arity: 3 } - Get l1 // { arity: 1 } - Get l2 // { arity: 3 } - cte l4 = - TopK group_by=[#0{id}, #1{id}] order_by=[#2 asc nulls_last] limit=1 // { arity: 3 } - Union // { arity: 3 } - Project (#1{id}, #1{id}, #12) // { arity: 3 } - Map (0) // { arity: 13 } - ReadIndex on=materialize.public.person person_locationcityid=[lookup value=(1138)] // { arity: 12 } - Project (#0, #5, #7) // { arity: 3 } - Filter coalesce((#2 < #3), true) // { arity: 8 } - Map ((#2 + bigint_to_double(#6{w}))) // { arity: 8 } - Join on=(#1 = #4{src}) type=delta // { arity: 7 } - implementation - %0:l4 » %2:l2[#0]KA » %1:l1[×] - %1:l1 » %0:l4[×] » %2:l2[#0]KA - %2:l2 » %0:l4[#1]K » %1:l1[×] - ArrangeBy keys=[[], [#1{id}]] // { arity: 3 } - Filter (#1{id}) IS NOT NULL // { arity: 3 } - Get l4 // { arity: 3 } - Get l1 // { arity: 1 } - Get l2 // { arity: 3 } - cte l5 = - Reduce group_by=[#0{id}, #2{id}] aggregates=[min((#1 + #3))] // { arity: 3 } - Project (#0{id}, #2{id}, #3, #5) // { arity: 4 } - Join on=(#1{id} = #4{id}) type=differential // { arity: 6 } - implementation - %0:l3[#1]K » %1:l4[#1]K - ArrangeBy keys=[[#1{id}]] // { arity: 3 } - Filter (#1{id}) IS NOT NULL // { arity: 3 } - Get l3 // { arity: 3 } - ArrangeBy keys=[[#1{id}]] // { arity: 3 } - Filter (#1{id}) IS NOT NULL // { arity: 3 } - Get l4 // { arity: 3 } - cte l6 = - Reduce aggregates=[min(#0{min})] // { arity: 1 } - Project (#2) // { arity: 1 } - Get l5 // { arity: 3 } - cte l7 = - Union // { arity: 1 } - Get l6 // { arity: 1 } - Map (null) // { arity: 1 } - Union // { arity: 0 } - Negate // { arity: 0 } - Project () // { arity: 0 } - Get l6 // { arity: 1 } - Constant // { arity: 0 } - - () - Return // { arity: 3 
} - Project (#0{id}..=#2{min}) // { arity: 3 } - Join on=(#2{min} = #3{min_min}) type=differential // { arity: 4 } - implementation - %1[#0]UK » %0:l5[#2]K - ArrangeBy keys=[[#2{min}]] // { arity: 3 } - Get l5 // { arity: 3 } - ArrangeBy keys=[[#0{min_min}]] // { arity: 1 } - Reduce aggregates=[min(#0{min})] // { arity: 1 } - Project (#2) // { arity: 1 } - Get l5 // { arity: 3 } + Return // { arity: 3 } + Project (#0{id}..=#2{min}) // { arity: 3 } + Join on=(#2{min} = #3{min_min}) type=differential // { arity: 4 } + implementation + %1[#0]UK » %0:l5[#2]K + ArrangeBy keys=[[#2{min}]] // { arity: 3 } + Get l5 // { arity: 3 } + ArrangeBy keys=[[#0{min_min}]] // { arity: 1 } + Reduce aggregates=[min(#0{min})] // { arity: 1 } + Project (#2{min}) // { arity: 1 } + Get l5 // { arity: 3 } Used Indexes: - materialize.public.person_locationcityid (lookup) @@ -3836,172 +3842,174 @@ SELECT * FROM results WHERE w = (SELECT min(w) FROM results) ORDER BY f, t ---- Explained Query: Finish order_by=[#0{id} asc nulls_last, #1{id} asc nulls_last] output=[#0..=#2] - With Mutually Recursive + With cte l0 = - TopK order_by=[#3 asc nulls_last] limit=1000 // { arity: 5 } - Project (#0..=#3, #5) // { arity: 5 } - Filter (#4 = false) // { arity: 6 } - Get l7 // { arity: 6 } - cte l1 = - Distinct project=[#0..=#2] // { arity: 3 } - Project (#0..=#2) // { arity: 3 } - Get l7 // { arity: 6 } - cte l2 = - Project (#0..=#2) // { arity: 3 } - Join on=(#0 = #3 AND #1 = #4 AND #2 = #5) type=differential // { arity: 6 } - implementation - %1[#0..=#2]UKKKA » %0:l1[#0..=#2]UKKK - ArrangeBy keys=[[#0..=#2]] // { arity: 3 } - Filter (#1) IS NOT NULL AND (#2) IS NOT NULL // { arity: 3 } - Get l1 // { arity: 3 } - ArrangeBy keys=[[#0..=#2]] // { arity: 3 } - Distinct project=[#0..=#2] // { arity: 3 } - Project (#0..=#2) // { arity: 3 } - Filter (#1) IS NOT NULL AND (#2) IS NOT NULL // { arity: 5 } - Get l0 // { arity: 5 } - cte l3 = - TopK group_by=[#0, #1, #2{dst}] order_by=[#3 asc nulls_last, #4 desc nulls_first] limit=1 // { arity: 5 } - Distinct project=[#0..=#4] // { arity: 5 } - Union // { arity: 5 } - Project (#3, #4, #1, #7, #8) // { arity: 5 } - Map ((#6 + bigint_to_double(#2{w})), false) // { arity: 9 } - Join on=(#0{src} = #5) type=differential // { arity: 7 } - implementation - %0:pathq19[#0]KA » %1:l0[#2]K - ArrangeBy keys=[[#0{src}]] // { arity: 3 } - ReadIndex on=pathq19 pathq19_src=[differential join] // { arity: 3 } - ArrangeBy keys=[[#2]] // { arity: 4 } - Project (#0..=#3) // { arity: 4 } - Filter (#2) IS NOT NULL // { arity: 5 } - Get l0 // { arity: 5 } - Project (#0..=#3, #9) // { arity: 5 } - Map ((#4 OR #8)) // { arity: 10 } - Join on=(#0 = #5 AND #1 = #6 AND #2 = #7) type=differential // { arity: 9 } - implementation - %0:l7[#0..=#2]KKK » %1[#0..=#2]KKK - ArrangeBy keys=[[#0..=#2]] // { arity: 5 } - Project (#0..=#4) // { arity: 5 } - Get l7 // { arity: 6 } - ArrangeBy keys=[[#0..=#2]] // { arity: 4 } - Union // { arity: 4 } - Map (true) // { arity: 4 } - Get l2 // { arity: 3 } - Project (#0..=#2, #6) // { arity: 4 } - Map (false) // { arity: 7 } - Join on=(#0 = #3 AND #1 = #4 AND #2 = #5) type=differential // { arity: 6 } - implementation - %1:l1[#0..=#2]UKKK » %0[#0..=#2]KKK - ArrangeBy keys=[[#0..=#2]] // { arity: 3 } - Union // { arity: 3 } - Negate // { arity: 3 } - Get l2 // { arity: 3 } - Get l1 // { arity: 3 } - ArrangeBy keys=[[#0..=#2]] // { arity: 3 } - Get l1 // { arity: 3 } - cte l4 = - Reduce aggregates=[min((#0 + #1))] // { arity: 1 } - Project (#1, #3) // { arity: 2 } - Join on=(#0{dst} = 
#2{dst}) type=differential // { arity: 4 } - implementation - %0:l3[#0]Kef » %1:l3[#0]Kef - ArrangeBy keys=[[#0{dst}]] // { arity: 2 } - Project (#2, #3) // { arity: 2 } - Filter (#0 = false) AND (#2{dst}) IS NOT NULL // { arity: 5 } - Get l3 // { arity: 5 } - ArrangeBy keys=[[#0{dst}]] // { arity: 2 } - Project (#2, #3) // { arity: 2 } - Filter (#0 = true) AND (#2{dst}) IS NOT NULL // { arity: 5 } - Get l3 // { arity: 5 } - cte l5 = - Project (#1) // { arity: 1 } - Map ((#0{min} / 2)) // { arity: 2 } - Union // { arity: 1 } - Get l4 // { arity: 1 } - Map (null) // { arity: 1 } - Union // { arity: 0 } - Negate // { arity: 0 } - Project () // { arity: 0 } - Get l4 // { arity: 1 } - Constant // { arity: 0 } - - () - cte l6 = ArrangeBy keys=[[#8{locationcityid}]] // { arity: 11 } ReadIndex on=person person_locationcityid=[lookup] // { arity: 11 } - cte l7 = - Distinct project=[#0..=#5] // { arity: 6 } - Union // { arity: 6 } - Project (#1{id}, #0, #0, #2{id}..=#4) // { arity: 6 } - Map (0, false, 0) // { arity: 5 } - Union // { arity: 2 } - Project (#1, #12) // { arity: 2 } - Map (false) // { arity: 13 } - ReadIndex on=materialize.public.person person_locationcityid=[lookup value=(655)] // { arity: 12 } - Project (#1, #12) // { arity: 2 } - Map (true) // { arity: 13 } - ReadIndex on=materialize.public.person person_locationcityid=[lookup value=(1138)] // { arity: 12 } - Project (#0..=#3, #7, #8) // { arity: 6 } - Map ((#4 OR coalesce((#3 > #6), false)), (#5 + 1)) // { arity: 9 } - CrossJoin type=delta // { arity: 7 } - implementation - %0:l3 » %1[×]U » %2[×]U - %1 » %2[×]U » %0:l3[×] - %2 » %1[×]U » %0:l3[×] - ArrangeBy keys=[[]] // { arity: 5 } - Get l3 // { arity: 5 } - ArrangeBy keys=[[]] // { arity: 1 } - TopK limit=1 // { arity: 1 } - Project (#4) // { arity: 1 } - Get l0 // { arity: 5 } - ArrangeBy keys=[[]] // { arity: 1 } - Union // { arity: 1 } - Get l5 // { arity: 1 } - Map (null) // { arity: 1 } - Union // { arity: 0 } - Negate // { arity: 0 } - Project () // { arity: 0 } - Get l5 // { arity: 1 } - Constant // { arity: 0 } - - () Return // { arity: 3 } - With - cte l8 = - Project (#0..=#3) // { arity: 4 } - Join on=(#4 = #5{max}) type=differential // { arity: 6 } + With Mutually Recursive + cte l1 = + TopK order_by=[#3 asc nulls_last] limit=1000 // { arity: 5 } + Project (#0..=#3, #5) // { arity: 5 } + Filter (#4 = false) // { arity: 6 } + Get l7 // { arity: 6 } + cte l2 = + Distinct project=[#0..=#2] // { arity: 3 } + Project (#0..=#2{id}) // { arity: 3 } + Get l7 // { arity: 6 } + cte l3 = + Project (#0..=#2) // { arity: 3 } + Join on=(#0 = #3 AND #1 = #4 AND #2 = #5) type=differential // { arity: 6 } implementation - %1[#0]UK » %0:l7[#4]K - ArrangeBy keys=[[#4]] // { arity: 5 } - Project (#0..=#3, #5) // { arity: 5 } - Filter (#2{id}) IS NOT NULL // { arity: 6 } - Get l7 // { arity: 6 } - ArrangeBy keys=[[#0{max}]] // { arity: 1 } - Reduce aggregates=[max(#0)] // { arity: 1 } - Project (#5) // { arity: 1 } - Get l7 // { arity: 6 } - cte l9 = - Reduce group_by=[#0{id}, #2{id}] aggregates=[min((#1 + #3))] // { arity: 3 } - Project (#0{id}, #2{id}, #3, #5) // { arity: 4 } - Join on=(#1{id} = #4{id}) type=differential // { arity: 6 } + %1[#0..=#2]UKKKA » %0:l2[#0..=#2]UKKK + ArrangeBy keys=[[#0..=#2]] // { arity: 3 } + Filter (#1) IS NOT NULL AND (#2) IS NOT NULL // { arity: 3 } + Get l2 // { arity: 3 } + ArrangeBy keys=[[#0..=#2]] // { arity: 3 } + Distinct project=[#0..=#2] // { arity: 3 } + Project (#0..=#2) // { arity: 3 } + Filter (#1) IS NOT NULL AND (#2) IS NOT NULL // { 
arity: 5 } + Get l1 // { arity: 5 } + cte l4 = + TopK group_by=[#0, #1, #2{dst}] order_by=[#3 asc nulls_last, #4 desc nulls_first] limit=1 // { arity: 5 } + Distinct project=[#0..=#4] // { arity: 5 } + Union // { arity: 5 } + Project (#3, #4, #1{dst}, #7, #8) // { arity: 5 } + Map ((#6 + bigint_to_double(#2{w})), false) // { arity: 9 } + Join on=(#0{src} = #5) type=differential // { arity: 7 } + implementation + %0:pathq19[#0]KA » %1:l1[#2]K + ArrangeBy keys=[[#0{src}]] // { arity: 3 } + ReadIndex on=pathq19 pathq19_src=[differential join] // { arity: 3 } + ArrangeBy keys=[[#2]] // { arity: 4 } + Project (#0..=#3) // { arity: 4 } + Filter (#2) IS NOT NULL // { arity: 5 } + Get l1 // { arity: 5 } + Project (#0..=#3, #9) // { arity: 5 } + Map ((#4 OR #8)) // { arity: 10 } + Join on=(#0 = #5 AND #1 = #6 AND #2 = #7) type=differential // { arity: 9 } + implementation + %0:l7[#0..=#2]KKK » %1[#0..=#2]KKK + ArrangeBy keys=[[#0..=#2]] // { arity: 5 } + Project (#0..=#4) // { arity: 5 } + Get l7 // { arity: 6 } + ArrangeBy keys=[[#0..=#2]] // { arity: 4 } + Union // { arity: 4 } + Map (true) // { arity: 4 } + Get l3 // { arity: 3 } + Project (#0..=#2, #6) // { arity: 4 } + Map (false) // { arity: 7 } + Join on=(#0 = #3 AND #1 = #4 AND #2 = #5) type=differential // { arity: 6 } + implementation + %1:l2[#0..=#2]UKKK » %0[#0..=#2]KKK + ArrangeBy keys=[[#0..=#2]] // { arity: 3 } + Union // { arity: 3 } + Negate // { arity: 3 } + Get l3 // { arity: 3 } + Get l2 // { arity: 3 } + ArrangeBy keys=[[#0..=#2]] // { arity: 3 } + Get l2 // { arity: 3 } + cte l5 = + Reduce aggregates=[min((#0 + #1))] // { arity: 1 } + Project (#1, #3) // { arity: 2 } + Join on=(#0{dst} = #2{dst}) type=differential // { arity: 4 } implementation - %0:l8[#1]Kef » %1:l8[#1]Kef - ArrangeBy keys=[[#1{id}]] // { arity: 3 } - Project (#1{id}..=#3) // { arity: 3 } - Filter (#0 = false) // { arity: 4 } - Get l8 // { arity: 4 } - ArrangeBy keys=[[#1{id}]] // { arity: 3 } - Project (#1{id}..=#3) // { arity: 3 } - Filter (#0 = true) // { arity: 4 } - Get l8 // { arity: 4 } + %0:l4[#0]Kef » %1:l4[#0]Kef + ArrangeBy keys=[[#0{dst}]] // { arity: 2 } + Project (#2{dst}, #3) // { arity: 2 } + Filter (#0 = false) AND (#2{dst}) IS NOT NULL // { arity: 5 } + Get l4 // { arity: 5 } + ArrangeBy keys=[[#0{dst}]] // { arity: 2 } + Project (#2{dst}, #3) // { arity: 2 } + Filter (#0 = true) AND (#2{dst}) IS NOT NULL // { arity: 5 } + Get l4 // { arity: 5 } + cte l6 = + Project (#1) // { arity: 1 } + Map ((#0{min} / 2)) // { arity: 2 } + Union // { arity: 1 } + Get l5 // { arity: 1 } + Map (null) // { arity: 1 } + Union // { arity: 0 } + Negate // { arity: 0 } + Project () // { arity: 0 } + Get l5 // { arity: 1 } + Constant // { arity: 0 } + - () + cte l7 = + Distinct project=[#0..=#5] // { arity: 6 } + Union // { arity: 6 } + Project (#1, #0{id}, #0{id}, #2..=#4) // { arity: 6 } + Map (0, false, 0) // { arity: 5 } + Union // { arity: 2 } + Project (#1{id}, #12) // { arity: 2 } + Map (false) // { arity: 13 } + ReadIndex on=materialize.public.person person_locationcityid=[lookup value=(655)] // { arity: 12 } + Project (#1{id}, #12) // { arity: 2 } + Map (true) // { arity: 13 } + ReadIndex on=materialize.public.person person_locationcityid=[lookup value=(1138)] // { arity: 12 } + Project (#0..=#3, #7, #8) // { arity: 6 } + Map ((#4 OR coalesce((#3 > #6), false)), (#5 + 1)) // { arity: 9 } + CrossJoin type=delta // { arity: 7 } + implementation + %0:l4 » %1[×]U » %2[×]U + %1 » %2[×]U » %0:l4[×] + %2 » %1[×]U » %0:l4[×] + ArrangeBy keys=[[]] // { arity: 5 } 
+ Get l4 // { arity: 5 } + ArrangeBy keys=[[]] // { arity: 1 } + TopK limit=1 // { arity: 1 } + Project (#4) // { arity: 1 } + Get l1 // { arity: 5 } + ArrangeBy keys=[[]] // { arity: 1 } + Union // { arity: 1 } + Get l6 // { arity: 1 } + Map (null) // { arity: 1 } + Union // { arity: 0 } + Negate // { arity: 0 } + Project () // { arity: 0 } + Get l6 // { arity: 1 } + Constant // { arity: 0 } + - () Return // { arity: 3 } - Project (#0{id}..=#2{min}) // { arity: 3 } - Join on=(#2{min} = #3{min_min}) type=differential // { arity: 4 } - implementation - %1[#0]UK » %0:l9[#2]K - ArrangeBy keys=[[#2{min}]] // { arity: 3 } - Get l9 // { arity: 3 } - ArrangeBy keys=[[#0{min_min}]] // { arity: 1 } - Reduce aggregates=[min(#0{min})] // { arity: 1 } - Project (#2) // { arity: 1 } - Get l9 // { arity: 3 } + With + cte l8 = + Project (#0..=#3) // { arity: 4 } + Join on=(#4 = #5{max}) type=differential // { arity: 6 } + implementation + %1[#0]UK » %0:l7[#4]K + ArrangeBy keys=[[#4]] // { arity: 5 } + Project (#0..=#3, #5) // { arity: 5 } + Filter (#2{id}) IS NOT NULL // { arity: 6 } + Get l7 // { arity: 6 } + ArrangeBy keys=[[#0{max}]] // { arity: 1 } + Reduce aggregates=[max(#0)] // { arity: 1 } + Project (#5) // { arity: 1 } + Get l7 // { arity: 6 } + cte l9 = + Reduce group_by=[#0{id}, #2{id}] aggregates=[min((#1 + #3))] // { arity: 3 } + Project (#0{id}, #2, #3{id}, #5) // { arity: 4 } + Join on=(#1{id} = #4{id}) type=differential // { arity: 6 } + implementation + %0:l8[#1]Kef » %1:l8[#1]Kef + ArrangeBy keys=[[#1{id}]] // { arity: 3 } + Project (#1{id}..=#3) // { arity: 3 } + Filter (#0 = false) // { arity: 4 } + Get l8 // { arity: 4 } + ArrangeBy keys=[[#1{id}]] // { arity: 3 } + Project (#1{id}..=#3) // { arity: 3 } + Filter (#0 = true) // { arity: 4 } + Get l8 // { arity: 4 } + Return // { arity: 3 } + Project (#0{id}..=#2{min}) // { arity: 3 } + Join on=(#2{min} = #3{min_min}) type=differential // { arity: 4 } + implementation + %1[#0]UK » %0:l9[#2]K + ArrangeBy keys=[[#2{min}]] // { arity: 3 } + Get l9 // { arity: 3 } + ArrangeBy keys=[[#0{min_min}]] // { arity: 1 } + Reduce aggregates=[min(#0{min})] // { arity: 1 } + Project (#2{min}) // { arity: 1 } + Get l9 // { arity: 3 } Used Indexes: - materialize.public.person_locationcityid (lookup) @@ -4068,18 +4076,18 @@ Explained Query: Finish order_by=[#0{dst} asc nulls_last] limit=20 output=[#0, #1] With Mutually Recursive cte l0 = - Project (#2{min}, #0, #1{dst}) // { arity: 3 } + Project (#2, #0{dst}, #1{min}) // { arity: 3 } Map (10995116285979) // { arity: 3 } Reduce group_by=[#0{dst}] aggregates=[min(#1)] // { arity: 2 } Distinct project=[#0{dst}, #1] // { arity: 2 } Union // { arity: 2 } - Project (#3, #5) // { arity: 2 } + Project (#3{dst}, #5) // { arity: 2 } Map ((#1 + integer_to_bigint(#4{w}))) // { arity: 6 } Join on=(#0 = #2{src}) type=differential // { arity: 5 } implementation %0:l0[#0]UK » %1:pathq20[#0]KA ArrangeBy keys=[[#0{dst}]] // { arity: 2 } - Project (#1{min}, #2) // { arity: 2 } + Project (#1{dst}, #2{min}) // { arity: 2 } Get l0 // { arity: 3 } ArrangeBy keys=[[#0{src}]] // { arity: 3 } ReadIndex on=pathq20 pathq20_src=[differential join] // { arity: 3 } @@ -4093,11 +4101,11 @@ Explained Query: implementation %1[#0]UKA » %0:l0[#0]UK ArrangeBy keys=[[#0{dst}]] // { arity: 2 } - Project (#1{min}, #2) // { arity: 2 } + Project (#1{dst}, #2{min}) // { arity: 2 } Get l0 // { arity: 3 } ArrangeBy keys=[[#0{personid}]] // { arity: 1 } Distinct project=[#0{personid}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project 
(#1{personid}) // { arity: 1 } Filter (#5{name} = "Balkh_Airlines") // { arity: 8 } Join on=(#2{companyid} = #4{id}) type=differential // { arity: 8 } implementation @@ -4107,7 +4115,7 @@ Explained Query: ArrangeBy keys=[[#0{id}]] // { arity: 4 } ReadIndex on=company company_id=[differential join] // { arity: 4 } cte l2 = - Project (#1) // { arity: 1 } + Project (#1{min}) // { arity: 1 } Get l1 // { arity: 2 } Return // { arity: 2 } Project (#0{dst}, #1{min}) // { arity: 2 } @@ -4179,7 +4187,7 @@ Explained Query: Reduce group_by=[#0{dst}] aggregates=[min(#1)] // { arity: 2 } Distinct project=[#0{dst}, #1] // { arity: 2 } Union // { arity: 2 } - Project (#3, #5) // { arity: 2 } + Project (#3{dst}, #5) // { arity: 2 } Map ((#1 + integer_to_bigint(#4{w}))) // { arity: 6 } Join on=(#0 = #2{src}) type=differential // { arity: 5 } implementation @@ -4201,7 +4209,7 @@ Explained Query: Get l0 // { arity: 2 } ArrangeBy keys=[[#0{personid}]] // { arity: 1 } Distinct project=[#0{personid}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{personid}) // { arity: 1 } Filter (#5{name} = "Balkh_Airlines") // { arity: 8 } Join on=(#2{companyid} = #4{id}) type=differential // { arity: 8 } implementation @@ -4211,7 +4219,7 @@ Explained Query: ArrangeBy keys=[[#0{id}]] // { arity: 4 } ReadIndex on=company company_id=[differential join] // { arity: 4 } cte l2 = - Project (#1) // { arity: 1 } + Project (#1{min}) // { arity: 1 } Get l1 // { arity: 2 } Return // { arity: 2 } Project (#0{dst}, #1{min}) // { arity: 2 } @@ -4285,19 +4293,19 @@ Explained Query: Finish order_by=[#0{dst} asc nulls_last] limit=20 output=[#0..=#2] With Mutually Recursive cte l0 = - Project (#3{min}, #0..=#2{min}) // { arity: 4 } + Project (#3, #0{dst}..=#2{min}) // { arity: 4 } Map (10995116285979) // { arity: 4 } Reduce group_by=[#0{dst}, #2{min}] aggregates=[min(#1)] // { arity: 3 } Reduce group_by=[#0{dst}, #2] aggregates=[min(#1)] // { arity: 3 } Distinct project=[#0{dst}..=#2] // { arity: 3 } Union // { arity: 3 } - Project (#4, #6, #7) // { arity: 3 } + Project (#4{dst}, #6, #7) // { arity: 3 } Map ((#1 + integer_to_bigint(#5{w})), (#2 + 1)) // { arity: 8 } Join on=(#0 = #3{src}) type=differential // { arity: 6 } implementation %1:pathq20[#0]KA » %0:l0[#0]K ArrangeBy keys=[[#0{dst}]] // { arity: 3 } - Project (#1{min}..=#3) // { arity: 3 } + Project (#1{dst}..=#3{min}) // { arity: 3 } Get l0 // { arity: 4 } ArrangeBy keys=[[#0{src}]] // { arity: 3 } ReadIndex on=pathq20 pathq20_src=[differential join] // { arity: 3 } @@ -4311,11 +4319,11 @@ Explained Query: implementation %1[#0]UKA » %0:l0[#0]K ArrangeBy keys=[[#0{dst}]] // { arity: 3 } - Project (#1{min}..=#3) // { arity: 3 } + Project (#1{dst}..=#3{min}) // { arity: 3 } Get l0 // { arity: 4 } ArrangeBy keys=[[#0{personid}]] // { arity: 1 } Distinct project=[#0{personid}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{personid}) // { arity: 1 } Filter (#5{name} = "Balkh_Airlines") // { arity: 8 } Join on=(#2{companyid} = #4{id}) type=differential // { arity: 8 } implementation @@ -4325,7 +4333,7 @@ Explained Query: ArrangeBy keys=[[#0{id}]] // { arity: 4 } ReadIndex on=company company_id=[differential join] // { arity: 4 } cte l2 = - Project (#1) // { arity: 1 } + Project (#1{min}) // { arity: 1 } Get l1 // { arity: 3 } Return // { arity: 3 } Project (#0{dst}..=#2{min}) // { arity: 3 } @@ -4434,9 +4442,9 @@ SELECT t, w FROM results WHERE w = (SELECT min(w) FROM results) ORDER BY t LIMIT ---- Explained Query: Finish order_by=[#0{personid} asc nulls_last] 
limit=20 output=[#0, #1] - With Mutually Recursive + With cte l0 = - Project (#1) // { arity: 1 } + Project (#1{personid}) // { arity: 1 } Filter (#5{name} = "Balkh_Airlines") // { arity: 8 } Join on=(#2{companyid} = #4{id}) type=differential // { arity: 8 } implementation @@ -4451,207 +4459,209 @@ Explained Query: cte l2 = ArrangeBy keys=[[#0{personid}]] // { arity: 1 } Get l0 // { arity: 1 } - cte l3 = - Distinct project=[#0{dst}] // { arity: 1 } - Union // { arity: 1 } - Project (#2) // { arity: 1 } - Join on=(#0 = #1{src}) type=delta // { arity: 4 } + Return // { arity: 2 } + With Mutually Recursive + cte l3 = + Distinct project=[#0{dst}] // { arity: 1 } + Union // { arity: 1 } + Project (#2{dst}) // { arity: 1 } + Join on=(#0 = #1{src}) type=delta // { arity: 4 } + implementation + %0:l3 » %1:l1[#0]KA » %2[×] + %1:l1 » %0:l3[#0]UK » %2[×] + %2 » %0:l3[×] » %1:l1[#0]KA + ArrangeBy keys=[[], [#0{dst}]] // { arity: 1 } + Get l3 // { arity: 1 } + Get l1 // { arity: 3 } + ArrangeBy keys=[[]] // { arity: 0 } + Union // { arity: 0 } + Negate // { arity: 0 } + Distinct project=[] // { arity: 0 } + Project () // { arity: 0 } + Join on=(#0{dst} = #1{personid}) type=differential // { arity: 2 } + implementation + %0:l3[#0]UK » %1:l2[#0]K + ArrangeBy keys=[[#0{dst}]] // { arity: 1 } + Get l3 // { arity: 1 } + Get l2 // { arity: 1 } + Constant // { arity: 0 } + - () + Constant // { arity: 1 } + - (10995116285979) + cte l4 = + TopK order_by=[#3 asc nulls_last] limit=1000 // { arity: 5 } + Project (#0..=#3, #5) // { arity: 5 } + Filter (#4 = false) // { arity: 6 } + Get l12 // { arity: 6 } + cte l5 = + Distinct project=[#0..=#2] // { arity: 3 } + Project (#0..=#2{personid}) // { arity: 3 } + Get l12 // { arity: 6 } + cte l6 = + ArrangeBy keys=[[#0..=#2]] // { arity: 3 } + Get l5 // { arity: 3 } + cte l7 = + Project (#0..=#2) // { arity: 3 } + Join on=(#0 = #3 AND #1 = #4 AND #2 = #5) type=differential // { arity: 6 } + implementation + %1[#0..=#2]UKKKA » %0:l6[#0..=#2]UKKK + Get l6 // { arity: 3 } + ArrangeBy keys=[[#0..=#2]] // { arity: 3 } + Distinct project=[#0..=#2] // { arity: 3 } + Project (#0..=#2) // { arity: 3 } + Get l4 // { arity: 5 } + cte l8 = + TopK group_by=[#0, #1, #2{dst}] order_by=[#3 asc nulls_last, #4 desc nulls_first] limit=1 // { arity: 5 } + Union // { arity: 5 } + Project (#3, #4, #1{dst}, #7, #8) // { arity: 5 } + Map ((#6 + integer_to_bigint(#2{w})), false) // { arity: 9 } + Join on=(#0{src} = #5) type=differential // { arity: 7 } + implementation + %0:l1[#0]KA » %1:l4[#2]K + Get l1 // { arity: 3 } + ArrangeBy keys=[[#2]] // { arity: 4 } + Project (#0..=#3) // { arity: 4 } + Get l4 // { arity: 5 } + Project (#0..=#3, #9) // { arity: 5 } + Map ((#4 OR #8)) // { arity: 10 } + Join on=(#0 = #5 AND #1 = #6 AND #2 = #7) type=differential // { arity: 9 } + implementation + %0:l12[#0..=#2]KKK » %1[#0..=#2]KKK + ArrangeBy keys=[[#0..=#2]] // { arity: 5 } + Project (#0..=#4) // { arity: 5 } + Get l12 // { arity: 6 } + ArrangeBy keys=[[#0..=#2]] // { arity: 4 } + Union // { arity: 4 } + Map (true) // { arity: 4 } + Get l7 // { arity: 3 } + Project (#0..=#2, #6) // { arity: 4 } + Map (false) // { arity: 7 } + Join on=(#0 = #3 AND #1 = #4 AND #2 = #5) type=differential // { arity: 6 } + implementation + %1:l6[#0..=#2]UKKK » %0[#0..=#2]KKK + ArrangeBy keys=[[#0..=#2]] // { arity: 3 } + Union // { arity: 3 } + Negate // { arity: 3 } + Get l7 // { arity: 3 } + Get l5 // { arity: 3 } + Get l6 // { arity: 3 } + cte l9 = + Reduce aggregates=[min((#0 + #1))] // { arity: 1 } + Project (#1, #3) 
// { arity: 2 } + Join on=(#0{dst} = #2{dst}) type=differential // { arity: 4 } implementation - %0:l3 » %1:l1[#0]KA » %2[×] - %1:l1 » %0:l3[#0]UK » %2[×] - %2 » %0:l3[×] » %1:l1[#0]KA - ArrangeBy keys=[[], [#0{dst}]] // { arity: 1 } - Get l3 // { arity: 1 } - Get l1 // { arity: 3 } - ArrangeBy keys=[[]] // { arity: 0 } + %0:l8[#0]Kef » %1:l8[#0]Kef + ArrangeBy keys=[[#0{dst}]] // { arity: 2 } + Project (#2{dst}, #3) // { arity: 2 } + Filter (#0 = false) // { arity: 5 } + Get l8 // { arity: 5 } + ArrangeBy keys=[[#0{dst}]] // { arity: 2 } + Project (#2{dst}, #3) // { arity: 2 } + Filter (#0 = true) // { arity: 5 } + Get l8 // { arity: 5 } + cte l10 = + Project (#1) // { arity: 1 } + Map ((#0{min} / 2)) // { arity: 2 } + Union // { arity: 1 } + Get l9 // { arity: 1 } + Map (null) // { arity: 1 } Union // { arity: 0 } Negate // { arity: 0 } - Distinct project=[] // { arity: 0 } - Project () // { arity: 0 } - Join on=(#0{dst} = #1{personid}) type=differential // { arity: 2 } - implementation - %0:l3[#0]UK » %1:l2[#0]K - ArrangeBy keys=[[#0{dst}]] // { arity: 1 } - Get l3 // { arity: 1 } - Get l2 // { arity: 1 } + Project () // { arity: 0 } + Get l9 // { arity: 1 } Constant // { arity: 0 } - () - Constant // { arity: 1 } - - (10995116285979) - cte l4 = - TopK order_by=[#3 asc nulls_last] limit=1000 // { arity: 5 } - Project (#0..=#3, #5) // { arity: 5 } - Filter (#4 = false) // { arity: 6 } - Get l12 // { arity: 6 } - cte l5 = - Distinct project=[#0..=#2] // { arity: 3 } - Project (#0..=#2) // { arity: 3 } - Get l12 // { arity: 6 } - cte l6 = - ArrangeBy keys=[[#0..=#2]] // { arity: 3 } - Get l5 // { arity: 3 } - cte l7 = - Project (#0..=#2) // { arity: 3 } - Join on=(#0 = #3 AND #1 = #4 AND #2 = #5) type=differential // { arity: 6 } - implementation - %1[#0..=#2]UKKKA » %0:l6[#0..=#2]UKKK - Get l6 // { arity: 3 } - ArrangeBy keys=[[#0..=#2]] // { arity: 3 } - Distinct project=[#0..=#2] // { arity: 3 } - Project (#0..=#2) // { arity: 3 } - Get l4 // { arity: 5 } - cte l8 = - TopK group_by=[#0, #1, #2{dst}] order_by=[#3 asc nulls_last, #4 desc nulls_first] limit=1 // { arity: 5 } - Union // { arity: 5 } - Project (#3, #4, #1, #7, #8) // { arity: 5 } - Map ((#6 + integer_to_bigint(#2{w})), false) // { arity: 9 } - Join on=(#0{src} = #5) type=differential // { arity: 7 } - implementation - %0:l1[#0]KA » %1:l4[#2]K - Get l1 // { arity: 3 } - ArrangeBy keys=[[#2]] // { arity: 4 } - Project (#0..=#3) // { arity: 4 } - Get l4 // { arity: 5 } - Project (#0..=#3, #9) // { arity: 5 } - Map ((#4 OR #8)) // { arity: 10 } - Join on=(#0 = #5 AND #1 = #6 AND #2 = #7) type=differential // { arity: 9 } - implementation - %0:l12[#0..=#2]KKK » %1[#0..=#2]KKK - ArrangeBy keys=[[#0..=#2]] // { arity: 5 } - Project (#0..=#4) // { arity: 5 } - Get l12 // { arity: 6 } - ArrangeBy keys=[[#0..=#2]] // { arity: 4 } - Union // { arity: 4 } - Map (true) // { arity: 4 } - Get l7 // { arity: 3 } - Project (#0..=#2, #6) // { arity: 4 } - Map (false) // { arity: 7 } - Join on=(#0 = #3 AND #1 = #4 AND #2 = #5) type=differential // { arity: 6 } + cte l11 = + Distinct project=[] // { arity: 0 } + Project () // { arity: 0 } + Join on=(#0{dst} = #1{personid}) type=differential // { arity: 2 } + implementation + %0:l3[#0]UK » %1:l2[#0]K + ArrangeBy keys=[[#0{dst}]] // { arity: 1 } + Get l3 // { arity: 1 } + Get l2 // { arity: 1 } + cte l12 = + Distinct project=[#0..=#5] // { arity: 6 } + Union // { arity: 6 } + Project (#0..=#2{personid}, #4, #3, #5) // { arity: 6 } + Map (false, 0, 0) // { arity: 6 } + Distinct 
project=[#0..=#2{personid}] // { arity: 3 } + Union // { arity: 3 } + Project (#1, #0, #0) // { arity: 3 } + Map (10995116285979, false) // { arity: 2 } + Get l11 // { arity: 0 } + Project (#1, #0{personid}, #0{personid}) // { arity: 3 } + Map (true) // { arity: 2 } + CrossJoin type=differential // { arity: 1 } implementation - %1:l6[#0..=#2]UKKK » %0[#0..=#2]KKK - ArrangeBy keys=[[#0..=#2]] // { arity: 3 } - Union // { arity: 3 } - Negate // { arity: 3 } - Get l7 // { arity: 3 } - Get l5 // { arity: 3 } - Get l6 // { arity: 3 } - cte l9 = - Reduce aggregates=[min((#0 + #1))] // { arity: 1 } - Project (#1, #3) // { arity: 2 } - Join on=(#0{dst} = #2{dst}) type=differential // { arity: 4 } - implementation - %0:l8[#0]Kef » %1:l8[#0]Kef - ArrangeBy keys=[[#0{dst}]] // { arity: 2 } - Project (#2, #3) // { arity: 2 } - Filter (#0 = false) // { arity: 5 } - Get l8 // { arity: 5 } - ArrangeBy keys=[[#0{dst}]] // { arity: 2 } - Project (#2, #3) // { arity: 2 } - Filter (#0 = true) // { arity: 5 } - Get l8 // { arity: 5 } - cte l10 = - Project (#1) // { arity: 1 } - Map ((#0{min} / 2)) // { arity: 2 } - Union // { arity: 1 } - Get l9 // { arity: 1 } - Map (null) // { arity: 1 } - Union // { arity: 0 } - Negate // { arity: 0 } - Project () // { arity: 0 } - Get l9 // { arity: 1 } - Constant // { arity: 0 } - - () - cte l11 = - Distinct project=[] // { arity: 0 } - Project () // { arity: 0 } - Join on=(#0{dst} = #1{personid}) type=differential // { arity: 2 } - implementation - %0:l3[#0]UK » %1:l2[#0]K - ArrangeBy keys=[[#0{dst}]] // { arity: 1 } - Get l3 // { arity: 1 } - Get l2 // { arity: 1 } - cte l12 = - Distinct project=[#0..=#5] // { arity: 6 } - Union // { arity: 6 } - Project (#0..=#2{personid}, #4, #3, #5) // { arity: 6 } - Map (false, 0, 0) // { arity: 6 } - Distinct project=[#0..=#2{personid}] // { arity: 3 } - Union // { arity: 3 } - Project (#1, #0, #0) // { arity: 3 } - Map (10995116285979, false) // { arity: 2 } - Get l11 // { arity: 0 } - Project (#1{personid}, #0, #0) // { arity: 3 } - Map (true) // { arity: 2 } - CrossJoin type=differential // { arity: 1 } - implementation - %1:l11[×]U » %0:l0[×] - ArrangeBy keys=[[]] // { arity: 1 } - Get l0 // { arity: 1 } - ArrangeBy keys=[[]] // { arity: 0 } - Get l11 // { arity: 0 } - Project (#0..=#3, #7, #8) // { arity: 6 } - Map ((#4 OR coalesce((#3 > #6), false)), (#5 + 1)) // { arity: 9 } - CrossJoin type=delta // { arity: 7 } - implementation - %0:l8 » %1[×]U » %2[×]U - %1 » %2[×]U » %0:l8[×] - %2 » %1[×]U » %0:l8[×] - ArrangeBy keys=[[]] // { arity: 5 } - Get l8 // { arity: 5 } - ArrangeBy keys=[[]] // { arity: 1 } - TopK limit=1 // { arity: 1 } - Project (#4) // { arity: 1 } - Get l4 // { arity: 5 } - ArrangeBy keys=[[]] // { arity: 1 } - Union // { arity: 1 } - Get l10 // { arity: 1 } - Map (null) // { arity: 1 } - Union // { arity: 0 } - Negate // { arity: 0 } - Project () // { arity: 0 } - Get l10 // { arity: 1 } - Constant // { arity: 0 } - - () - Return // { arity: 2 } - With - cte l13 = - Project (#0..=#3) // { arity: 4 } - Join on=(#4 = #5{max}) type=differential // { arity: 6 } - implementation - %1[#0]UK » %0:l12[#4]K - ArrangeBy keys=[[#4]] // { arity: 5 } - Project (#0..=#3, #5) // { arity: 5 } - Get l12 // { arity: 6 } - ArrangeBy keys=[[#0{max}]] // { arity: 1 } - Reduce aggregates=[max(#0)] // { arity: 1 } - Project (#5) // { arity: 1 } - Get l12 // { arity: 6 } - cte l14 = - Project (#1{min}, #2) // { arity: 2 } - Reduce group_by=[#0{personid}, #2{personid}] aggregates=[min((#1 + #3))] // { arity: 3 } - Project 
(#0{personid}, #2{personid}, #3, #5) // { arity: 4 } - Join on=(#1{personid} = #4{personid}) type=differential // { arity: 6 } - implementation - %0:l13[#1]Kef » %1:l13[#1]Kef - ArrangeBy keys=[[#1{personid}]] // { arity: 3 } - Project (#1{personid}..=#3) // { arity: 3 } - Filter (#0 = false) // { arity: 4 } - Get l13 // { arity: 4 } - ArrangeBy keys=[[#1{personid}]] // { arity: 3 } - Project (#1{personid}..=#3) // { arity: 3 } - Filter (#0 = true) // { arity: 4 } - Get l13 // { arity: 4 } + %1:l11[×]U » %0:l0[×] + ArrangeBy keys=[[]] // { arity: 1 } + Get l0 // { arity: 1 } + ArrangeBy keys=[[]] // { arity: 0 } + Get l11 // { arity: 0 } + Project (#0..=#3, #7, #8) // { arity: 6 } + Map ((#4 OR coalesce((#3 > #6), false)), (#5 + 1)) // { arity: 9 } + CrossJoin type=delta // { arity: 7 } + implementation + %0:l8 » %1[×]U » %2[×]U + %1 » %2[×]U » %0:l8[×] + %2 » %1[×]U » %0:l8[×] + ArrangeBy keys=[[]] // { arity: 5 } + Get l8 // { arity: 5 } + ArrangeBy keys=[[]] // { arity: 1 } + TopK limit=1 // { arity: 1 } + Project (#4) // { arity: 1 } + Get l4 // { arity: 5 } + ArrangeBy keys=[[]] // { arity: 1 } + Union // { arity: 1 } + Get l10 // { arity: 1 } + Map (null) // { arity: 1 } + Union // { arity: 0 } + Negate // { arity: 0 } + Project () // { arity: 0 } + Get l10 // { arity: 1 } + Constant // { arity: 0 } + - () Return // { arity: 2 } - Project (#0{personid}, #1{min}) // { arity: 2 } - Join on=(#1{min} = #2{min_min}) type=differential // { arity: 3 } - implementation - %1[#0]UK » %0:l14[#1]K - ArrangeBy keys=[[#1{min}]] // { arity: 2 } - Get l14 // { arity: 2 } - ArrangeBy keys=[[#0{min_min}]] // { arity: 1 } - Reduce aggregates=[min(#0{min})] // { arity: 1 } - Project (#1) // { arity: 1 } - Get l14 // { arity: 2 } + With + cte l13 = + Project (#0..=#3) // { arity: 4 } + Join on=(#4 = #5{max}) type=differential // { arity: 6 } + implementation + %1[#0]UK » %0:l12[#4]K + ArrangeBy keys=[[#4]] // { arity: 5 } + Project (#0..=#3, #5) // { arity: 5 } + Get l12 // { arity: 6 } + ArrangeBy keys=[[#0{max}]] // { arity: 1 } + Reduce aggregates=[max(#0)] // { arity: 1 } + Project (#5) // { arity: 1 } + Get l12 // { arity: 6 } + cte l14 = + Project (#1{personid}, #2{min}) // { arity: 2 } + Reduce group_by=[#0{personid}, #2{personid}] aggregates=[min((#1 + #3))] // { arity: 3 } + Project (#0{personid}, #2, #3{personid}, #5) // { arity: 4 } + Join on=(#1{personid} = #4{personid}) type=differential // { arity: 6 } + implementation + %0:l13[#1]Kef » %1:l13[#1]Kef + ArrangeBy keys=[[#1{personid}]] // { arity: 3 } + Project (#1{personid}..=#3) // { arity: 3 } + Filter (#0 = false) // { arity: 4 } + Get l13 // { arity: 4 } + ArrangeBy keys=[[#1{personid}]] // { arity: 3 } + Project (#1{personid}..=#3) // { arity: 3 } + Filter (#0 = true) // { arity: 4 } + Get l13 // { arity: 4 } + Return // { arity: 2 } + Project (#0{personid}, #1{min}) // { arity: 2 } + Join on=(#1{min} = #2{min_min}) type=differential // { arity: 3 } + implementation + %1[#0]UK » %0:l14[#1]K + ArrangeBy keys=[[#1{min}]] // { arity: 2 } + Get l14 // { arity: 2 } + ArrangeBy keys=[[#0{min_min}]] // { arity: 1 } + Reduce aggregates=[min(#0{min})] // { arity: 1 } + Project (#1{min}) // { arity: 1 } + Get l14 // { arity: 2 } Used Indexes: - materialize.public.person_workat_company_companyid (differential join) diff --git a/test/sqllogictest/limit_expr.slt b/test/sqllogictest/limit_expr.slt index 20c9608f527de..ee37581d4a8a2 100644 --- a/test/sqllogictest/limit_expr.slt +++ b/test/sqllogictest/limit_expr.slt @@ -113,17 +113,17 @@ ORDER BY ---- 
Explained Query: Finish order_by=[#1{first_name} asc nulls_last, #4{preference} asc nulls_last] output=[#1, #3] - Project (#0{id}..=#2{allowance}, #5, #6) // { arity: 5 } + Project (#0{id}..=#2{allowance}, #5{fruit}, #6{preference}) // { arity: 5 } Join on=(#0{id} = #3{id} AND #2{allowance} = #4{allowance}) type=differential // { arity: 7 } ArrangeBy keys=[[#0{id}, #2{allowance}]] // { arity: 3 } ReadStorage materialize.public.people // { arity: 3 } ArrangeBy keys=[[#0{id}, #1{allowance}]] // { arity: 4 } TopK group_by=[#0{id}, #1{allowance}] order_by=[#3{preference} asc nulls_last] limit=integer_to_bigint(#1{allowance}) offset=1 // { arity: 4 } - Project (#0{id}, #1{allowance}, #3{preference}, #4) // { arity: 4 } + Project (#0{id}, #1{allowance}, #3{fruit}, #4{preference}) // { arity: 4 } Join on=(#0{id} = #2{person_id}) type=differential // { arity: 5 } ArrangeBy keys=[[#0{id}]] // { arity: 2 } Distinct project=[#0{id}, #1{allowance}] // { arity: 2 } - Project (#0{id}, #2) // { arity: 2 } + Project (#0{id}, #2{allowance}) // { arity: 2 } Filter (#0{id}) IS NOT NULL // { arity: 3 } ReadStorage materialize.public.people // { arity: 3 } ArrangeBy keys=[[#0{person_id}]] // { arity: 3 } @@ -168,10 +168,10 @@ ORDER BY ---- Explained Query: Finish order_by=[#1 asc nulls_last, #4{preference} asc nulls_last] output=[#1, #3] - Project (#2..=#4{preference}, #0, #1) // { arity: 5 } + Project (#2..=#4, #0{fruit}, #1{preference}) // { arity: 5 } Map (1, "frank", -4) // { arity: 5 } TopK order_by=[#1{preference} asc nulls_last] limit=-4 offset=1 // { arity: 2 } - Project (#1{preference}, #2) // { arity: 2 } + Project (#1{fruit}, #2{preference}) // { arity: 2 } Filter (#0{person_id} = 1) // { arity: 3 } ReadStorage materialize.public.preferred_fruits // { arity: 3 } @@ -208,7 +208,7 @@ LIMIT 3; Explained Query: Finish order_by=[#0{state} asc nulls_last, #1{name} asc nulls_last] limit=3 output=[#0, #1] TopK group_by=[#0{state}] limit=integer_to_bigint((ascii(substr(#0{state}, 1, 1)) - 64)) // { arity: 2 } - Project (#1{name}, #0{state}) // { arity: 2 } + Project (#1{state}, #0{name}) // { arity: 2 } ReadStorage materialize.public.cities // { arity: 3 } Source materialize.public.cities @@ -259,10 +259,10 @@ Explained Query: With cte l0 = Distinct project=[#0{state}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{state}) // { arity: 1 } ReadStorage materialize.public.cities // { arity: 3 } cte l1 = - Project (#0{state}, #2) // { arity: 2 } + Project (#0{state}, #2{l}) // { arity: 2 } Join on=(#1{sl} = substr(#0{state}, 1, 1)) type=differential // { arity: 3 } ArrangeBy keys=[[substr(#0{state}, 1, 1)]] // { arity: 1 } Get l0 // { arity: 1 } @@ -279,9 +279,9 @@ Explained Query: Project (#0{state}) // { arity: 1 } Get l1 // { arity: 2 } Return // { arity: 2 } - Project (#1{name}, #0{state}) // { arity: 2 } + Project (#1{state}, #0{name}) // { arity: 2 } TopK group_by=[#1{state}, #2{l}] order_by=[#0{name} asc nulls_last] limit=integer_to_bigint(#2{l}) // { arity: 3 } - Project (#0{name}, #1{state}, #3) // { arity: 3 } + Project (#0{name}, #1{state}, #3{l}) // { arity: 3 } Join on=(#1{state} = #2{state}) type=differential // { arity: 4 } ArrangeBy keys=[[#1{state}]] // { arity: 2 } Project (#0{name}, #1{state}) // { arity: 2 } @@ -330,7 +330,7 @@ Explained Query: CrossJoin type=differential // { arity: 2 } ArrangeBy keys=[[]] // { arity: 1 } Distinct project=[#0{state}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{state}) // { arity: 1 } ReadStorage materialize.public.cities // { 
arity: 3 } ArrangeBy keys=[[]] // { arity: 1 } TopK limit=1 // { arity: 1 } @@ -386,7 +386,7 @@ Explained Query: TopK group_by=[#1{state}] limit=integer_to_bigint((char_length(#1{state}) % 3)) // { arity: 2 } Get l0 // { arity: 2 } Return // { arity: 3 } - Project (#1{name}, #0{state}, #3) // { arity: 3 } + Project (#1{state}, #0{name}, #3{name}) // { arity: 3 } Join on=(#1{state} = #2{state}) type=differential // { arity: 4 } ArrangeBy keys=[[#1{state}]] // { arity: 2 } Get l1 // { arity: 2 } @@ -396,7 +396,7 @@ Explained Query: Join on=(#0{state} = #2{state}) type=differential // { arity: 3 } ArrangeBy keys=[[#0{state}]] // { arity: 1 } Distinct project=[#0{state}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{state}) // { arity: 1 } Get l1 // { arity: 2 } ArrangeBy keys=[[#1{state}]] // { arity: 2 } Get l0 // { arity: 2 } @@ -504,7 +504,7 @@ Explained Query: CrossJoin type=differential // { arity: 2 } ArrangeBy keys=[[]] // { arity: 1 } Distinct project=[#0{state}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{state}) // { arity: 1 } ReadStorage materialize.public.cities // { arity: 3 } ArrangeBy keys=[[]] // { arity: 1 } Project (#0{name}) // { arity: 1 } diff --git a/test/sqllogictest/list.slt b/test/sqllogictest/list.slt index 873b5e1db8ded..b9634ac90b2ff 100644 --- a/test/sqllogictest/list.slt +++ b/test/sqllogictest/list.slt @@ -3027,3 +3027,13 @@ query T SELECT LIST[1,3,7,NULL] @> LIST[1,3,7,NULL] AS contains; ---- false + +# Make sure we can index into a CAST-ed list. + +statement ok +CREATE TABLE jsons (payload jsonb, random_index int, random_id uuid); + +statement ok +CREATE MATERIALIZED VIEW json_mv AS ( + SELECT * FROM jsons WHERE random_id = CAST(payload->>'my_field' AS uuid list)[random_index] +) diff --git a/test/sqllogictest/mz_catalog_server_index_accounting.slt b/test/sqllogictest/mz_catalog_server_index_accounting.slt index 4bb0c083d3872..5202fc259d718 100644 --- a/test/sqllogictest/mz_catalog_server_index_accounting.slt +++ b/test/sqllogictest/mz_catalog_server_index_accounting.slt @@ -37,20 +37,20 @@ mz_arrangement_heap_capacity_raw_s2_primary_idx CREATE␠INDEX␠"mz_arrangemen mz_arrangement_heap_size_raw_s2_primary_idx CREATE␠INDEX␠"mz_arrangement_heap_size_raw_s2_primary_idx"␠IN␠CLUSTER␠[s2]␠ON␠"mz_introspection"."mz_arrangement_heap_size_raw"␠("operator_id",␠"worker_id") mz_arrangement_records_raw_s2_primary_idx CREATE␠INDEX␠"mz_arrangement_records_raw_s2_primary_idx"␠IN␠CLUSTER␠[s2]␠ON␠"mz_introspection"."mz_arrangement_records_raw"␠("operator_id",␠"worker_id") mz_arrangement_sharing_raw_s2_primary_idx CREATE␠INDEX␠"mz_arrangement_sharing_raw_s2_primary_idx"␠IN␠CLUSTER␠[s2]␠ON␠"mz_introspection"."mz_arrangement_sharing_raw"␠("operator_id",␠"worker_id") -mz_cluster_deployment_lineage_ind CREATE␠INDEX␠"mz_cluster_deployment_lineage_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s717␠AS␠"mz_internal"."mz_cluster_deployment_lineage"]␠("cluster_id") -mz_cluster_replica_history_ind CREATE␠INDEX␠"mz_cluster_replica_history_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s579␠AS␠"mz_internal"."mz_cluster_replica_history"]␠("dropped_at") -mz_cluster_replica_metrics_history_ind CREATE␠INDEX␠"mz_cluster_replica_metrics_history_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s492␠AS␠"mz_internal"."mz_cluster_replica_metrics_history"]␠("replica_id") -mz_cluster_replica_metrics_ind CREATE␠INDEX␠"mz_cluster_replica_metrics_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s491␠AS␠"mz_internal"."mz_cluster_replica_metrics"]␠("replica_id") -mz_cluster_replica_name_history_ind 
CREATE␠INDEX␠"mz_cluster_replica_name_history_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s580␠AS␠"mz_internal"."mz_cluster_replica_name_history"]␠("id") -mz_cluster_replica_sizes_ind CREATE␠INDEX␠"mz_cluster_replica_sizes_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s493␠AS␠"mz_catalog"."mz_cluster_replica_sizes"]␠("size") -mz_cluster_replica_status_history_ind CREATE␠INDEX␠"mz_cluster_replica_status_history_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s495␠AS␠"mz_internal"."mz_cluster_replica_status_history"]␠("replica_id") -mz_cluster_replica_statuses_ind CREATE␠INDEX␠"mz_cluster_replica_statuses_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s494␠AS␠"mz_internal"."mz_cluster_replica_statuses"]␠("replica_id") -mz_cluster_replicas_ind CREATE␠INDEX␠"mz_cluster_replicas_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s490␠AS␠"mz_catalog"."mz_cluster_replicas"]␠("id") -mz_clusters_ind CREATE␠INDEX␠"mz_clusters_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s484␠AS␠"mz_catalog"."mz_clusters"]␠("id") -mz_columns_ind CREATE␠INDEX␠"mz_columns_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s457␠AS␠"mz_catalog"."mz_columns"]␠("name") -mz_comments_ind CREATE␠INDEX␠"mz_comments_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s507␠AS␠"mz_internal"."mz_comments"]␠("id") +mz_cluster_deployment_lineage_ind CREATE␠INDEX␠"mz_cluster_deployment_lineage_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s719␠AS␠"mz_internal"."mz_cluster_deployment_lineage"]␠("cluster_id") +mz_cluster_replica_history_ind CREATE␠INDEX␠"mz_cluster_replica_history_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s581␠AS␠"mz_internal"."mz_cluster_replica_history"]␠("dropped_at") +mz_cluster_replica_metrics_history_ind CREATE␠INDEX␠"mz_cluster_replica_metrics_history_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s494␠AS␠"mz_internal"."mz_cluster_replica_metrics_history"]␠("replica_id") +mz_cluster_replica_metrics_ind CREATE␠INDEX␠"mz_cluster_replica_metrics_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s493␠AS␠"mz_internal"."mz_cluster_replica_metrics"]␠("replica_id") +mz_cluster_replica_name_history_ind CREATE␠INDEX␠"mz_cluster_replica_name_history_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s582␠AS␠"mz_internal"."mz_cluster_replica_name_history"]␠("id") +mz_cluster_replica_sizes_ind CREATE␠INDEX␠"mz_cluster_replica_sizes_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s495␠AS␠"mz_catalog"."mz_cluster_replica_sizes"]␠("size") +mz_cluster_replica_status_history_ind CREATE␠INDEX␠"mz_cluster_replica_status_history_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s497␠AS␠"mz_internal"."mz_cluster_replica_status_history"]␠("replica_id") +mz_cluster_replica_statuses_ind CREATE␠INDEX␠"mz_cluster_replica_statuses_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s496␠AS␠"mz_internal"."mz_cluster_replica_statuses"]␠("replica_id") +mz_cluster_replicas_ind CREATE␠INDEX␠"mz_cluster_replicas_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s492␠AS␠"mz_catalog"."mz_cluster_replicas"]␠("id") +mz_clusters_ind CREATE␠INDEX␠"mz_clusters_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s486␠AS␠"mz_catalog"."mz_clusters"]␠("id") +mz_columns_ind CREATE␠INDEX␠"mz_columns_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s459␠AS␠"mz_catalog"."mz_columns"]␠("name") +mz_comments_ind CREATE␠INDEX␠"mz_comments_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s509␠AS␠"mz_internal"."mz_comments"]␠("id") mz_compute_dataflow_global_ids_per_worker_s2_primary_idx CREATE␠INDEX␠"mz_compute_dataflow_global_ids_per_worker_s2_primary_idx"␠IN␠CLUSTER␠[s2]␠ON␠"mz_introspection"."mz_compute_dataflow_global_ids_per_worker"␠("id",␠"worker_id") -mz_compute_dependencies_ind CREATE␠INDEX␠"mz_compute_dependencies_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s700␠AS␠"mz_internal"."mz_compute_dependencies"]␠("dependency_id") +mz_compute_dependencies_ind CREATE␠INDEX␠"mz_compute_dependencies_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s702␠AS␠"mz_internal"."mz_compute_dependencies"]␠("dependency_id") mz_compute_error_counts_raw_s2_primary_idx 
CREATE␠INDEX␠"mz_compute_error_counts_raw_s2_primary_idx"␠IN␠CLUSTER␠[s2]␠ON␠"mz_introspection"."mz_compute_error_counts_raw"␠("export_id",␠"worker_id") mz_compute_exports_per_worker_s2_primary_idx CREATE␠INDEX␠"mz_compute_exports_per_worker_s2_primary_idx"␠IN␠CLUSTER␠[s2]␠ON␠"mz_introspection"."mz_compute_exports_per_worker"␠("export_id",␠"worker_id") mz_compute_frontiers_per_worker_s2_primary_idx CREATE␠INDEX␠"mz_compute_frontiers_per_worker_s2_primary_idx"␠IN␠CLUSTER␠[s2]␠ON␠"mz_introspection"."mz_compute_frontiers_per_worker"␠("export_id",␠"worker_id") @@ -58,74 +58,74 @@ mz_compute_hydration_times_per_worker_s2_primary_idx CREATE␠INDEX␠"mz_compu mz_compute_import_frontiers_per_worker_s2_primary_idx CREATE␠INDEX␠"mz_compute_import_frontiers_per_worker_s2_primary_idx"␠IN␠CLUSTER␠[s2]␠ON␠"mz_introspection"."mz_compute_import_frontiers_per_worker"␠("export_id",␠"import_id",␠"worker_id") mz_compute_lir_mapping_per_worker_s2_primary_idx CREATE␠INDEX␠"mz_compute_lir_mapping_per_worker_s2_primary_idx"␠IN␠CLUSTER␠[s2]␠ON␠"mz_introspection"."mz_compute_lir_mapping_per_worker"␠("global_id",␠"lir_id",␠"worker_id") mz_compute_operator_durations_histogram_raw_s2_primary_idx CREATE␠INDEX␠"mz_compute_operator_durations_histogram_raw_s2_primary_idx"␠IN␠CLUSTER␠[s2]␠ON␠"mz_introspection"."mz_compute_operator_durations_histogram_raw"␠("id",␠"worker_id",␠"duration_ns") -mz_connections_ind CREATE␠INDEX␠"mz_connections_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s488␠AS␠"mz_catalog"."mz_connections"]␠("schema_id") -mz_console_cluster_utilization_overview_ind CREATE␠INDEX␠"mz_console_cluster_utilization_overview_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s704␠AS␠"mz_internal"."mz_console_cluster_utilization_overview"]␠("cluster_id") -mz_continual_tasks_ind CREATE␠INDEX␠"mz_continual_tasks_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s510␠AS␠"mz_internal"."mz_continual_tasks"]␠("id") -mz_databases_ind CREATE␠INDEX␠"mz_databases_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s455␠AS␠"mz_catalog"."mz_databases"]␠("name") +mz_connections_ind CREATE␠INDEX␠"mz_connections_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s490␠AS␠"mz_catalog"."mz_connections"]␠("schema_id") +mz_console_cluster_utilization_overview_ind CREATE␠INDEX␠"mz_console_cluster_utilization_overview_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s706␠AS␠"mz_internal"."mz_console_cluster_utilization_overview"]␠("cluster_id") +mz_continual_tasks_ind CREATE␠INDEX␠"mz_continual_tasks_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s512␠AS␠"mz_internal"."mz_continual_tasks"]␠("id") +mz_databases_ind CREATE␠INDEX␠"mz_databases_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s457␠AS␠"mz_catalog"."mz_databases"]␠("name") mz_dataflow_addresses_per_worker_s2_primary_idx CREATE␠INDEX␠"mz_dataflow_addresses_per_worker_s2_primary_idx"␠IN␠CLUSTER␠[s2]␠ON␠"mz_introspection"."mz_dataflow_addresses_per_worker"␠("id",␠"worker_id") mz_dataflow_channels_per_worker_s2_primary_idx CREATE␠INDEX␠"mz_dataflow_channels_per_worker_s2_primary_idx"␠IN␠CLUSTER␠[s2]␠ON␠"mz_introspection"."mz_dataflow_channels_per_worker"␠("id",␠"worker_id") mz_dataflow_operator_reachability_raw_s2_primary_idx CREATE␠INDEX␠"mz_dataflow_operator_reachability_raw_s2_primary_idx"␠IN␠CLUSTER␠[s2]␠ON␠"mz_introspection"."mz_dataflow_operator_reachability_raw"␠("address",␠"port",␠"worker_id",␠"update_type",␠"time") mz_dataflow_operators_per_worker_s2_primary_idx CREATE␠INDEX␠"mz_dataflow_operators_per_worker_s2_primary_idx"␠IN␠CLUSTER␠[s2]␠ON␠"mz_introspection"."mz_dataflow_operators_per_worker"␠("id",␠"worker_id") mz_dataflow_shutdown_durations_histogram_raw_s2_primary_idx 
CREATE␠INDEX␠"mz_dataflow_shutdown_durations_histogram_raw_s2_primary_idx"␠IN␠CLUSTER␠[s2]␠ON␠"mz_introspection"."mz_dataflow_shutdown_durations_histogram_raw"␠("worker_id",␠"duration_ns") -mz_frontiers_ind CREATE␠INDEX␠"mz_frontiers_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s694␠AS␠"mz_internal"."mz_frontiers"]␠("object_id") -mz_indexes_ind CREATE␠INDEX␠"mz_indexes_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s458␠AS␠"mz_catalog"."mz_indexes"]␠("id") -mz_kafka_sources_ind CREATE␠INDEX␠"mz_kafka_sources_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s453␠AS␠"mz_catalog"."mz_kafka_sources"]␠("id") -mz_materialized_views_ind CREATE␠INDEX␠"mz_materialized_views_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s469␠AS␠"mz_catalog"."mz_materialized_views"]␠("id") +mz_frontiers_ind CREATE␠INDEX␠"mz_frontiers_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s696␠AS␠"mz_internal"."mz_frontiers"]␠("object_id") +mz_indexes_ind CREATE␠INDEX␠"mz_indexes_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s460␠AS␠"mz_catalog"."mz_indexes"]␠("id") +mz_kafka_sources_ind CREATE␠INDEX␠"mz_kafka_sources_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s455␠AS␠"mz_catalog"."mz_kafka_sources"]␠("id") +mz_materialized_views_ind CREATE␠INDEX␠"mz_materialized_views_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s471␠AS␠"mz_catalog"."mz_materialized_views"]␠("id") mz_message_batch_counts_received_raw_s2_primary_idx CREATE␠INDEX␠"mz_message_batch_counts_received_raw_s2_primary_idx"␠IN␠CLUSTER␠[s2]␠ON␠"mz_introspection"."mz_message_batch_counts_received_raw"␠("channel_id",␠"from_worker_id",␠"to_worker_id") mz_message_batch_counts_sent_raw_s2_primary_idx CREATE␠INDEX␠"mz_message_batch_counts_sent_raw_s2_primary_idx"␠IN␠CLUSTER␠[s2]␠ON␠"mz_introspection"."mz_message_batch_counts_sent_raw"␠("channel_id",␠"from_worker_id",␠"to_worker_id") mz_message_counts_received_raw_s2_primary_idx CREATE␠INDEX␠"mz_message_counts_received_raw_s2_primary_idx"␠IN␠CLUSTER␠[s2]␠ON␠"mz_introspection"."mz_message_counts_received_raw"␠("channel_id",␠"from_worker_id",␠"to_worker_id") mz_message_counts_sent_raw_s2_primary_idx CREATE␠INDEX␠"mz_message_counts_sent_raw_s2_primary_idx"␠IN␠CLUSTER␠[s2]␠ON␠"mz_introspection"."mz_message_counts_sent_raw"␠("channel_id",␠"from_worker_id",␠"to_worker_id") -mz_notices_ind CREATE␠INDEX␠"mz_notices_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s782␠AS␠"mz_internal"."mz_notices"]␠("id") -mz_object_dependencies_ind CREATE␠INDEX␠"mz_object_dependencies_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s454␠AS␠"mz_internal"."mz_object_dependencies"]␠("object_id") -mz_object_history_ind CREATE␠INDEX␠"mz_object_history_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s518␠AS␠"mz_internal"."mz_object_history"]␠("id") -mz_object_lifetimes_ind CREATE␠INDEX␠"mz_object_lifetimes_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s519␠AS␠"mz_internal"."mz_object_lifetimes"]␠("id") -mz_object_transitive_dependencies_ind CREATE␠INDEX␠"mz_object_transitive_dependencies_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s532␠AS␠"mz_internal"."mz_object_transitive_dependencies"]␠("object_id") -mz_objects_ind CREATE␠INDEX␠"mz_objects_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s515␠AS␠"mz_catalog"."mz_objects"]␠("schema_id") +mz_notices_ind CREATE␠INDEX␠"mz_notices_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s784␠AS␠"mz_internal"."mz_notices"]␠("id") +mz_object_dependencies_ind CREATE␠INDEX␠"mz_object_dependencies_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s456␠AS␠"mz_internal"."mz_object_dependencies"]␠("object_id") +mz_object_history_ind CREATE␠INDEX␠"mz_object_history_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s520␠AS␠"mz_internal"."mz_object_history"]␠("id") +mz_object_lifetimes_ind CREATE␠INDEX␠"mz_object_lifetimes_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s521␠AS␠"mz_internal"."mz_object_lifetimes"]␠("id") +mz_object_transitive_dependencies_ind 
CREATE␠INDEX␠"mz_object_transitive_dependencies_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s534␠AS␠"mz_internal"."mz_object_transitive_dependencies"]␠("object_id") +mz_objects_ind CREATE␠INDEX␠"mz_objects_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s517␠AS␠"mz_catalog"."mz_objects"]␠("schema_id") mz_peek_durations_histogram_raw_s2_primary_idx CREATE␠INDEX␠"mz_peek_durations_histogram_raw_s2_primary_idx"␠IN␠CLUSTER␠[s2]␠ON␠"mz_introspection"."mz_peek_durations_histogram_raw"␠("worker_id",␠"type",␠"duration_ns") -mz_recent_activity_log_thinned_ind CREATE␠INDEX␠"mz_recent_activity_log_thinned_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s678␠AS␠"mz_internal"."mz_recent_activity_log_thinned"]␠("sql_hash") -mz_recent_sql_text_ind CREATE␠INDEX␠"mz_recent_sql_text_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s674␠AS␠"mz_internal"."mz_recent_sql_text"]␠("sql_hash") -mz_recent_storage_usage_ind CREATE␠INDEX␠"mz_recent_storage_usage_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s774␠AS␠"mz_catalog"."mz_recent_storage_usage"]␠("object_id") -mz_roles_ind CREATE␠INDEX␠"mz_roles_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s477␠AS␠"mz_catalog"."mz_roles"]␠("id") +mz_recent_activity_log_thinned_ind CREATE␠INDEX␠"mz_recent_activity_log_thinned_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s680␠AS␠"mz_internal"."mz_recent_activity_log_thinned"]␠("sql_hash") +mz_recent_sql_text_ind CREATE␠INDEX␠"mz_recent_sql_text_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s676␠AS␠"mz_internal"."mz_recent_sql_text"]␠("sql_hash") +mz_recent_storage_usage_ind CREATE␠INDEX␠"mz_recent_storage_usage_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s776␠AS␠"mz_catalog"."mz_recent_storage_usage"]␠("object_id") +mz_roles_ind CREATE␠INDEX␠"mz_roles_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s479␠AS␠"mz_catalog"."mz_roles"]␠("id") mz_scheduling_elapsed_raw_s2_primary_idx CREATE␠INDEX␠"mz_scheduling_elapsed_raw_s2_primary_idx"␠IN␠CLUSTER␠[s2]␠ON␠"mz_introspection"."mz_scheduling_elapsed_raw"␠("id",␠"worker_id") mz_scheduling_parks_histogram_raw_s2_primary_idx CREATE␠INDEX␠"mz_scheduling_parks_histogram_raw_s2_primary_idx"␠IN␠CLUSTER␠[s2]␠ON␠"mz_introspection"."mz_scheduling_parks_histogram_raw"␠("worker_id",␠"slept_for_ns",␠"requested_ns") -mz_schemas_ind CREATE␠INDEX␠"mz_schemas_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s456␠AS␠"mz_catalog"."mz_schemas"]␠("database_id") -mz_secrets_ind CREATE␠INDEX␠"mz_secrets_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s487␠AS␠"mz_catalog"."mz_secrets"]␠("name") -mz_show_all_objects_ind CREATE␠INDEX␠"mz_show_all_objects_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s563␠AS␠"mz_internal"."mz_show_all_objects"]␠("schema_id") -mz_show_cluster_replicas_ind CREATE␠INDEX␠"mz_show_cluster_replicas_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s715␠AS␠"mz_internal"."mz_show_cluster_replicas"]␠("cluster") -mz_show_clusters_ind CREATE␠INDEX␠"mz_show_clusters_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s565␠AS␠"mz_internal"."mz_show_clusters"]␠("name") -mz_show_columns_ind CREATE␠INDEX␠"mz_show_columns_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s564␠AS␠"mz_internal"."mz_show_columns"]␠("id") -mz_show_connections_ind CREATE␠INDEX␠"mz_show_connections_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s573␠AS␠"mz_internal"."mz_show_connections"]␠("schema_id") -mz_show_databases_ind CREATE␠INDEX␠"mz_show_databases_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s567␠AS␠"mz_internal"."mz_show_databases"]␠("name") -mz_show_indexes_ind CREATE␠INDEX␠"mz_show_indexes_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s577␠AS␠"mz_internal"."mz_show_indexes"]␠("schema_id") -mz_show_materialized_views_ind CREATE␠INDEX␠"mz_show_materialized_views_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s576␠AS␠"mz_internal"."mz_show_materialized_views"]␠("schema_id") -mz_show_roles_ind CREATE␠INDEX␠"mz_show_roles_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s572␠AS␠"mz_internal"."mz_show_roles"]␠("name") -mz_show_schemas_ind 
CREATE␠INDEX␠"mz_show_schemas_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s568␠AS␠"mz_internal"."mz_show_schemas"]␠("database_id") -mz_show_secrets_ind CREATE␠INDEX␠"mz_show_secrets_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s566␠AS␠"mz_internal"."mz_show_secrets"]␠("schema_id") -mz_show_sinks_ind CREATE␠INDEX␠"mz_show_sinks_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s575␠AS␠"mz_internal"."mz_show_sinks"]␠("schema_id") -mz_show_sources_ind CREATE␠INDEX␠"mz_show_sources_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s574␠AS␠"mz_internal"."mz_show_sources"]␠("schema_id") -mz_show_tables_ind CREATE␠INDEX␠"mz_show_tables_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s569␠AS␠"mz_internal"."mz_show_tables"]␠("schema_id") -mz_show_types_ind CREATE␠INDEX␠"mz_show_types_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s571␠AS␠"mz_internal"."mz_show_types"]␠("schema_id") -mz_show_views_ind CREATE␠INDEX␠"mz_show_views_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s570␠AS␠"mz_internal"."mz_show_views"]␠("schema_id") -mz_sink_statistics_ind CREATE␠INDEX␠"mz_sink_statistics_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s691␠AS␠"mz_internal"."mz_sink_statistics"]␠("id") -mz_sink_status_history_ind CREATE␠INDEX␠"mz_sink_status_history_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s663␠AS␠"mz_internal"."mz_sink_status_history"]␠("sink_id") -mz_sink_statuses_ind CREATE␠INDEX␠"mz_sink_statuses_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s664␠AS␠"mz_internal"."mz_sink_statuses"]␠("id") -mz_sinks_ind CREATE␠INDEX␠"mz_sinks_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s467␠AS␠"mz_catalog"."mz_sinks"]␠("id") -mz_source_statistics_ind CREATE␠INDEX␠"mz_source_statistics_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s689␠AS␠"mz_internal"."mz_source_statistics"]␠("id") -mz_source_statistics_with_history_ind CREATE␠INDEX␠"mz_source_statistics_with_history_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s687␠AS␠"mz_internal"."mz_source_statistics_with_history"]␠("id") -mz_source_status_history_ind CREATE␠INDEX␠"mz_source_status_history_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s665␠AS␠"mz_internal"."mz_source_status_history"]␠("source_id") -mz_source_statuses_ind CREATE␠INDEX␠"mz_source_statuses_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s682␠AS␠"mz_internal"."mz_source_statuses"]␠("id") -mz_sources_ind CREATE␠INDEX␠"mz_sources_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s461␠AS␠"mz_catalog"."mz_sources"]␠("id") -mz_tables_ind CREATE␠INDEX␠"mz_tables_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s460␠AS␠"mz_catalog"."mz_tables"]␠("schema_id") -mz_types_ind CREATE␠INDEX␠"mz_types_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s471␠AS␠"mz_catalog"."mz_types"]␠("schema_id") -mz_views_ind CREATE␠INDEX␠"mz_views_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s468␠AS␠"mz_catalog"."mz_views"]␠("schema_id") -mz_wallclock_global_lag_recent_history_ind CREATE␠INDEX␠"mz_wallclock_global_lag_recent_history_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s698␠AS␠"mz_internal"."mz_wallclock_global_lag_recent_history"]␠("object_id") -mz_webhook_sources_ind CREATE␠INDEX␠"mz_webhook_sources_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s508␠AS␠"mz_internal"."mz_webhook_sources"]␠("id") -pg_attrdef_all_databases_ind CREATE␠INDEX␠"pg_attrdef_all_databases_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s605␠AS␠"mz_internal"."pg_attrdef_all_databases"]␠("oid",␠"adrelid",␠"adnum",␠"adbin",␠"adsrc") -pg_attribute_all_databases_ind CREATE␠INDEX␠"pg_attribute_all_databases_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s598␠AS␠"mz_internal"."pg_attribute_all_databases"]␠("attrelid",␠"attname",␠"atttypid",␠"attlen",␠"attnum",␠"atttypmod",␠"attnotnull",␠"atthasdef",␠"attidentity",␠"attgenerated",␠"attisdropped",␠"attcollation",␠"database_name",␠"pg_type_database_name") -pg_class_all_databases_ind CREATE␠INDEX␠"pg_class_all_databases_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s586␠AS␠"mz_internal"."pg_class_all_databases"]␠("relname") -pg_description_all_databases_ind 
CREATE␠INDEX␠"pg_description_all_databases_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s595␠AS␠"mz_internal"."pg_description_all_databases"]␠("objoid",␠"classoid",␠"objsubid",␠"description",␠"oid_database_name",␠"class_database_name") -pg_namespace_all_databases_ind CREATE␠INDEX␠"pg_namespace_all_databases_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s583␠AS␠"mz_internal"."pg_namespace_all_databases"]␠("nspname") -pg_type_all_databases_ind CREATE␠INDEX␠"pg_type_all_databases_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s592␠AS␠"mz_internal"."pg_type_all_databases"]␠("oid") +mz_schemas_ind CREATE␠INDEX␠"mz_schemas_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s458␠AS␠"mz_catalog"."mz_schemas"]␠("database_id") +mz_secrets_ind CREATE␠INDEX␠"mz_secrets_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s489␠AS␠"mz_catalog"."mz_secrets"]␠("name") +mz_show_all_objects_ind CREATE␠INDEX␠"mz_show_all_objects_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s565␠AS␠"mz_internal"."mz_show_all_objects"]␠("schema_id") +mz_show_cluster_replicas_ind CREATE␠INDEX␠"mz_show_cluster_replicas_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s717␠AS␠"mz_internal"."mz_show_cluster_replicas"]␠("cluster") +mz_show_clusters_ind CREATE␠INDEX␠"mz_show_clusters_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s567␠AS␠"mz_internal"."mz_show_clusters"]␠("name") +mz_show_columns_ind CREATE␠INDEX␠"mz_show_columns_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s566␠AS␠"mz_internal"."mz_show_columns"]␠("id") +mz_show_connections_ind CREATE␠INDEX␠"mz_show_connections_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s575␠AS␠"mz_internal"."mz_show_connections"]␠("schema_id") +mz_show_databases_ind CREATE␠INDEX␠"mz_show_databases_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s569␠AS␠"mz_internal"."mz_show_databases"]␠("name") +mz_show_indexes_ind CREATE␠INDEX␠"mz_show_indexes_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s579␠AS␠"mz_internal"."mz_show_indexes"]␠("schema_id") +mz_show_materialized_views_ind CREATE␠INDEX␠"mz_show_materialized_views_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s578␠AS␠"mz_internal"."mz_show_materialized_views"]␠("schema_id") +mz_show_roles_ind CREATE␠INDEX␠"mz_show_roles_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s574␠AS␠"mz_internal"."mz_show_roles"]␠("name") +mz_show_schemas_ind CREATE␠INDEX␠"mz_show_schemas_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s570␠AS␠"mz_internal"."mz_show_schemas"]␠("database_id") +mz_show_secrets_ind CREATE␠INDEX␠"mz_show_secrets_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s568␠AS␠"mz_internal"."mz_show_secrets"]␠("schema_id") +mz_show_sinks_ind CREATE␠INDEX␠"mz_show_sinks_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s577␠AS␠"mz_internal"."mz_show_sinks"]␠("schema_id") +mz_show_sources_ind CREATE␠INDEX␠"mz_show_sources_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s576␠AS␠"mz_internal"."mz_show_sources"]␠("schema_id") +mz_show_tables_ind CREATE␠INDEX␠"mz_show_tables_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s571␠AS␠"mz_internal"."mz_show_tables"]␠("schema_id") +mz_show_types_ind CREATE␠INDEX␠"mz_show_types_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s573␠AS␠"mz_internal"."mz_show_types"]␠("schema_id") +mz_show_views_ind CREATE␠INDEX␠"mz_show_views_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s572␠AS␠"mz_internal"."mz_show_views"]␠("schema_id") +mz_sink_statistics_ind CREATE␠INDEX␠"mz_sink_statistics_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s693␠AS␠"mz_internal"."mz_sink_statistics"]␠("id") +mz_sink_status_history_ind CREATE␠INDEX␠"mz_sink_status_history_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s665␠AS␠"mz_internal"."mz_sink_status_history"]␠("sink_id") +mz_sink_statuses_ind CREATE␠INDEX␠"mz_sink_statuses_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s666␠AS␠"mz_internal"."mz_sink_statuses"]␠("id") +mz_sinks_ind CREATE␠INDEX␠"mz_sinks_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s469␠AS␠"mz_catalog"."mz_sinks"]␠("id") +mz_source_statistics_ind CREATE␠INDEX␠"mz_source_statistics_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s691␠AS␠"mz_internal"."mz_source_statistics"]␠("id") 
+mz_source_statistics_with_history_ind CREATE␠INDEX␠"mz_source_statistics_with_history_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s689␠AS␠"mz_internal"."mz_source_statistics_with_history"]␠("id") +mz_source_status_history_ind CREATE␠INDEX␠"mz_source_status_history_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s667␠AS␠"mz_internal"."mz_source_status_history"]␠("source_id") +mz_source_statuses_ind CREATE␠INDEX␠"mz_source_statuses_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s684␠AS␠"mz_internal"."mz_source_statuses"]␠("id") +mz_sources_ind CREATE␠INDEX␠"mz_sources_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s463␠AS␠"mz_catalog"."mz_sources"]␠("id") +mz_tables_ind CREATE␠INDEX␠"mz_tables_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s462␠AS␠"mz_catalog"."mz_tables"]␠("schema_id") +mz_types_ind CREATE␠INDEX␠"mz_types_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s473␠AS␠"mz_catalog"."mz_types"]␠("schema_id") +mz_views_ind CREATE␠INDEX␠"mz_views_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s470␠AS␠"mz_catalog"."mz_views"]␠("schema_id") +mz_wallclock_global_lag_recent_history_ind CREATE␠INDEX␠"mz_wallclock_global_lag_recent_history_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s700␠AS␠"mz_internal"."mz_wallclock_global_lag_recent_history"]␠("object_id") +mz_webhook_sources_ind CREATE␠INDEX␠"mz_webhook_sources_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s510␠AS␠"mz_internal"."mz_webhook_sources"]␠("id") +pg_attrdef_all_databases_ind CREATE␠INDEX␠"pg_attrdef_all_databases_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s607␠AS␠"mz_internal"."pg_attrdef_all_databases"]␠("oid",␠"adrelid",␠"adnum",␠"adbin",␠"adsrc") +pg_attribute_all_databases_ind CREATE␠INDEX␠"pg_attribute_all_databases_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s600␠AS␠"mz_internal"."pg_attribute_all_databases"]␠("attrelid",␠"attname",␠"atttypid",␠"attlen",␠"attnum",␠"atttypmod",␠"attnotnull",␠"atthasdef",␠"attidentity",␠"attgenerated",␠"attisdropped",␠"attcollation",␠"database_name",␠"pg_type_database_name") +pg_class_all_databases_ind CREATE␠INDEX␠"pg_class_all_databases_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s588␠AS␠"mz_internal"."pg_class_all_databases"]␠("relname") +pg_description_all_databases_ind CREATE␠INDEX␠"pg_description_all_databases_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s597␠AS␠"mz_internal"."pg_description_all_databases"]␠("objoid",␠"classoid",␠"objsubid",␠"description",␠"oid_database_name",␠"class_database_name") +pg_namespace_all_databases_ind CREATE␠INDEX␠"pg_namespace_all_databases_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s585␠AS␠"mz_internal"."pg_namespace_all_databases"]␠("nspname") +pg_type_all_databases_ind CREATE␠INDEX␠"pg_type_all_databases_ind"␠IN␠CLUSTER␠[s2]␠ON␠[s594␠AS␠"mz_internal"."pg_type_all_databases"]␠("oid") # Record all transitive dependencies (tables, sources, views, mvs) of indexes on # the mz_catalog_server cluster. 
diff --git a/test/sqllogictest/oid.slt b/test/sqllogictest/oid.slt index 11a5dda650145..61669070d0fa6 100644 --- a/test/sqllogictest/oid.slt +++ b/test/sqllogictest/oid.slt @@ -124,6 +124,7 @@ SELECT oid, name FROM mz_objects WHERE id LIKE 's%' AND oid < 20000 ORDER BY oid 701 float8 720 octet_length 721 get_byte +723 get_bit 745 current_user 746 session_user 750 array_in @@ -586,6 +587,7 @@ SELECT oid, name FROM mz_objects WHERE id LIKE 's%' AND oid < 20000 ORDER BY oid 5090 anycompatiblearray_recv 5092 anycompatiblenonarray_in 5094 anycompatiblerange_in +6163 bit_count 6177 date_bin 6178 date_bin 6199 extract diff --git a/test/sqllogictest/outer_join_lowering.slt b/test/sqllogictest/outer_join_lowering.slt index cde940f4d21fc..d5ab9f0bbf02f 100644 --- a/test/sqllogictest/outer_join_lowering.slt +++ b/test/sqllogictest/outer_join_lowering.slt @@ -121,7 +121,7 @@ With - () Get materialize.left_joins_raw.dim01 // { arity: 6 } Return // { arity: 4 } - Project (#0{facts_k01}, #4, #1{facts_d01}, #10) // { arity: 4 } + Project (#0{facts_k01}, #4{facts_d01}, #1{dim01_k01}, #10{dim01_d01}) // { arity: 4 } Union // { arity: 15 } Get l1 // { arity: 15 } CrossJoin // { arity: 15 } @@ -180,7 +180,7 @@ With - () Get materialize.left_joins_raw.dim01 // { arity: 6 } Return // { arity: 4 } - Project (#0{facts_k01}, #4, #1{facts_d01}, #10) // { arity: 4 } + Project (#0{facts_k01}, #4{facts_d01}, #1{dim01_k01}, #10{dim01_d01}) // { arity: 4 } Union // { arity: 15 } Map (null, null, null, null, null, null) // { arity: 15 } Union // { arity: 9 } @@ -190,7 +190,7 @@ Return // { arity: 4 } Filter (#1{dim01_k01}) IS NOT NULL // { arity: 9 } Get l0 // { arity: 9 } Distinct project=[#0{dim01_k01}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{dim01_k01}) // { arity: 1 } Get l1 // { arity: 15 } Get l0 // { arity: 9 } Get l1 // { arity: 15 } @@ -229,17 +229,17 @@ With Get materialize.left_joins_raw.facts // { arity: 9 } cte l3 = Filter ((#3{dim01_k01} + #0{x}) = (#11{dim01_k01} + #1{y})) // { arity: 17 } - Project (#0{x}..=#10{facts_d05}, #13{dim01_d02}..=#18) // { arity: 17 } + Project (#0{x}..=#10{facts_d05}, #13{dim01_k01}..=#18{dim01_d05}) // { arity: 17 } Join on=(#0{x} = #11{x} AND #1{y} = #12{y}) // { arity: 19 } Get l2 // { arity: 11 } CrossJoin // { arity: 8 } Get l1 // { arity: 2 } Get materialize.left_joins_raw.dim01 // { arity: 6 } Return // { arity: 6 } - Project (#0{x}, #1{y}, #4{dim01_k01}..=#7) // { arity: 6 } + Project (#0{x}, #1{y}, #4{facts_k01}..=#7{dim01_d01}) // { arity: 6 } Join on=(#0{x} = #2{x} AND #1{y} = #3{y}) // { arity: 8 } Get l0 // { arity: 2 } - Project (#0{x}..=#2{facts_k01}, #6, #3{facts_d01}, #12) // { arity: 6 } + Project (#0{x}..=#2{facts_k01}, #6{facts_d01}, #3{dim01_k01}, #12{dim01_d01}) // { arity: 6 } Union // { arity: 17 } Get l3 // { arity: 17 } CrossJoin // { arity: 17 } @@ -289,17 +289,17 @@ With Get materialize.left_joins_raw.facts // { arity: 9 } cte l3 = Filter ((#3{dim01_k01} + #0{x}) = (#11{dim01_k01} + #1{y})) // { arity: 17 } - Project (#0{x}..=#10{facts_d05}, #13{dim01_d02}..=#18) // { arity: 17 } + Project (#0{x}..=#10{facts_d05}, #13{dim01_k01}..=#18{dim01_d05}) // { arity: 17 } Join on=(#0{x} = #11{x} AND #1{y} = #12{y}) // { arity: 19 } Get l2 // { arity: 11 } CrossJoin // { arity: 8 } Get l1 // { arity: 2 } Get materialize.left_joins_raw.dim01 // { arity: 6 } Return // { arity: 6 } - Project (#0{x}, #1{y}, #4{dim01_k01}..=#7) // { arity: 6 } + Project (#0{x}, #1{y}, #4{facts_k01}..=#7{dim01_d01}) // { arity: 6 } Join on=(#0{x} = #2{x} AND 
#1{y} = #3{y}) // { arity: 8 } Get l0 // { arity: 2 } - Project (#0{x}..=#2{facts_k01}, #6, #3{facts_d01}, #12) // { arity: 6 } + Project (#0{x}..=#2{facts_k01}, #6{facts_d01}, #3{dim01_k01}, #12{dim01_d01}) // { arity: 6 } Union // { arity: 17 } Map (null, null, null, null, null, null) // { arity: 17 } Union // { arity: 11 } @@ -309,7 +309,7 @@ Return // { arity: 6 } Filter ((#3{dim01_k01} + #0{x})) IS NOT NULL // { arity: 11 } Get l2 // { arity: 11 } Distinct project=[#0{x}..=#2] // { arity: 3 } - Project (#17..=#19) // { arity: 3 } + Project (#17{x}..=#19) // { arity: 3 } Map (#0{x}, #1{y}, (#3{dim01_k01} + #0{x})) // { arity: 20 } Get l3 // { arity: 17 } Get l2 // { arity: 11 } @@ -349,19 +349,19 @@ With Get materialize.left_joins_raw.facts // { arity: 9 } cte l3 = Filter ((#2{dim01_k01} + #1{y}) = (#9{dim01_k01} + #0{x})) // { arity: 17 } - Project (#0{x}..=#7{dim01_d05}, #10{dim02_k01}..=#18) // { arity: 17 } + Project (#0{x}..=#7{dim01_d05}, #10{facts_k01}..=#18{facts_d05}) // { arity: 17 } Join on=(#0{x} = #8{x} AND #1{y} = #9{y}) // { arity: 19 } CrossJoin // { arity: 8 } Get l1 // { arity: 2 } Get materialize.left_joins_raw.dim01 // { arity: 6 } Get l2 // { arity: 11 } Return // { arity: 6 } - Project (#0{x}, #1{y}, #4{dim01_k01}..=#7) // { arity: 6 } + Project (#0{x}, #1{y}, #4{facts_k01}..=#7{dim01_d01}) // { arity: 6 } Join on=(#0{x} = #2{x} AND #1{y} = #3{y}) // { arity: 8 } Get l0 // { arity: 2 } - Project (#0{x}, #1{y}, #8, #12, #9, #3{facts_d01}) // { arity: 6 } + Project (#0{x}, #1{y}, #8{facts_k01}, #12{facts_d01}, #9{dim01_k01}, #3{dim01_d01}) // { arity: 6 } Union // { arity: 17 } - Project (#0{x}, #1{y}, #11{dim03_k01}..=#16{facts_d05}, #2..=#10{dim02_k01}) // { arity: 17 } + Project (#0{x}, #1{y}, #11..=#16, #2{facts_k01}..=#10{facts_d05}) // { arity: 17 } Map (null, null, null, null, null, null) // { arity: 17 } Union // { arity: 11 } Negate // { arity: 11 } @@ -370,7 +370,7 @@ Return // { arity: 6 } Filter ((#3{dim01_k01} + #0{x})) IS NOT NULL // { arity: 11 } Get l2 // { arity: 11 } Distinct project=[#0{x}..=#2] // { arity: 3 } - Project (#17..=#19) // { arity: 3 } + Project (#17{x}..=#19) // { arity: 3 } Map (#0{x}, #1{y}, (#2{dim01_k01} + #1{y})) // { arity: 20 } Get l3 // { arity: 17 } Get l2 // { arity: 11 } @@ -414,17 +414,17 @@ With Get materialize.left_joins_raw.facts // { arity: 9 } cte l3 = Filter ((((#3{dim01_k01} + #0{x}) = (#11{dim01_k01} + #1{y})) AND (#7{facts_d02} = 42)) AND (#13{dim01_d02} = 24)) // { arity: 17 } - Project (#0{x}..=#10{facts_d05}, #13{dim01_d02}..=#18) // { arity: 17 } + Project (#0{x}..=#10{facts_d05}, #13{dim01_k01}..=#18{dim01_d05}) // { arity: 17 } Join on=(#0{x} = #11{x} AND #1{y} = #12{y}) // { arity: 19 } Get l2 // { arity: 11 } CrossJoin // { arity: 8 } Get l1 // { arity: 2 } Get materialize.left_joins_raw.dim01 // { arity: 6 } Return // { arity: 6 } - Project (#0{x}, #1{y}, #4{dim01_k01}..=#7) // { arity: 6 } + Project (#0{x}, #1{y}, #4{facts_k01}..=#7{dim01_d01}) // { arity: 6 } Join on=(#0{x} = #2{x} AND #1{y} = #3{y}) // { arity: 8 } Get l0 // { arity: 2 } - Project (#0{x}..=#2{facts_k01}, #6, #3{facts_d01}, #12) // { arity: 6 } + Project (#0{x}..=#2{facts_k01}, #6{facts_d01}, #3{dim01_k01}, #12{dim01_d01}) // { arity: 6 } Union // { arity: 17 } Get l3 // { arity: 17 } CrossJoin // { arity: 17 } @@ -478,17 +478,17 @@ With Get materialize.left_joins_raw.facts // { arity: 9 } cte l3 = Filter (#7{facts_d02} = 42) AND (#13{dim01_d02} = 24) AND ((#3{dim01_k01} + #0{x}) = (#11{dim01_k01} + #1{y})) // { arity: 17 } - Project 
(#0{x}..=#10{facts_d05}, #13{dim01_d02}..=#18) // { arity: 17 } + Project (#0{x}..=#10{facts_d05}, #13{dim01_k01}..=#18{dim01_d05}) // { arity: 17 } Join on=(#0{x} = #11{x} AND #1{y} = #12{y}) // { arity: 19 } Get l2 // { arity: 11 } CrossJoin // { arity: 8 } Get l1 // { arity: 2 } Get materialize.left_joins_raw.dim01 // { arity: 6 } Return // { arity: 6 } - Project (#0{x}, #1{y}, #4{dim01_k01}..=#7) // { arity: 6 } + Project (#0{x}, #1{y}, #4{facts_k01}..=#7{dim01_d01}) // { arity: 6 } Join on=(#0{x} = #2{x} AND #1{y} = #3{y}) // { arity: 8 } Get l0 // { arity: 2 } - Project (#0{x}..=#2{facts_k01}, #6, #3{facts_d01}, #12) // { arity: 6 } + Project (#0{x}..=#2{facts_k01}, #6{facts_d01}, #3{dim01_k01}, #12{dim01_d01}) // { arity: 6 } Union // { arity: 17 } Map (null, null, null, null, null, null) // { arity: 17 } Union // { arity: 11 } @@ -498,7 +498,7 @@ Return // { arity: 6 } Filter ((#3{dim01_k01} + #0{x})) IS NOT NULL AND (#7{facts_d02} = 42) // { arity: 11 } Get l2 // { arity: 11 } Distinct project=[#0{x}..=#2] // { arity: 3 } - Project (#17..=#19) // { arity: 3 } + Project (#17{x}..=#19) // { arity: 3 } Map (#0{x}, #1{y}, (#3{dim01_k01} + #0{x})) // { arity: 20 } Get l3 // { arity: 17 } Get l2 // { arity: 11 } @@ -542,19 +542,19 @@ With Get materialize.left_joins_raw.facts // { arity: 9 } cte l3 = Filter (#4{dim01_d02} = 24) AND (#13{facts_d02} = 42) AND ((#2{dim01_k01} + #1{y}) = (#9{dim01_k01} + #0{x})) // { arity: 17 } - Project (#0{x}..=#7{dim01_d05}, #10{dim02_k01}..=#18) // { arity: 17 } + Project (#0{x}..=#7{dim01_d05}, #10{facts_k01}..=#18{facts_d05}) // { arity: 17 } Join on=(#0{x} = #8{x} AND #1{y} = #9{y}) // { arity: 19 } CrossJoin // { arity: 8 } Get l1 // { arity: 2 } Get materialize.left_joins_raw.dim01 // { arity: 6 } Get l2 // { arity: 11 } Return // { arity: 6 } - Project (#0{x}, #1{y}, #4{dim01_k01}..=#7) // { arity: 6 } + Project (#0{x}, #1{y}, #4{facts_k01}..=#7{dim01_d01}) // { arity: 6 } Join on=(#0{x} = #2{x} AND #1{y} = #3{y}) // { arity: 8 } Get l0 // { arity: 2 } - Project (#0{x}, #1{y}, #8, #12, #9, #3{facts_d01}) // { arity: 6 } + Project (#0{x}, #1{y}, #8{facts_k01}, #12{facts_d01}, #9{dim01_k01}, #3{dim01_d01}) // { arity: 6 } Union // { arity: 17 } - Project (#0{x}, #1{y}, #11{dim03_k01}..=#16{facts_d05}, #2..=#10{dim02_k01}) // { arity: 17 } + Project (#0{x}, #1{y}, #11..=#16, #2{facts_k01}..=#10{facts_d05}) // { arity: 17 } Map (null, null, null, null, null, null) // { arity: 17 } Union // { arity: 11 } Negate // { arity: 11 } @@ -563,7 +563,7 @@ Return // { arity: 6 } Filter ((#3{dim01_k01} + #0{x})) IS NOT NULL AND (#7{facts_d02} = 42) // { arity: 11 } Get l2 // { arity: 11 } Distinct project=[#0{x}..=#2] // { arity: 3 } - Project (#17..=#19) // { arity: 3 } + Project (#17{x}..=#19) // { arity: 3 } Map (#0{x}, #1{y}, (#2{dim01_k01} + #1{y})) // { arity: 20 } Get l3 // { arity: 17 } Get l2 // { arity: 11 } @@ -610,22 +610,22 @@ With Get materialize.left_joins_raw.dim02 // { arity: 6 } cte l4 = Filter (coalesce(#2{dim01_k01}, #0{x}) = coalesce(#8{dim02_k01}, #1{y})) AND (#11{dim02_d03} < 24) AND (#5{dim01_d03} > 42) // { arity: 14 } - Project (#0{x}..=#7{dim01_d05}, #10{dim02_d02}..=#15) // { arity: 14 } + Project (#0{x}..=#7{dim01_d05}, #10{dim02_k01}..=#15{dim02_d05}) // { arity: 14 } Join on=(#0{x} = #8{x} AND #1{y} = #9{y}) // { arity: 16 } Get l2 // { arity: 8 } Get l3 // { arity: 8 } cte l5 = Distinct project=[#0{x}..=#2] // { arity: 3 } - Project (#14..=#16) // { arity: 3 } + Project (#14{x}..=#16) // { arity: 3 } Map (#0{x}, #1{y}, 
coalesce(#2{dim01_k01}, #0{x})) // { arity: 17 } Get l4 // { arity: 14 } Return // { arity: 6 } - Project (#0{x}, #1{y}, #4{dim02_d02}..=#7) // { arity: 6 } + Project (#0{x}, #1{y}, #4{dim01_k01}..=#7{dim02_d02}) // { arity: 6 } Join on=(#0{x} = #2{x} AND #1{y} = #3{y}) // { arity: 8 } Get l0 // { arity: 2 } - Project (#0{x}..=#2{dim01_k01}, #2{dim01_k01}, #10, #10) // { arity: 6 } + Project (#0{x}..=#2{dim01_k01}, #2{dim01_k01}, #10{dim02_d02}, #10{dim02_d02}) // { arity: 6 } Union // { arity: 14 } - Project (#0{x}, #1{y}, #8{dim02_k01}..=#13{dim02_d05}, #2..=#7) // { arity: 14 } + Project (#0{x}, #1{y}, #8..=#13, #2{dim02_k01}..=#7{dim02_d05}) // { arity: 14 } Map (null, null, null, null, null, null) // { arity: 14 } Union // { arity: 8 } Negate // { arity: 8 } @@ -683,7 +683,7 @@ With Get l1 // { arity: 2 } Get materialize.left_joins_raw.facts // { arity: 9 } cte l3 = - Project (#0{x}..=#10{facts_d05}, #13{dim01_d02}..=#18) // { arity: 17 } + Project (#0{x}..=#10{facts_d05}, #13{dim01_k01}..=#18{dim01_d05}) // { arity: 17 } Join on=(#0{x} = #11{x} AND #1{y} = #12{y}) // { arity: 19 } Get l2 // { arity: 11 } CrossJoin // { arity: 8 } @@ -722,7 +722,7 @@ With cte l8 = Project (#0{x}..=#16{dim01_d05}) // { arity: 17 } Filter ((((#3{dim01_k01} + #0{x}) = (#11{dim01_k01} + #1{y})) AND (#7{facts_d02} = 42)) AND #17{any}) // { arity: 18 } - Project (#0{x}..=#16{dim01_d05}, #18) // { arity: 18 } + Project (#0{x}..=#16{dim01_d05}, #18{any}) // { arity: 18 } Join on=(#12{dim01_d01} = #17{dim01_d01}) // { arity: 19 } Get l3 // { arity: 17 } Union // { arity: 2 } @@ -740,10 +740,10 @@ With Constant // { arity: 1 } - (null) Return // { arity: 6 } - Project (#0{x}, #1{y}, #4{dim01_k01}..=#7) // { arity: 6 } + Project (#0{x}, #1{y}, #4{facts_k01}..=#7{dim01_d01}) // { arity: 6 } Join on=(#0{x} = #2{x} AND #1{y} = #3{y}) // { arity: 8 } Get l0 // { arity: 2 } - Project (#0{x}..=#2{facts_k01}, #6, #3{facts_d01}, #12) // { arity: 6 } + Project (#0{x}..=#2{facts_k01}, #6{facts_d01}, #3{dim01_k01}, #12{dim01_d01}) // { arity: 6 } Union // { arity: 17 } Get l8 // { arity: 17 } CrossJoin // { arity: 17 } @@ -791,7 +791,7 @@ Explained Query: With cte l0 = ArrangeBy keys=[[coalesce(#1{dim01_k01}, 5)]] // { arity: 4 } - Project (#0{facts_k01}, #1{dim01_k01}, #4, #5) // { arity: 4 } + Project (#0{facts_k01}, #1{dim01_k01}, #4{facts_d01}, #5{facts_d02}) // { arity: 4 } Filter (#4{facts_d01} > 42) // { arity: 9 } ReadStorage materialize.left_joins_raw.facts // { arity: 9 } cte l1 = @@ -806,16 +806,16 @@ Explained Query: Map (null, null, null) // { arity: 6 } Union // { arity: 3 } Negate // { arity: 3 } - Project (#0{facts_k01}, #2{facts_d02}, #3) // { arity: 3 } + Project (#0{facts_k01}, #2{facts_d01}, #3{facts_d02}) // { arity: 3 } Join on=(#4 = coalesce(#1{dim01_k01}, 5)) type=differential // { arity: 5 } Get l0 // { arity: 4 } ArrangeBy keys=[[#0]] // { arity: 1 } Distinct project=[coalesce(#0{dim01_k01}, 5)] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{dim01_k01}) // { arity: 1 } Get l1 // { arity: 7 } - Project (#0{facts_k01}, #4, #5) // { arity: 3 } + Project (#0{facts_k01}, #4{facts_d01}, #5{facts_d02}) // { arity: 3 } ReadStorage materialize.left_joins_raw.facts // { arity: 9 } - Project (#0{facts_k01}, #2{facts_d02}..=#6) // { arity: 6 } + Project (#0{facts_k01}, #2{facts_d01}..=#6{dim01_d02}) // { arity: 6 } Get l1 // { arity: 7 } Source materialize.left_joins_raw.facts @@ -852,7 +852,7 @@ Explained Query: With cte l0 = ArrangeBy keys=[[coalesce(#1{dim01_k01}, 5)]] // { arity: 4 } - 
Project (#0{facts_k01}, #1{dim01_k01}, #4, #5) // { arity: 4 } + Project (#0{facts_k01}, #1{dim01_k01}, #4{facts_d01}, #5{facts_d02}) // { arity: 4 } Filter (#4{facts_d01} > 42) // { arity: 9 } ReadStorage materialize.left_joins_raw.facts // { arity: 9 } cte l1 = @@ -867,16 +867,16 @@ Explained Query: Map (null, null, null) // { arity: 6 } Union // { arity: 3 } Negate // { arity: 3 } - Project (#0{facts_k01}, #2{facts_d02}, #3) // { arity: 3 } + Project (#0{facts_k01}, #2{facts_d01}, #3{facts_d02}) // { arity: 3 } Join on=(#4 = coalesce(#1{dim01_k01}, 5)) type=differential // { arity: 5 } Get l0 // { arity: 4 } ArrangeBy keys=[[#0]] // { arity: 1 } Distinct project=[coalesce(#0{dim01_k01}, 5)] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{dim01_k01}) // { arity: 1 } Get l1 // { arity: 7 } - Project (#0{facts_k01}, #4, #5) // { arity: 3 } + Project (#0{facts_k01}, #4{facts_d01}, #5{facts_d02}) // { arity: 3 } ReadStorage materialize.left_joins_raw.facts // { arity: 9 } - Project (#0{facts_k01}, #2{facts_d02}..=#6) // { arity: 6 } + Project (#0{facts_k01}, #2{facts_d01}..=#6{dim01_d02}) // { arity: 6 } Get l1 // { arity: 7 } Source materialize.left_joins_raw.facts @@ -900,7 +900,7 @@ materialize.public.v: With cte l0 = ArrangeBy keys=[[coalesce(#1{dim01_k01}, 5)]] // { arity: 4 } - Project (#0{facts_k01}, #1{dim01_k01}, #4, #5) // { arity: 4 } + Project (#0{facts_k01}, #1{dim01_k01}, #4{facts_d01}, #5{facts_d02}) // { arity: 4 } Filter (#4{facts_d01} > 42) // { arity: 9 } ReadStorage materialize.left_joins_raw.facts // { arity: 9 } cte l1 = @@ -915,16 +915,16 @@ materialize.public.v: Map (null, null, null) // { arity: 6 } Union // { arity: 3 } Negate // { arity: 3 } - Project (#0{facts_k01}, #2{facts_d02}, #3) // { arity: 3 } + Project (#0{facts_k01}, #2{facts_d01}, #3{facts_d02}) // { arity: 3 } Join on=(#4 = coalesce(#1{dim01_k01}, 5)) type=differential // { arity: 5 } Get l0 // { arity: 4 } ArrangeBy keys=[[#0]] // { arity: 1 } Distinct project=[coalesce(#0{dim01_k01}, 5)] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{dim01_k01}) // { arity: 1 } Get l1 // { arity: 7 } - Project (#0{facts_k01}, #4, #5) // { arity: 3 } + Project (#0{facts_k01}, #4{facts_d01}, #5{facts_d02}) // { arity: 3 } ReadStorage materialize.left_joins_raw.facts // { arity: 9 } - Project (#0{facts_k01}, #2{facts_d02}..=#6) // { arity: 6 } + Project (#0{facts_k01}, #2{facts_d01}..=#6{dim01_d02}) // { arity: 6 } Get l1 // { arity: 7 } Source materialize.left_joins_raw.facts @@ -957,7 +957,7 @@ materialize.public.mv: With cte l0 = ArrangeBy keys=[[coalesce(#1{dim01_k01}, 5)]] // { arity: 4 } - Project (#0{facts_k01}, #1{dim01_k01}, #4, #5) // { arity: 4 } + Project (#0{facts_k01}, #1{dim01_k01}, #4{facts_d01}, #5{facts_d02}) // { arity: 4 } Filter (#4{facts_d01} > 42) // { arity: 9 } ReadStorage materialize.left_joins_raw.facts // { arity: 9 } cte l1 = @@ -972,16 +972,16 @@ materialize.public.mv: Map (null, null, null) // { arity: 6 } Union // { arity: 3 } Negate // { arity: 3 } - Project (#0{facts_k01}, #2{facts_d02}, #3) // { arity: 3 } + Project (#0{facts_k01}, #2{facts_d01}, #3{facts_d02}) // { arity: 3 } Join on=(#4 = coalesce(#1{dim01_k01}, 5)) type=differential // { arity: 5 } Get l0 // { arity: 4 } ArrangeBy keys=[[#0]] // { arity: 1 } Distinct project=[coalesce(#0{dim01_k01}, 5)] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{dim01_k01}) // { arity: 1 } Get l1 // { arity: 7 } - Project (#0{facts_k01}, #4, #5) // { arity: 3 } + Project (#0{facts_k01}, #4{facts_d01}, 
#5{facts_d02}) // { arity: 3 } ReadStorage materialize.left_joins_raw.facts // { arity: 9 } - Project (#0{facts_k01}, #2{facts_d02}..=#6) // { arity: 6 } + Project (#0{facts_k01}, #2{facts_d01}..=#6{dim01_d02}) // { arity: 6 } Get l1 // { arity: 7 } Source materialize.left_joins_raw.facts diff --git a/test/sqllogictest/outer_join_simplification.slt b/test/sqllogictest/outer_join_simplification.slt index f785d379c8944..8520a8e34e7d9 100644 --- a/test/sqllogictest/outer_join_simplification.slt +++ b/test/sqllogictest/outer_join_simplification.slt @@ -692,7 +692,7 @@ with mutually recursive select * from c0 ---- Explained Query: - With Mutually Recursive + With cte l0 = Project (#0..=#2, #4) Join on=(#0 = #3) type=differential @@ -702,20 +702,22 @@ Explained Query: ArrangeBy keys=[[#0]] Filter (#0) IS NOT NULL ReadStorage materialize.public.bar - cte l1 = - Distinct project=[#0..=#4] - Union - Map (null, null) - Union - Negate - Project (#0..=#2) - Get l0 - ReadStorage materialize.public.foo_raw - Project (#0..=#2, #0, #3) - Get l0 - Get l1 Return - Get l1 + With Mutually Recursive + cte l1 = + Distinct project=[#0..=#4] + Union + Map (null, null) + Union + Negate + Project (#0..=#2) + Get l0 + ReadStorage materialize.public.foo_raw + Project (#0..=#2, #0, #3) + Get l0 + Get l1 + Return + Get l1 Source materialize.public.foo_raw Source materialize.public.bar @@ -837,7 +839,7 @@ with mutually recursive select * from c0; ---- Explained Query: - With Mutually Recursive + With cte l0 = Project (#0) // { arity: 1 } Filter (#0) IS NOT NULL // { arity: 2 } @@ -845,53 +847,55 @@ Explained Query: cte l1 = Project (#0) // { arity: 1 } ReadStorage materialize.public.quux // { arity: 2 } - cte l2 = - Distinct project=[#0..=#2] // { arity: 3 } - Union // { arity: 3 } - Project (#0..=#2) // { arity: 3 } - Join on=(#0 = #3 AND #1 = #4 AND #7 = case when (#6) IS NULL then null else #5 end) type=delta // { arity: 8 } - ArrangeBy keys=[[#0], [#1]] // { arity: 3 } - Get l2 // { arity: 3 } - ArrangeBy keys=[[#0]] // { arity: 1 } - Union // { arity: 1 } - Get l0 // { arity: 1 } - Threshold // { arity: 1 } - Union // { arity: 1 } - Negate // { arity: 1 } - Get l0 // { arity: 1 } - Distinct project=[#0] // { arity: 1 } - Project (#0) // { arity: 1 } - Get l2 // { arity: 3 } - ArrangeBy keys=[[#0], [case when (#2) IS NULL then null else #1 end]] // { arity: 3 } - Union // { arity: 3 } - Project (#0, #1, #3) // { arity: 3 } - Map (true) // { arity: 4 } - ReadStorage materialize.public.baz // { arity: 3 } - Map (null, null) // { arity: 3 } + Return // { arity: 3 } + With Mutually Recursive + cte l2 = + Distinct project=[#0..=#2] // { arity: 3 } + Union // { arity: 3 } + Project (#0..=#2) // { arity: 3 } + Join on=(#0 = #3 AND #1 = #4 AND #7 = case when (#6) IS NULL then null else #5 end) type=delta // { arity: 8 } + ArrangeBy keys=[[#0], [#1]] // { arity: 3 } + Get l2 // { arity: 3 } + ArrangeBy keys=[[#0]] // { arity: 1 } + Union // { arity: 1 } + Get l0 // { arity: 1 } Threshold // { arity: 1 } Union // { arity: 1 } Negate // { arity: 1 } - Project (#0) // { arity: 1 } - ReadStorage materialize.public.baz // { arity: 3 } + Get l0 // { arity: 1 } Distinct project=[#0] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#0) // { arity: 1 } Get l2 // { arity: 3 } - ArrangeBy keys=[[#0]] // { arity: 1 } - Union // { arity: 1 } - Get l1 // { arity: 1 } - Threshold // { arity: 1 } - Union // { arity: 1 } - Negate // { arity: 1 } - Get l1 // { arity: 1 } - Distinct project=[#0] // { arity: 1 } + ArrangeBy keys=[[#0], 
[case when (#2) IS NULL then null else #1 end]] // { arity: 3 } + Union // { arity: 3 } + Project (#0, #1, #3) // { arity: 3 } + Map (true) // { arity: 4 } + ReadStorage materialize.public.baz // { arity: 3 } + Map (null, null) // { arity: 3 } + Threshold // { arity: 1 } Union // { arity: 1 } - Project (#1) // { arity: 1 } - ReadStorage materialize.public.baz // { arity: 3 } - Constant // { arity: 1 } - - (null) - ReadStorage materialize.public.foo // { arity: 3 } - Return // { arity: 3 } - Get l2 // { arity: 3 } + Negate // { arity: 1 } + Project (#0) // { arity: 1 } + ReadStorage materialize.public.baz // { arity: 3 } + Distinct project=[#0] // { arity: 1 } + Project (#1) // { arity: 1 } + Get l2 // { arity: 3 } + ArrangeBy keys=[[#0]] // { arity: 1 } + Union // { arity: 1 } + Get l1 // { arity: 1 } + Threshold // { arity: 1 } + Union // { arity: 1 } + Negate // { arity: 1 } + Get l1 // { arity: 1 } + Distinct project=[#0] // { arity: 1 } + Union // { arity: 1 } + Project (#1) // { arity: 1 } + ReadStorage materialize.public.baz // { arity: 3 } + Constant // { arity: 1 } + - (null) + ReadStorage materialize.public.foo // { arity: 3 } + Return // { arity: 3 } + Get l2 // { arity: 3 } Source materialize.public.foo Source materialize.public.bar diff --git a/test/sqllogictest/redacted.slt b/test/sqllogictest/redacted.slt index 5a95957192adb..a3541006ff934 100644 --- a/test/sqllogictest/redacted.slt +++ b/test/sqllogictest/redacted.slt @@ -24,6 +24,12 @@ SELECT redacted_create_sql FROM mz_tables WHERE name = 't' CREATE TABLE materialize.public.t (i [s20 AS pg_catalog.int4]) EOF +query T multiline +SELECT create_sql FROM (SHOW REDACTED CREATE TABLE t); +---- +CREATE TABLE materialize.public.t (i pg_catalog.int4) +EOF + statement ok CREATE CONNECTION kafka_conn TO KAFKA (BROKER 'localhost:9092', SECURITY PROTOCOL PLAINTEXT) WITH (VALIDATE = false); @@ -33,12 +39,24 @@ SELECT redacted_create_sql FROM mz_connections WHERE name = 'kafka_conn' CREATE CONNECTION materialize.public.kafka_conn TO KAFKA (BROKER = 'localhost:9092', SECURITY PROTOCOL = plaintext) EOF +query T multiline +SELECT create_sql FROM (SHOW REDACTED CREATE CONNECTION kafka_conn); +---- +CREATE CONNECTION materialize.public.kafka_conn TO KAFKA (BROKER = 'localhost:9092', SECURITY PROTOCOL = plaintext) +EOF + query T multiline SELECT pretty_sql(redacted_create_sql) FROM mz_connections WHERE name = 'kafka_conn' ---- CREATE CONNECTION materialize.public.kafka_conn TO KAFKA (BROKER = 'localhost:9092', SECURITY PROTOCOL = plaintext); EOF +query T multiline +SELECT pretty_sql(create_sql) FROM (SHOW REDACTED CREATE CONNECTION kafka_conn); +---- +CREATE CONNECTION materialize.public.kafka_conn TO KAFKA (BROKER = 'localhost:9092', SECURITY PROTOCOL = plaintext); +EOF + simple conn=mz_system,user=mz_system ALTER SYSTEM SET enable_redacted_test_option TO true; ---- @@ -53,12 +71,24 @@ SELECT redacted_create_sql FROM mz_tables WHERE name = 'redactable_t' CREATE TABLE materialize.public.redactable_t (a [s20 AS pg_catalog.int4]) WITH (RETAIN HISTORY = FOR '2s', REDACTED = '') EOF +query T multiline +SELECT create_sql FROM (SHOW REDACTED CREATE TABLE redactable_t); +---- +CREATE TABLE materialize.public.redactable_t (a pg_catalog.int4) WITH (RETAIN HISTORY = FOR '2s', REDACTED = '') +EOF + query T multiline SELECT pretty_sql(redacted_create_sql) FROM mz_tables WHERE name = 'redactable_t' ---- CREATE TABLE materialize.public.redactable_t (a [s20 AS pg_catalog.int4]) WITH (RETAIN HISTORY = FOR '2s', REDACTED = ''); EOF +query T multiline +SELECT 
pretty_sql(create_sql) FROM (SHOW REDACTED CREATE TABLE redactable_t) +---- +CREATE TABLE materialize.public.redactable_t (a pg_catalog.int4) WITH (RETAIN HISTORY = FOR '2s', REDACTED = ''); +EOF + statement ok CREATE INDEX t_idx_i ON t (i) @@ -68,6 +98,12 @@ SELECT redacted_create_sql FROM mz_indexes WHERE name = 't_idx_i' CREATE INDEX t_idx_i IN CLUSTER [u1] ON [u1 AS materialize.public.t] (i) EOF +query T multiline +SELECT create_sql FROM (SHOW REDACTED CREATE INDEX t_idx_i); +---- +CREATE INDEX t_idx_i IN CLUSTER quickstart ON materialize.public.t (i) +EOF + statement ok CREATE VIEW v AS SELECT 1 @@ -77,12 +113,24 @@ SELECT redacted_create_sql FROM mz_views WHERE name = 'v' CREATE VIEW materialize.public.v AS SELECT '' EOF +query T multiline +SELECT create_sql FROM (SHOW REDACTED CREATE VIEW v); +---- +CREATE VIEW materialize.public.v AS SELECT '' +EOF + query T multiline SELECT pretty_sql(redacted_create_sql) FROM mz_views WHERE name = 'v' ---- CREATE VIEW materialize.public.v AS SELECT ''; EOF +query T multiline +SELECT pretty_sql(create_sql) FROM (SHOW REDACTED CREATE VIEW v); +---- +CREATE VIEW materialize.public.v AS SELECT ''; +EOF + statement ok CREATE SOURCE s FROM LOAD GENERATOR COUNTER @@ -93,6 +141,12 @@ SELECT regexp_replace(redacted_create_sql, 'u[0-9]+', 'uX', 'g') FROM mz_sources CREATE SOURCE materialize.public.s IN CLUSTER [uX] FROM LOAD GENERATOR COUNTER EXPOSE PROGRESS AS [uX AS materialize.public.s_progress] EOF +query T multiline +SELECT regexp_replace(create_sql, 'u[0-9]+', 'uX', 'g') FROM (SHOW REDACTED CREATE SOURCE s); +---- +CREATE SOURCE materialize.public.s IN CLUSTER quickstart FROM LOAD GENERATOR COUNTER EXPOSE PROGRESS AS materialize.public.s_progress +EOF + query T multiline SELECT regexp_replace(pretty_sql(redacted_create_sql), 'u[0-9]+', 'uX', 'g') FROM mz_sources WHERE name = 's' ---- @@ -102,6 +156,15 @@ FROM LOAD GENERATOR COUNTER EXPOSE PROGRESS AS [uX AS materialize.public.s_progress]; EOF +query T multiline +SELECT regexp_replace(pretty_sql(create_sql), 'u[0-9]+', 'uX', 'g') FROM (SHOW REDACTED CREATE SOURCE s); +---- +CREATE SOURCE materialize.public.s +IN CLUSTER quickstart +FROM LOAD GENERATOR COUNTER +EXPOSE PROGRESS AS materialize.public.s_progress; +EOF + statement ok CREATE TYPE ty AS LIST (ELEMENT TYPE=bool) @@ -110,3 +173,52 @@ SELECT redacted_create_sql FROM mz_types WHERE name = 'ty' ---- CREATE TYPE materialize.public.ty AS LIST (ELEMENT TYPE = [s6 AS pg_catalog.bool]) EOF + +statement ok +CREATE MATERIALIZED VIEW mv1 AS +SELECT i+i+5 FROM t; + +query T multiline +SELECT regexp_replace(redacted_create_sql, 'AS OF [0-9]+', 'AS OF xxxxxxx', 'g') FROM mz_materialized_views WHERE name = 'mv1' +---- +CREATE MATERIALIZED VIEW materialize.public.mv1 IN CLUSTER [u1] WITH (REFRESH = ON COMMIT) AS SELECT i + i + '' FROM [u1 AS materialize.public.t] AS OF xxxxxxx +EOF + +query T multiline +SELECT create_sql FROM (SHOW REDACTED CREATE MATERIALIZED VIEW mv1); +---- +CREATE MATERIALIZED VIEW materialize.public.mv1 IN CLUSTER quickstart WITH (REFRESH = ON COMMIT) AS SELECT i + i + '' FROM materialize.public.t +EOF + +query T multiline +SELECT pretty_sql(redacted_create_sql) FROM mz_materialized_views WHERE name = 'mv1' +---- +CREATE MATERIALIZED VIEW materialize.public.mv1 + IN CLUSTER [u1] + WITH (REFRESH = ON COMMIT) + AS SELECT i + i + '' FROM [u1 AS materialize.public.t]; +EOF + +query T multiline +SELECT pretty_sql(create_sql) FROM (SHOW REDACTED CREATE MATERIALIZED VIEW mv1); +---- +CREATE MATERIALIZED VIEW materialize.public.mv1 + IN CLUSTER 
quickstart + WITH (REFRESH = ON COMMIT) + AS SELECT i + i + '' FROM materialize.public.t; +EOF + +query error db error: ERROR: unknown catalog item 'aaaaaaa' +SHOW REDACTED CREATE MATERIALIZED VIEW aaaaaaa; + +query error db error: ERROR: materialize\.public\.v is not a materialized view +SHOW REDACTED CREATE MATERIALIZED VIEW v; + +query error db error: ERROR: materialize\.public\.mv1 is not a view +SHOW REDACTED CREATE VIEW mv1; + +query error Expected end of statement, found TYPE +SHOW CREATE TYPE ty; + +query error Expected end of statement, found TYPE +SHOW REDACTED CREATE TYPE ty; diff --git a/test/sqllogictest/shard_errors.slt b/test/sqllogictest/shard_errors.slt index 44fa86719b773..8e17b62067439 100644 --- a/test/sqllogictest/shard_errors.slt +++ b/test/sqllogictest/shard_errors.slt @@ -41,3 +41,6 @@ INSERT INTO bar VALUES (1); # Make sure we get the error even if we project away all columns. query error division by zero SELECT count(*) FROM baz; + +query error item doesn't exist +INSPECT SHARD 'u666' diff --git a/test/sqllogictest/slt.slt b/test/sqllogictest/slt.slt index eaa07dfe7ac57..4f982d85442f7 100644 --- a/test/sqllogictest/slt.slt +++ b/test/sqllogictest/slt.slt @@ -30,3 +30,49 @@ SELECT * FROM t ORDER BY a 3 3 4 + +query III colnames +SELECT 1 AS "column name with spaces!", 2 AS "nospaces", 3 AS "space again"; +---- +column␠name␠with␠spaces! nospaces space␠again +1 +2 +3 + +mode standard + +query TT +SELECT 'result' || chr(10) || 'with' || chr(10) || 'newline', 'no newline in this one, but there are spaces' +UNION +SELECT 'one' || chr(10) || 'more' || chr(10) || 'row (with spaces)', 'easy' +---- +one⏎more⏎row (with spaces) +easy +result⏎with⏎newline +no newline in this one, but there are spaces + +query T multiline +SELECT 'result' || chr(10) || 'with' || chr(10) || 'newline'; +---- +result +with +newline +EOF + +mode cockroach + +query TT +SELECT 'result' || chr(10) || 'with' || chr(10) || 'newline', 'no newline in this one, but there are spaces' +UNION +SELECT 'one' || chr(10) || 'more' || chr(10) || 'row (with spaces)', 'easy' +---- +one⏎more⏎row␠(with␠spaces) easy +result⏎with⏎newline no␠newline␠in␠this␠one,␠but␠there␠are␠spaces + +query T multiline +SELECT 'result' || chr(10) || 'with' || chr(10) || 'newline'; +---- +result +with +newline +EOF diff --git a/test/sqllogictest/system-cluster.slt b/test/sqllogictest/system-cluster.slt index db2c091f996b7..0488c80a49385 100644 --- a/test/sqllogictest/system-cluster.slt +++ b/test/sqllogictest/system-cluster.slt @@ -496,19 +496,19 @@ Explained Query: Finish order_by=[#0 desc nulls_first] limit=10 output=[#0..=#2] With cte l0 = - Filter (#6 <= 100) AND (#6 >= 0) AND (#3) IS NOT NULL - Map (timestamp_tz_to_mz_timestamp(#0)) - ReadIndex on=mz_internal.mz_source_status_history mz_source_status_history_ind=[lookup value=("u6")] + Project (#0, #3) + Filter (#6 <= 100) AND (#6 >= 0) AND (#3) IS NOT NULL + Map (timestamp_tz_to_mz_timestamp(#0)) + ReadIndex on=mz_internal.mz_source_status_history mz_source_status_history_ind=[lookup value=("u6")] Return Project (#1, #0, #3) Join on=(#0 = #2) type=differential ArrangeBy keys=[[#0]] Reduce group_by=[#1] aggregates=[max((extract_epoch_tstz(#0) * 1000))] - Project (#0, #3) - Get l0 + Get l0 ArrangeBy keys=[[#0]] Reduce group_by=[#0] aggregates=[count(*)] - Project (#3) + Project (#1) Get l0 Used Indexes: diff --git a/test/sqllogictest/timedomain.slt b/test/sqllogictest/timedomain.slt index 90cd38394284c..8c1bbdfbb36b2 100644 --- a/test/sqllogictest/timedomain.slt +++ 
b/test/sqllogictest/timedomain.slt @@ -223,8 +223,9 @@ Explained Query: Filter (#2) IS NULL AND (#1 = "view") ReadStorage mz_internal.mz_comments cte l1 = - Filter (#2 = "u3") - ReadStorage mz_catalog.mz_views + Project (#0, #3) + Filter (#2 = "u3") + ReadStorage mz_catalog.mz_views Return Project (#0, #2) Map (coalesce(#1, "")) @@ -235,13 +236,12 @@ Explained Query: Project (#1) Join on=(#0 = #2) type=differential ArrangeBy keys=[[#0]] - Project (#0, #3) - Get l1 + Get l1 ArrangeBy keys=[[#0]] Distinct project=[#0] Project (#0) Get l0 - Project (#3) + Project (#1) Get l1 Project (#2, #3) Filter (#1 = "u3") diff --git a/test/sqllogictest/tpch_create_index.slt b/test/sqllogictest/tpch_create_index.slt index 5011520801c70..e1b89815515d4 100644 --- a/test/sqllogictest/tpch_create_index.slt +++ b/test/sqllogictest/tpch_create_index.slt @@ -197,10 +197,10 @@ materialize.public.q01_primary_idx: ReadGlobalFromSameDataflow materialize.public.q01 // { arity: 10 } materialize.public.q01: - Project (#0{l_returnflag}..=#5{sum}, #9{count}..=#11, #6) // { arity: 10 } + Project (#0{l_returnflag}..=#5{sum}, #9..=#11, #6{count}) // { arity: 10 } Map (bigint_to_numeric(case when (#6{count} = 0) then null else #6{count} end), (#2{sum_l_quantity} / #8), (#3{sum_l_extendedprice} / #8), (#7{sum_l_discount} / #8)) // { arity: 12 } Reduce group_by=[#4{l_returnflag}, #5{l_linestatus}] aggregates=[sum(#0{l_quantity}), sum(#1{l_extendedprice}), sum((#1{l_extendedprice} * (1 - #2{l_discount}))), sum(((#1{l_extendedprice} * (1 - #2{l_discount})) * (1 + #3{l_tax}))), count(*), sum(#2{l_discount})] // { arity: 8 } - Project (#4{l_returnflag}..=#9) // { arity: 6 } + Project (#4{l_quantity}..=#9{l_linestatus}) // { arity: 6 } Filter (date_to_timestamp(#10{l_shipdate}) <= 1998-10-02 00:00:00) // { arity: 16 } ReadIndex on=lineitem pk_lineitem_orderkey_linenumber=[*** full scan ***] // { arity: 16 } @@ -281,7 +281,7 @@ materialize.public.q02: ArrangeBy keys=[[#0{r_regionkey}]] // { arity: 3 } ReadIndex on=region pk_region_regionkey=[delta join lookup] // { arity: 3 } cte l4 = - Project (#0{p_partkey}, #2{s_name}, #10, #11, #13..=#15, #19, #22) // { arity: 9 } + Project (#0{p_partkey}, #2{p_mfgr}, #10{s_name}, #11{s_address}, #13{s_phone}..=#15{s_comment}, #19{ps_supplycost}, #22{n_name}) // { arity: 9 } Filter (#5{p_size} = 15) AND (#26{r_name} = "EUROPE") AND like["%BRASS"](varchar_to_text(#4{p_type})) // { arity: 28 } Join on=(#0{p_partkey} = #16{ps_partkey} AND #9{s_suppkey} = #17{ps_suppkey} AND #12{s_nationkey} = #21{n_nationkey} AND #23{n_regionkey} = #25{r_regionkey}) type=delta // { arity: 28 } implementation @@ -297,7 +297,7 @@ materialize.public.q02: Get l2 // { arity: 4 } Get l3 // { arity: 3 } Return // { arity: 8 } - Project (#5{s_address}, #2{n_name}, #8, #0{s_acctbal}, #1{s_name}, #3{p_partkey}, #4{p_mfgr}, #6{s_phone}) // { arity: 8 } + Project (#5{s_acctbal}, #2{s_name}, #8{n_name}, #0{p_partkey}, #1{p_mfgr}, #3{s_address}, #4{s_phone}, #6{s_comment}) // { arity: 8 } Join on=(#0{p_partkey} = #9{p_partkey} AND #7{ps_supplycost} = #10{min_ps_supplycost}) type=differential // { arity: 11 } implementation %1[#0, #1]UKK » %0:l4[#0, #7]KK @@ -305,7 +305,7 @@ materialize.public.q02: Get l4 // { arity: 9 } ArrangeBy keys=[[#0{p_partkey}, #1{min_ps_supplycost}]] // { arity: 2 } Reduce group_by=[#0{p_partkey}] aggregates=[min(#1{ps_supplycost})] // { arity: 2 } - Project (#0{p_partkey}, #4) // { arity: 2 } + Project (#0{p_partkey}, #4{ps_supplycost}) // { arity: 2 } Filter (#18{r_name} = "EUROPE") // { arity: 20 } 
Join on=(#0{p_partkey} = #1{ps_partkey} AND #2{ps_suppkey} = #6{s_suppkey} AND #9{s_nationkey} = #13{n_nationkey} AND #15{n_regionkey} = #17{r_regionkey}) type=delta // { arity: 20 } implementation @@ -375,9 +375,9 @@ materialize.public.q03_primary_idx: ReadGlobalFromSameDataflow materialize.public.q03 // { arity: 4 } materialize.public.q03: - Project (#0{o_orderkey}, #3{o_shippriority}, #1{sum}, #2{o_orderdate}) // { arity: 4 } + Project (#0{o_orderkey}, #3{sum}, #1{o_orderdate}, #2{o_shippriority}) // { arity: 4 } Reduce group_by=[#0{o_orderkey}..=#2{o_shippriority}] aggregates=[sum((#3{l_extendedprice} * (1 - #4{l_discount})))] // { arity: 4 } - Project (#8, #12, #15, #22, #23) // { arity: 5 } + Project (#8{o_orderkey}, #12{o_orderdate}, #15{o_shippriority}, #22{l_extendedprice}, #23{l_discount}) // { arity: 5 } Filter (#6{c_mktsegment} = "BUILDING") AND (#12{o_orderdate} < 1995-03-15) AND (#27{l_shipdate} > 1995-03-15) // { arity: 33 } Join on=(#0{c_custkey} = #9{o_custkey} AND #8{o_orderkey} = #17{l_orderkey}) type=delta // { arity: 33 } implementation @@ -439,7 +439,7 @@ materialize.public.q04_primary_idx: materialize.public.q04: Reduce group_by=[#0{o_orderpriority}] aggregates=[count(*)] // { arity: 2 } - Project (#5) // { arity: 1 } + Project (#5{o_orderpriority}) // { arity: 1 } Filter (#4{o_orderdate} >= 1993-07-01) AND (date_to_timestamp(#4{o_orderdate}) < 1993-10-01 00:00:00) // { arity: 10 } Join on=(#0{o_orderkey} = #9{l_orderkey}) type=differential // { arity: 10 } implementation @@ -501,7 +501,7 @@ materialize.public.q05_primary_idx: materialize.public.q05: Reduce group_by=[#2{n_name}] aggregates=[sum((#0{l_extendedprice} * (1 - #1{l_discount})))] // { arity: 2 } - Project (#19, #20, #24) // { arity: 3 } + Project (#19{l_extendedprice}, #20{l_discount}, #24{n_name}) // { arity: 3 } Filter (#28{r_name} = "ASIA") AND (#12{o_orderdate} < 1995-01-01) AND (#12{o_orderdate} >= 1994-01-01) // { arity: 30 } Join on=(#0{c_custkey} = #9{o_custkey} AND #3{c_nationkey} = #22{s_nationkey} = #23{n_nationkey} AND #8{o_orderkey} = #17{l_orderkey} AND #18{l_suppkey} = #21{s_suppkey} AND #25{n_regionkey} = #27{r_regionkey}) type=delta // { arity: 30 } implementation @@ -516,10 +516,10 @@ materialize.public.q05: ArrangeBy keys=[[#0{o_orderkey}], [#1{o_custkey}]] // { arity: 9 } ReadIndex on=orders pk_orders_orderkey=[delta join lookup] fk_orders_custkey=[delta join lookup] // { arity: 9 } ArrangeBy keys=[[#0{l_orderkey}], [#0{l_orderkey}, #1{l_suppkey}]] // { arity: 4 } - Project (#0{l_orderkey}, #2{l_extendedprice}, #5, #6) // { arity: 4 } + Project (#0{l_orderkey}, #2{l_suppkey}, #5{l_extendedprice}, #6{l_discount}) // { arity: 4 } ReadIndex on=lineitem pk_lineitem_orderkey_linenumber=[*** full scan ***] // { arity: 16 } ArrangeBy keys=[[#0{s_suppkey}, #1{s_nationkey}]] // { arity: 2 } - Project (#0{s_suppkey}, #3) // { arity: 2 } + Project (#0{s_suppkey}, #3{s_nationkey}) // { arity: 2 } Filter (#0{s_suppkey}) IS NOT NULL // { arity: 7 } ReadIndex on=supplier pk_supplier_suppkey=[*** full scan ***] // { arity: 7 } ArrangeBy keys=[[#0{n_nationkey}], [#2{n_regionkey}]] // { arity: 4 } @@ -570,7 +570,7 @@ materialize.public.q06: With cte l0 = Reduce aggregates=[sum((#0{l_extendedprice} * #1{l_discount}))] // { arity: 1 } - Project (#5, #6) // { arity: 2 } + Project (#5{l_extendedprice}, #6{l_discount}) // { arity: 2 } Filter (#4{l_quantity} < 24) AND (#6{l_discount} <= 0.07) AND (#6{l_discount} >= 0.05) AND (#10{l_shipdate} >= 1994-01-01) AND (date_to_timestamp(#10{l_shipdate}) < 1995-01-01 
00:00:00) // { arity: 16 } ReadIndex on=lineitem pk_lineitem_orderkey_linenumber=[*** full scan ***] // { arity: 16 } Return // { arity: 1 } @@ -652,7 +652,7 @@ materialize.public.q07: ReadIndex on=nation pk_nation_nationkey=[delta join lookup] // { arity: 4 } Return // { arity: 4 } Reduce group_by=[#3{n_name}, #4{n_name}, extract_year_d(#2{l_shipdate})] aggregates=[sum((#0{l_extendedprice} * (1 - #1{l_discount})))] // { arity: 4 } - Project (#12, #13, #17, #41, #45) // { arity: 5 } + Project (#12{l_extendedprice}, #13{l_discount}, #17{l_shipdate}, #41{n_name}, #45{n_name}) // { arity: 5 } Filter (#17{l_shipdate} <= 1996-12-31) AND (#17{l_shipdate} >= 1995-01-01) AND (#48 OR #49) AND (#50 OR #51) AND ((#48 AND #51) OR (#49 AND #50)) // { arity: 52 } Map ((#41{n_name} = "FRANCE"), (#41{n_name} = "GERMANY"), (#45{n_name} = "FRANCE"), (#45{n_name} = "GERMANY")) // { arity: 52 } Join on=(#0{s_suppkey} = #9{l_suppkey} AND #3{s_nationkey} = #40{n_nationkey} AND #7{l_orderkey} = #23{o_orderkey} AND #24{o_custkey} = #32{c_custkey} AND #35{c_nationkey} = #44{n_nationkey}) type=delta // { arity: 48 } @@ -745,7 +745,7 @@ materialize.public.q08: Project (#0, #3) // { arity: 2 } Map ((#1{sum} / #2{sum})) // { arity: 4 } Reduce group_by=[extract_year_d(#2{o_orderdate})] aggregates=[sum(case when (#3{n_name} = "BRAZIL") then (#0{l_extendedprice} * (1 - #1{l_discount})) else 0 end), sum((#0{l_extendedprice} * (1 - #1{l_discount})))] // { arity: 3 } - Project (#21, #22, #36, #54) // { arity: 4 } + Project (#21{l_extendedprice}, #22{l_discount}, #36{o_orderdate}, #54{n_name}) // { arity: 4 } Filter (#58{r_name} = "AMERICA") AND (#36{o_orderdate} <= 1996-12-31) AND (#36{o_orderdate} >= 1995-01-01) AND ("ECONOMY ANODIZED STEEL" = varchar_to_text(#4{p_type})) // { arity: 60 } Join on=(#0{p_partkey} = #17{l_partkey} AND #9{s_suppkey} = #18{l_suppkey} AND #12{s_nationkey} = #53{n_nationkey} AND #16{l_orderkey} = #32{o_orderkey} AND #33{o_custkey} = #41{c_custkey} AND #44{c_nationkey} = #49{n_nationkey} AND #51{n_regionkey} = #57{r_regionkey}) type=delta // { arity: 60 } implementation @@ -842,7 +842,7 @@ materialize.public.q09_primary_idx: materialize.public.q09: Reduce group_by=[#5{n_name}, extract_year_d(#4{o_orderdate})] aggregates=[sum(((#1{l_extendedprice} * (1 - #2{l_discount})) - (#3{ps_supplycost} * #0{l_quantity})))] // { arity: 3 } - Project (#20..=#22, #35, #41, #47) // { arity: 6 } + Project (#20{l_quantity}..=#22{l_discount}, #35{ps_supplycost}, #41{o_orderdate}, #47{n_name}) // { arity: 6 } Filter like["%green%"](varchar_to_text(#1{p_name})) // { arity: 50 } Join on=(#0{p_partkey} = #17{l_partkey} = #32{ps_partkey} AND #9{s_suppkey} = #18{l_suppkey} = #33{ps_suppkey} AND #12{s_nationkey} = #46{n_nationkey} AND #16{l_orderkey} = #37{o_orderkey}) type=delta // { arity: 50 } implementation @@ -929,9 +929,9 @@ materialize.public.q10_primary_idx: ReadGlobalFromSameDataflow materialize.public.q10 // { arity: 8 } materialize.public.q10: - Project (#0{c_custkey}, #1{c_name}, #7{c_comment}, #2{sum}, #4{n_name}, #5{c_address}, #3{c_acctbal}, #6{c_phone}) // { arity: 8 } + Project (#0{c_custkey}, #1{c_name}, #7{sum}, #2{c_acctbal}, #4{n_name}, #5{c_address}, #3{c_phone}, #6{c_comment}) // { arity: 8 } Reduce group_by=[#0{c_custkey}, #1{c_name}, #4{c_acctbal}, #3{c_phone}, #8{n_name}, #2{c_address}, #5{c_comment}] aggregates=[sum((#6{l_extendedprice} * (1 - #7{l_discount})))] // { arity: 8 } - Project (#0{c_custkey}..=#2{c_address}, #4{c_acctbal}, #5{c_comment}, #7{l_discount}, #22, #23, #34) // { arity: 9 } + 
Project (#0{c_custkey}..=#2{c_address}, #4{c_phone}, #5{c_acctbal}, #7{c_comment}, #22{l_extendedprice}, #23{l_discount}, #34{n_name}) // { arity: 9 } Filter (#25{l_returnflag} = "R") AND (#12{o_orderdate} < 1994-01-01) AND (#12{o_orderdate} >= 1993-10-01) AND (date_to_timestamp(#12{o_orderdate}) < 1994-01-01 00:00:00) // { arity: 37 } Join on=(#0{c_custkey} = #9{o_custkey} AND #3{c_nationkey} = #33{n_nationkey} AND #8{o_orderkey} = #17{l_orderkey}) type=delta // { arity: 37 } implementation @@ -1005,7 +1005,7 @@ materialize.public.q11_primary_idx: materialize.public.q11: With cte l0 = - Project (#0{ps_partkey}, #2{ps_supplycost}, #3) // { arity: 3 } + Project (#0{ps_partkey}, #2{ps_availqty}, #3{ps_supplycost}) // { arity: 3 } Filter (#13{n_name} = "GERMANY") // { arity: 16 } Join on=(#1{ps_suppkey} = #5{s_suppkey} AND #8{s_nationkey} = #12{n_nationkey}) type=delta // { arity: 16 } implementation @@ -1029,7 +1029,7 @@ materialize.public.q11: Get l0 // { arity: 3 } ArrangeBy keys=[[]] // { arity: 1 } Reduce aggregates=[sum((#1{ps_supplycost} * integer_to_numeric(#0{ps_availqty})))] // { arity: 1 } - Project (#1{ps_supplycost}, #2) // { arity: 2 } + Project (#1{ps_availqty}, #2{ps_supplycost}) // { arity: 2 } Get l0 // { arity: 3 } Used Indexes: @@ -1087,7 +1087,7 @@ materialize.public.q12_primary_idx: materialize.public.q12: Reduce group_by=[#1{l_shipmode}] aggregates=[sum(case when ((#0{o_orderpriority} = "2-HIGH") OR (#0{o_orderpriority} = "1-URGENT")) then 1 else 0 end), sum(case when ((#0{o_orderpriority} != "2-HIGH") AND (#0{o_orderpriority} != "1-URGENT")) then 1 else 0 end)] // { arity: 3 } - Project (#5, #23) // { arity: 2 } + Project (#5{o_orderpriority}, #23{l_shipmode}) // { arity: 2 } Filter (#21{l_receiptdate} >= 1994-01-01) AND (#19{l_shipdate} < #20{l_commitdate}) AND (#20{l_commitdate} < #21{l_receiptdate}) AND (date_to_timestamp(#21{l_receiptdate}) < 1995-01-01 00:00:00) AND ((#23{l_shipmode} = "MAIL") OR (#23{l_shipmode} = "SHIP")) // { arity: 25 } Join on=(#0{o_orderkey} = #9{l_orderkey}) type=differential // { arity: 25 } implementation @@ -1146,7 +1146,7 @@ materialize.public.q13: ArrangeBy keys=[[#0{c_custkey}]] // { arity: 8 } ReadIndex on=customer pk_customer_custkey=[differential join] // { arity: 8 } cte l1 = - Project (#0{c_custkey}, #8) // { arity: 2 } + Project (#0{c_custkey}, #8{o_orderkey}) // { arity: 2 } Filter NOT(like["%special%requests%"](varchar_to_text(#16{o_comment}))) // { arity: 17 } Join on=(#0{c_custkey} = #9{o_custkey}) type=differential // { arity: 17 } implementation @@ -1156,7 +1156,7 @@ materialize.public.q13: ReadIndex on=orders fk_orders_custkey=[differential join] // { arity: 9 } Return // { arity: 2 } Reduce group_by=[#0{count_o_orderkey}] aggregates=[count(*)] // { arity: 2 } - Project (#1) // { arity: 1 } + Project (#1{count_o_orderkey}) // { arity: 1 } Reduce group_by=[#0{c_custkey}] aggregates=[count(#1{o_orderkey})] // { arity: 2 } Union // { arity: 2 } Map (null) // { arity: 2 } @@ -1215,7 +1215,7 @@ materialize.public.q14: With cte l0 = Reduce aggregates=[sum(case when like["PROMO%"](varchar_to_text(#2{p_type})) then (#0{l_extendedprice} * (1 - #1{l_discount})) else 0 end), sum((#0{l_extendedprice} * (1 - #1{l_discount})))] // { arity: 2 } - Project (#5, #6, #20) // { arity: 3 } + Project (#5{l_extendedprice}, #6{l_discount}, #20{p_type}) // { arity: 3 } Filter (#10{l_shipdate} >= 1995-09-01) AND (date_to_timestamp(#10{l_shipdate}) < 1995-10-01 00:00:00) // { arity: 25 } Join on=(#1{l_partkey} = #16{p_partkey}) type=differential // 
{ arity: 25 } implementation @@ -1296,11 +1296,11 @@ materialize.public.q15: With cte l0 = Reduce group_by=[#0{l_suppkey}] aggregates=[sum((#1{l_extendedprice} * (1 - #2{l_discount})))] // { arity: 2 } - Project (#2{l_discount}, #5, #6) // { arity: 3 } + Project (#2{l_suppkey}, #5{l_extendedprice}, #6{l_discount}) // { arity: 3 } Filter (#10{l_shipdate} >= 1996-01-01) AND (date_to_timestamp(#10{l_shipdate}) < 1996-04-01 00:00:00) // { arity: 16 } ReadIndex on=lineitem pk_lineitem_orderkey_linenumber=[*** full scan ***] // { arity: 16 } Return // { arity: 5 } - Project (#0{s_suppkey}..=#2{s_address}, #4{sum}, #8) // { arity: 5 } + Project (#0{s_suppkey}..=#2{s_address}, #4{s_phone}, #8{sum}) // { arity: 5 } Join on=(#0{s_suppkey} = #7{l_suppkey} AND #8{sum} = #9{max_sum}) type=delta // { arity: 10 } implementation %0:supplier » %1:l0[#0]UKA » %2[#0]UK @@ -1312,7 +1312,7 @@ materialize.public.q15: Get l0 // { arity: 2 } ArrangeBy keys=[[#0{max_sum}]] // { arity: 1 } Reduce aggregates=[max(#0{sum})] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{sum}) // { arity: 1 } Get l0 // { arity: 2 } Used Indexes: @@ -1371,7 +1371,7 @@ materialize.public.q16_primary_idx: materialize.public.q16: With cte l0 = - Project (#1{p_brand}, #8..=#10) // { arity: 4 } + Project (#1{ps_suppkey}, #8{p_brand}..=#10{p_size}) // { arity: 4 } Filter (#8{p_brand} != "Brand#45") AND NOT(like["MEDIUM POLISHED%"](varchar_to_text(#9{p_type}))) AND ((#10{p_size} = 3) OR (#10{p_size} = 9) OR (#10{p_size} = 14) OR (#10{p_size} = 19) OR (#10{p_size} = 23) OR (#10{p_size} = 36) OR (#10{p_size} = 45) OR (#10{p_size} = 49)) // { arity: 14 } Join on=(#0{ps_partkey} = #5{p_partkey}) type=differential // { arity: 14 } implementation @@ -1456,7 +1456,7 @@ materialize.public.q17: ArrangeBy keys=[[#1{l_partkey}]] // { arity: 16 } ReadIndex on=lineitem fk_lineitem_partkey=[differential join] // { arity: 16 } cte l1 = - Project (#1{l_quantity}, #4, #5) // { arity: 3 } + Project (#1{l_partkey}, #4{l_quantity}, #5{l_extendedprice}) // { arity: 3 } Filter (#19{p_brand} = "Brand#23") AND (#22{p_container} = "MED BOX") // { arity: 25 } Join on=(#1{l_partkey} = #16{p_partkey}) type=differential // { arity: 25 } implementation @@ -1466,7 +1466,7 @@ materialize.public.q17: ReadIndex on=part pk_part_partkey=[differential join] // { arity: 9 } cte l2 = Reduce aggregates=[sum(#0{l_extendedprice})] // { arity: 1 } - Project (#2) // { arity: 1 } + Project (#2{l_extendedprice}) // { arity: 1 } Filter (#1{l_quantity} < (0.2 * (#4{sum_l_quantity} / bigint_to_numeric(case when (#5{count} = 0) then null else #5{count} end)))) // { arity: 6 } Join on=(#0{l_partkey} = #3{l_partkey}) type=differential // { arity: 6 } implementation @@ -1475,7 +1475,7 @@ materialize.public.q17: Get l1 // { arity: 3 } ArrangeBy keys=[[#0{l_partkey}]] // { arity: 3 } Reduce group_by=[#0{l_partkey}] aggregates=[sum(#1{l_quantity}), count(*)] // { arity: 3 } - Project (#0{l_partkey}, #5) // { arity: 2 } + Project (#0{l_partkey}, #5{l_quantity}) // { arity: 2 } Join on=(#0{l_partkey} = #2{l_partkey}) type=differential // { arity: 17 } implementation %0[#0]UKA » %1:l0[#1]KA @@ -1558,7 +1558,7 @@ materialize.public.q18: ArrangeBy keys=[[#0{l_orderkey}]] // { arity: 16 } ReadIndex on=lineitem fk_lineitem_orderkey=[differential join, delta join lookup] // { arity: 16 } cte l1 = - Project (#0{c_custkey}, #1{c_name}, #8, #11, #12, #21) // { arity: 6 } + Project (#0{c_custkey}, #1{c_name}, #8{o_orderkey}, #11{o_totalprice}, #12{o_orderdate}, #21{l_quantity}) // { arity: 6 } 
Join on=(#0{c_custkey} = #9{o_custkey} AND #8{o_orderkey} = #17{l_orderkey}) type=delta // { arity: 33 } implementation %0:customer » %1:orders[#1]KA » %2:l0[#0]KA @@ -1580,13 +1580,13 @@ materialize.public.q18: Get l1 // { arity: 6 } ArrangeBy keys=[[#0{o_orderkey}]] // { arity: 2 } Reduce group_by=[#0{o_orderkey}] aggregates=[sum(#1{l_quantity})] // { arity: 2 } - Project (#0{o_orderkey}, #5) // { arity: 2 } + Project (#0{o_orderkey}, #5{l_quantity}) // { arity: 2 } Join on=(#0{o_orderkey} = #1{l_orderkey}) type=differential // { arity: 17 } implementation %0[#0]UKA » %1:l0[#0]KA ArrangeBy keys=[[#0{o_orderkey}]] // { arity: 1 } Distinct project=[#0{o_orderkey}] // { arity: 1 } - Project (#2) // { arity: 1 } + Project (#2{o_orderkey}) // { arity: 1 } Get l1 // { arity: 6 } Get l0 // { arity: 16 } @@ -1654,7 +1654,7 @@ materialize.public.q19: With cte l0 = Reduce aggregates=[sum((#0{l_extendedprice} * (1 - #1{l_discount})))] // { arity: 1 } - Project (#5, #6) // { arity: 2 } + Project (#5{l_extendedprice}, #6{l_discount}) // { arity: 2 } Filter (#13{l_shipinstruct} = "DELIVER IN PERSON") AND (#21{p_size} >= 1) AND ((#14{l_shipmode} = "AIR") OR (#14{l_shipmode} = "AIR REG")) AND ((#25 AND #26) OR (#27 AND #28) OR (#29 AND #30)) AND ((#31 AND #32 AND #33) OR (#34 AND #35 AND #36) OR (#37 AND #38 AND #39)) AND ((#25 AND #26 AND #34 AND #35 AND #36) OR (#27 AND #28 AND #37 AND #38 AND #39) OR (#29 AND #30 AND #31 AND #32 AND #33)) // { arity: 40 } Map ((#4{l_quantity} <= 20), (#4{l_quantity} >= 10), (#4{l_quantity} <= 30), (#4{l_quantity} >= 20), (#4{l_quantity} <= 11), (#4{l_quantity} >= 1), (#19{p_brand} = "Brand#12"), (#21{p_size} <= 5), ((#22{p_container} = "SM BOX") OR (#22{p_container} = "SM PKG") OR (#22{p_container} = "SM CASE") OR (#22{p_container} = "SM PACK")), (#19{p_brand} = "Brand#23"), (#21{p_size} <= 10), ((#22{p_container} = "MED BAG") OR (#22{p_container} = "MED BOX") OR (#22{p_container} = "MED PKG") OR (#22{p_container} = "MED PACK")), (#19{p_brand} = "Brand#34"), (#21{p_size} <= 15), ((#22{p_container} = "LG BOX") OR (#22{p_container} = "LG PKG") OR (#22{p_container} = "LG CASE") OR (#22{p_container} = "LG PACK"))) // { arity: 40 } Join on=(#1{l_partkey} = #16{p_partkey}) type=differential // { arity: 25 } @@ -1767,7 +1767,7 @@ materialize.public.q20: Filter (#0{p_partkey}) IS NOT NULL AND like["forest%"](varchar_to_text(#1{p_name})) // { arity: 9 } ReadIndex on=part pk_part_partkey=[*** full scan ***] // { arity: 9 } Return // { arity: 2 } - Project (#1{s_address}, #2) // { arity: 2 } + Project (#1{s_name}, #2{s_address}) // { arity: 2 } Join on=(#0{s_suppkey} = #3{s_suppkey}) type=differential // { arity: 4 } implementation %1[#0]UKA » %0:l0[#0]K @@ -1781,21 +1781,21 @@ materialize.public.q20: implementation %1[#1, #0]UKK » %0:l1[#0, #1]KKf ArrangeBy keys=[[#0{s_suppkey}, #1{ps_partkey}]] // { arity: 3 } - Project (#0{s_suppkey}, #1{ps_partkey}, #3) // { arity: 3 } + Project (#0{s_suppkey}, #1{ps_partkey}, #3{ps_availqty}) // { arity: 3 } Filter (#0{s_suppkey} = #2{ps_suppkey}) // { arity: 4 } Get l1 // { arity: 4 } ArrangeBy keys=[[#1{ps_suppkey}, #0{ps_partkey}]] // { arity: 3 } Project (#0{ps_partkey}, #1{ps_suppkey}, #3) // { arity: 3 } Map ((0.5 * #2{sum_l_quantity})) // { arity: 4 } Reduce group_by=[#0{ps_partkey}, #1{ps_suppkey}] aggregates=[sum(#2{l_quantity})] // { arity: 3 } - Project (#0{ps_partkey}, #1{ps_suppkey}, #6) // { arity: 3 } + Project (#0{ps_partkey}, #1{ps_suppkey}, #6{l_quantity}) // { arity: 3 } Filter (#12{l_shipdate} >= 1995-01-01) AND 
(date_to_timestamp(#12{l_shipdate}) < 1996-01-01 00:00:00) // { arity: 18 } Join on=(#0{ps_partkey} = #3{l_partkey} AND #1{ps_suppkey} = #4{l_suppkey}) type=differential // { arity: 18 } implementation %0[#0, #1]UKKA » %1:lineitem[#1, #2]KKAiif ArrangeBy keys=[[#0{ps_partkey}, #1{ps_suppkey}]] // { arity: 2 } Distinct project=[#0{ps_partkey}, #1{ps_suppkey}] // { arity: 2 } - Project (#1{ps_suppkey}, #2) // { arity: 2 } + Project (#1{ps_partkey}, #2{ps_suppkey}) // { arity: 2 } Get l1 // { arity: 4 } ArrangeBy keys=[[#1{l_partkey}, #2{l_suppkey}]] // { arity: 16 } ReadIndex on=lineitem fk_lineitem_partsuppkey=[differential join] // { arity: 16 } @@ -1868,7 +1868,7 @@ materialize.public.q21_primary_idx: materialize.public.q21: With cte l0 = - Project (#0{s_suppkey}, #1{s_name}, #7) // { arity: 3 } + Project (#0{s_suppkey}, #1{s_name}, #7{l_orderkey}) // { arity: 3 } Filter (#25{o_orderstatus} = "F") AND (#33{n_name} = "SAUDI ARABIA") AND (#19{l_receiptdate} > #18{l_commitdate}) // { arity: 36 } Join on=(#0{s_suppkey} = #9{l_suppkey} AND #3{s_nationkey} = #32{n_nationkey} AND #7{l_orderkey} = #23{o_orderkey}) type=delta // { arity: 36 } implementation @@ -1903,16 +1903,16 @@ materialize.public.q21: %1:l1[#0]KA » %0[#0]K ArrangeBy keys=[[#0{l_orderkey}]] // { arity: 2 } Distinct project=[#1{l_orderkey}, #0{s_suppkey}] // { arity: 2 } - Project (#0{s_suppkey}, #2) // { arity: 2 } + Project (#0{s_suppkey}, #2{l_orderkey}) // { arity: 2 } Get l0 // { arity: 3 } Get l1 // { arity: 16 } cte l3 = Distinct project=[#1{l_orderkey}, #0{s_suppkey}] // { arity: 2 } - Project (#0{s_suppkey}, #2) // { arity: 2 } + Project (#0{s_suppkey}, #2{l_orderkey}) // { arity: 2 } Get l2 // { arity: 3 } Return // { arity: 2 } Reduce group_by=[#0{s_name}] aggregates=[count(*)] // { arity: 2 } - Project (#1) // { arity: 1 } + Project (#1{s_name}) // { arity: 1 } Join on=(#0{s_suppkey} = #4{s_suppkey} AND #2{l_orderkey} = #3{l_orderkey}) type=differential // { arity: 5 } implementation %0:l2[#2, #0]KK » %1[#0, #1]KK @@ -2009,8 +2009,9 @@ materialize.public.q22_primary_idx: materialize.public.q22: With cte l0 = - Map (substr(char_to_text(#4{c_phone}), 1, 2)) // { arity: 9 } - ReadIndex on=customer pk_customer_custkey=[*** full scan ***] // { arity: 8 } + Project (#0{c_custkey}, #4{c_phone}, #5{c_acctbal}, #8) // { arity: 4 } + Map (substr(char_to_text(#4{c_phone}), 1, 2)) // { arity: 9 } + ReadIndex on=customer pk_customer_custkey=[*** full scan ***] // { arity: 8 } cte l1 = Project (#0{c_custkey}..=#2{c_acctbal}) // { arity: 3 } Filter (#2{c_acctbal} > (#3{sum_c_acctbal} / bigint_to_numeric(case when (#4{count} = 0) then null else #4{count} end))) // { arity: 5 } @@ -2018,21 +2019,21 @@ materialize.public.q22: implementation %1[×]UA » %0:l0[×]ef ArrangeBy keys=[[]] // { arity: 3 } - Project (#0{c_custkey}, #4, #5) // { arity: 3 } - Filter ((#8 = "13") OR (#8 = "17") OR (#8 = "18") OR (#8 = "23") OR (#8 = "29") OR (#8 = "30") OR (#8 = "31")) // { arity: 9 } - Get l0 // { arity: 9 } + Project (#0{c_custkey}..=#2{c_acctbal}) // { arity: 3 } + Filter ((#3 = "13") OR (#3 = "17") OR (#3 = "18") OR (#3 = "23") OR (#3 = "29") OR (#3 = "30") OR (#3 = "31")) // { arity: 4 } + Get l0 // { arity: 4 } ArrangeBy keys=[[]] // { arity: 2 } Reduce aggregates=[sum(#0{c_acctbal}), count(*)] // { arity: 2 } - Project (#5) // { arity: 1 } - Filter (#5{c_acctbal} > 0) AND ((#8 = "13") OR (#8 = "17") OR (#8 = "18") OR (#8 = "23") OR (#8 = "29") OR (#8 = "30") OR (#8 = "31")) // { arity: 9 } - Get l0 // { arity: 9 } + Project (#2{c_acctbal}) 
// { arity: 1 } + Filter (#2{c_acctbal} > 0) AND ((#3 = "13") OR (#3 = "17") OR (#3 = "18") OR (#3 = "23") OR (#3 = "29") OR (#3 = "30") OR (#3 = "31")) // { arity: 4 } + Get l0 // { arity: 4 } cte l2 = Distinct project=[#0{c_custkey}] // { arity: 1 } Project (#0{c_custkey}) // { arity: 1 } Get l1 // { arity: 3 } Return // { arity: 3 } Reduce group_by=[substr(char_to_text(#0{c_phone}), 1, 2)] aggregates=[count(*), sum(#1{c_acctbal})] // { arity: 3 } - Project (#1{c_acctbal}, #2) // { arity: 2 } + Project (#1{c_phone}, #2{c_acctbal}) // { arity: 2 } Join on=(#0{c_custkey} = #3{c_custkey}) type=differential // { arity: 4 } implementation %0:l1[#0]K » %1[#0]K @@ -2049,7 +2050,7 @@ materialize.public.q22: Get l2 // { arity: 1 } ArrangeBy keys=[[#0{o_custkey}]] // { arity: 1 } Distinct project=[#0{o_custkey}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{o_custkey}) // { arity: 1 } ReadIndex on=orders pk_orders_orderkey=[*** full scan ***] // { arity: 9 } Get l2 // { arity: 1 } diff --git a/test/sqllogictest/tpch_create_materialized_view.slt b/test/sqllogictest/tpch_create_materialized_view.slt index 39b450388e451..c02148000a028 100644 --- a/test/sqllogictest/tpch_create_materialized_view.slt +++ b/test/sqllogictest/tpch_create_materialized_view.slt @@ -191,10 +191,10 @@ ORDER BY l_linestatus; ---- materialize.public.q01: - Project (#0{l_returnflag}..=#5{sum}, #9{count}..=#11, #6) // { arity: 10 } + Project (#0{l_returnflag}..=#5{sum}, #9..=#11, #6{count}) // { arity: 10 } Map (bigint_to_numeric(case when (#6{count} = 0) then null else #6{count} end), (#2{sum_l_quantity} / #8), (#3{sum_l_extendedprice} / #8), (#7{sum_l_discount} / #8)) // { arity: 12 } Reduce group_by=[#4{l_returnflag}, #5{l_linestatus}] aggregates=[sum(#0{l_quantity}), sum(#1{l_extendedprice}), sum((#1{l_extendedprice} * (1 - #2{l_discount}))), sum(((#1{l_extendedprice} * (1 - #2{l_discount})) * (1 + #3{l_tax}))), count(*), sum(#2{l_discount})] // { arity: 8 } - Project (#4{l_returnflag}..=#9) // { arity: 6 } + Project (#4{l_quantity}..=#9{l_linestatus}) // { arity: 6 } Filter (date_to_timestamp(#10{l_shipdate}) <= 1998-10-02 00:00:00) // { arity: 16 } ReadIndex on=lineitem pk_lineitem_orderkey_linenumber=[*** full scan ***] // { arity: 16 } @@ -260,7 +260,7 @@ materialize.public.q02: ArrangeBy keys=[[#0{r_regionkey}]] // { arity: 3 } ReadIndex on=region pk_region_regionkey=[delta join lookup] // { arity: 3 } cte l4 = - Project (#0{p_partkey}, #2{s_name}, #10, #11, #13..=#15, #19, #22) // { arity: 9 } + Project (#0{p_partkey}, #2{p_mfgr}, #10{s_name}, #11{s_address}, #13{s_phone}..=#15{s_comment}, #19{ps_supplycost}, #22{n_name}) // { arity: 9 } Filter (#5{p_size} = 15) AND (#26{r_name} = "EUROPE") AND like["%BRASS"](varchar_to_text(#4{p_type})) // { arity: 28 } Join on=(#0{p_partkey} = #16{ps_partkey} AND #9{s_suppkey} = #17{ps_suppkey} AND #12{s_nationkey} = #21{n_nationkey} AND #23{n_regionkey} = #25{r_regionkey}) type=delta // { arity: 28 } implementation @@ -276,7 +276,7 @@ materialize.public.q02: Get l2 // { arity: 4 } Get l3 // { arity: 3 } Return // { arity: 8 } - Project (#5{s_address}, #2{n_name}, #8, #0{s_acctbal}, #1{s_name}, #3{p_partkey}, #4{p_mfgr}, #6{s_phone}) // { arity: 8 } + Project (#5{s_acctbal}, #2{s_name}, #8{n_name}, #0{p_partkey}, #1{p_mfgr}, #3{s_address}, #4{s_phone}, #6{s_comment}) // { arity: 8 } Join on=(#0{p_partkey} = #9{p_partkey} AND #7{ps_supplycost} = #10{min_ps_supplycost}) type=differential // { arity: 11 } implementation %1[#0, #1]UKK » %0:l4[#0, #7]KK @@ -284,7 +284,7 @@ 
materialize.public.q02: Get l4 // { arity: 9 } ArrangeBy keys=[[#0{p_partkey}, #1{min_ps_supplycost}]] // { arity: 2 } Reduce group_by=[#0{p_partkey}] aggregates=[min(#1{ps_supplycost})] // { arity: 2 } - Project (#0{p_partkey}, #4) // { arity: 2 } + Project (#0{p_partkey}, #4{ps_supplycost}) // { arity: 2 } Filter (#18{r_name} = "EUROPE") // { arity: 20 } Join on=(#0{p_partkey} = #1{ps_partkey} AND #2{ps_suppkey} = #6{s_suppkey} AND #9{s_nationkey} = #13{n_nationkey} AND #15{n_regionkey} = #17{r_regionkey}) type=delta // { arity: 20 } implementation @@ -345,9 +345,9 @@ ORDER BY o_orderdate; ---- materialize.public.q03: - Project (#0{o_orderkey}, #3{o_shippriority}, #1{sum}, #2{o_orderdate}) // { arity: 4 } + Project (#0{o_orderkey}, #3{sum}, #1{o_orderdate}, #2{o_shippriority}) // { arity: 4 } Reduce group_by=[#0{o_orderkey}..=#2{o_shippriority}] aggregates=[sum((#3{l_extendedprice} * (1 - #4{l_discount})))] // { arity: 4 } - Project (#8, #12, #15, #22, #23) // { arity: 5 } + Project (#8{o_orderkey}, #12{o_orderdate}, #15{o_shippriority}, #22{l_extendedprice}, #23{l_discount}) // { arity: 5 } Filter (#6{c_mktsegment} = "BUILDING") AND (#12{o_orderdate} < 1995-03-15) AND (#27{l_shipdate} > 1995-03-15) // { arity: 33 } Join on=(#0{c_custkey} = #9{o_custkey} AND #8{o_orderkey} = #17{l_orderkey}) type=delta // { arity: 33 } implementation @@ -400,7 +400,7 @@ ORDER BY ---- materialize.public.q04: Reduce group_by=[#0{o_orderpriority}] aggregates=[count(*)] // { arity: 2 } - Project (#5) // { arity: 1 } + Project (#5{o_orderpriority}) // { arity: 1 } Filter (#4{o_orderdate} >= 1993-07-01) AND (date_to_timestamp(#4{o_orderdate}) < 1993-10-01 00:00:00) // { arity: 10 } Join on=(#0{o_orderkey} = #9{l_orderkey}) type=differential // { arity: 10 } implementation @@ -453,7 +453,7 @@ ORDER BY ---- materialize.public.q05: Reduce group_by=[#2{n_name}] aggregates=[sum((#0{l_extendedprice} * (1 - #1{l_discount})))] // { arity: 2 } - Project (#19, #20, #24) // { arity: 3 } + Project (#19{l_extendedprice}, #20{l_discount}, #24{n_name}) // { arity: 3 } Filter (#28{r_name} = "ASIA") AND (#12{o_orderdate} < 1995-01-01) AND (#12{o_orderdate} >= 1994-01-01) // { arity: 30 } Join on=(#0{c_custkey} = #9{o_custkey} AND #3{c_nationkey} = #22{s_nationkey} = #23{n_nationkey} AND #8{o_orderkey} = #17{l_orderkey} AND #18{l_suppkey} = #21{s_suppkey} AND #25{n_regionkey} = #27{r_regionkey}) type=delta // { arity: 30 } implementation @@ -468,10 +468,10 @@ materialize.public.q05: ArrangeBy keys=[[#0{o_orderkey}], [#1{o_custkey}]] // { arity: 9 } ReadIndex on=orders pk_orders_orderkey=[delta join lookup] fk_orders_custkey=[delta join lookup] // { arity: 9 } ArrangeBy keys=[[#0{l_orderkey}], [#0{l_orderkey}, #1{l_suppkey}]] // { arity: 4 } - Project (#0{l_orderkey}, #2{l_extendedprice}, #5, #6) // { arity: 4 } + Project (#0{l_orderkey}, #2{l_suppkey}, #5{l_extendedprice}, #6{l_discount}) // { arity: 4 } ReadIndex on=lineitem pk_lineitem_orderkey_linenumber=[*** full scan ***] // { arity: 16 } ArrangeBy keys=[[#0{s_suppkey}, #1{s_nationkey}]] // { arity: 2 } - Project (#0{s_suppkey}, #3) // { arity: 2 } + Project (#0{s_suppkey}, #3{s_nationkey}) // { arity: 2 } Filter (#0{s_suppkey}) IS NOT NULL // { arity: 7 } ReadIndex on=supplier pk_supplier_suppkey=[*** full scan ***] // { arity: 7 } ArrangeBy keys=[[#0{n_nationkey}], [#2{n_regionkey}]] // { arity: 4 } @@ -513,7 +513,7 @@ materialize.public.q06: With cte l0 = Reduce aggregates=[sum((#0{l_extendedprice} * #1{l_discount}))] // { arity: 1 } - Project (#5, #6) // { arity: 2 } + 
Project (#5{l_extendedprice}, #6{l_discount}) // { arity: 2 } Filter (#4{l_quantity} < 24) AND (#6{l_discount} <= 0.07) AND (#6{l_discount} >= 0.05) AND (#10{l_shipdate} >= 1994-01-01) AND (date_to_timestamp(#10{l_shipdate}) < 1995-01-01 00:00:00) // { arity: 16 } ReadIndex on=lineitem pk_lineitem_orderkey_linenumber=[*** full scan ***] // { arity: 16 } Return // { arity: 1 } @@ -586,7 +586,7 @@ materialize.public.q07: ReadIndex on=nation pk_nation_nationkey=[delta join lookup] // { arity: 4 } Return // { arity: 4 } Reduce group_by=[#3{n_name}, #4{n_name}, extract_year_d(#2{l_shipdate})] aggregates=[sum((#0{l_extendedprice} * (1 - #1{l_discount})))] // { arity: 4 } - Project (#12, #13, #17, #41, #45) // { arity: 5 } + Project (#12{l_extendedprice}, #13{l_discount}, #17{l_shipdate}, #41{n_name}, #45{n_name}) // { arity: 5 } Filter (#17{l_shipdate} <= 1996-12-31) AND (#17{l_shipdate} >= 1995-01-01) AND (#48 OR #49) AND (#50 OR #51) AND ((#48 AND #51) OR (#49 AND #50)) // { arity: 52 } Map ((#41{n_name} = "FRANCE"), (#41{n_name} = "GERMANY"), (#45{n_name} = "FRANCE"), (#45{n_name} = "GERMANY")) // { arity: 52 } Join on=(#0{s_suppkey} = #9{l_suppkey} AND #3{s_nationkey} = #40{n_nationkey} AND #7{l_orderkey} = #23{o_orderkey} AND #24{o_custkey} = #32{c_custkey} AND #35{c_nationkey} = #44{n_nationkey}) type=delta // { arity: 48 } @@ -670,7 +670,7 @@ materialize.public.q08: Project (#0, #3) // { arity: 2 } Map ((#1{sum} / #2{sum})) // { arity: 4 } Reduce group_by=[extract_year_d(#2{o_orderdate})] aggregates=[sum(case when (#3{n_name} = "BRAZIL") then (#0{l_extendedprice} * (1 - #1{l_discount})) else 0 end), sum((#0{l_extendedprice} * (1 - #1{l_discount})))] // { arity: 3 } - Project (#21, #22, #36, #54) // { arity: 4 } + Project (#21{l_extendedprice}, #22{l_discount}, #36{o_orderdate}, #54{n_name}) // { arity: 4 } Filter (#58{r_name} = "AMERICA") AND (#36{o_orderdate} <= 1996-12-31) AND (#36{o_orderdate} >= 1995-01-01) AND ("ECONOMY ANODIZED STEEL" = varchar_to_text(#4{p_type})) // { arity: 60 } Join on=(#0{p_partkey} = #17{l_partkey} AND #9{s_suppkey} = #18{l_suppkey} AND #12{s_nationkey} = #53{n_nationkey} AND #16{l_orderkey} = #32{o_orderkey} AND #33{o_custkey} = #41{c_custkey} AND #44{c_nationkey} = #49{n_nationkey} AND #51{n_regionkey} = #57{r_regionkey}) type=delta // { arity: 60 } implementation @@ -758,7 +758,7 @@ ORDER BY ---- materialize.public.q09: Reduce group_by=[#5{n_name}, extract_year_d(#4{o_orderdate})] aggregates=[sum(((#1{l_extendedprice} * (1 - #2{l_discount})) - (#3{ps_supplycost} * #0{l_quantity})))] // { arity: 3 } - Project (#20..=#22, #35, #41, #47) // { arity: 6 } + Project (#20{l_quantity}..=#22{l_discount}, #35{ps_supplycost}, #41{o_orderdate}, #47{n_name}) // { arity: 6 } Filter like["%green%"](varchar_to_text(#1{p_name})) // { arity: 50 } Join on=(#0{p_partkey} = #17{l_partkey} = #32{ps_partkey} AND #9{s_suppkey} = #18{l_suppkey} = #33{ps_suppkey} AND #12{s_nationkey} = #46{n_nationkey} AND #16{l_orderkey} = #37{o_orderkey}) type=delta // { arity: 50 } implementation @@ -836,9 +836,9 @@ ORDER BY revenue DESC; ---- materialize.public.q10: - Project (#0{c_custkey}, #1{c_name}, #7{c_comment}, #2{sum}, #4{n_name}, #5{c_address}, #3{c_acctbal}, #6{c_phone}) // { arity: 8 } + Project (#0{c_custkey}, #1{c_name}, #7{sum}, #2{c_acctbal}, #4{n_name}, #5{c_address}, #3{c_phone}, #6{c_comment}) // { arity: 8 } Reduce group_by=[#0{c_custkey}, #1{c_name}, #4{c_acctbal}, #3{c_phone}, #8{n_name}, #2{c_address}, #5{c_comment}] aggregates=[sum((#6{l_extendedprice} * (1 - 
#7{l_discount})))] // { arity: 8 } - Project (#0{c_custkey}..=#2{c_address}, #4{c_acctbal}, #5{c_comment}, #7{l_discount}, #22, #23, #34) // { arity: 9 } + Project (#0{c_custkey}..=#2{c_address}, #4{c_phone}, #5{c_acctbal}, #7{c_comment}, #22{l_extendedprice}, #23{l_discount}, #34{n_name}) // { arity: 9 } Filter (#25{l_returnflag} = "R") AND (#12{o_orderdate} < 1994-01-01) AND (#12{o_orderdate} >= 1993-10-01) AND (date_to_timestamp(#12{o_orderdate}) < 1994-01-01 00:00:00) // { arity: 37 } Join on=(#0{c_custkey} = #9{o_custkey} AND #3{c_nationkey} = #33{n_nationkey} AND #8{o_orderkey} = #17{l_orderkey}) type=delta // { arity: 37 } implementation @@ -903,7 +903,7 @@ ORDER BY materialize.public.q11: With cte l0 = - Project (#0{ps_partkey}, #2{ps_supplycost}, #3) // { arity: 3 } + Project (#0{ps_partkey}, #2{ps_availqty}, #3{ps_supplycost}) // { arity: 3 } Filter (#13{n_name} = "GERMANY") // { arity: 16 } Join on=(#1{ps_suppkey} = #5{s_suppkey} AND #8{s_nationkey} = #12{n_nationkey}) type=delta // { arity: 16 } implementation @@ -927,7 +927,7 @@ materialize.public.q11: Get l0 // { arity: 3 } ArrangeBy keys=[[]] // { arity: 1 } Reduce aggregates=[sum((#1{ps_supplycost} * integer_to_numeric(#0{ps_availqty})))] // { arity: 1 } - Project (#1{ps_supplycost}, #2) // { arity: 2 } + Project (#1{ps_availqty}, #2{ps_supplycost}) // { arity: 2 } Get l0 // { arity: 3 } Used Indexes: @@ -976,7 +976,7 @@ ORDER BY ---- materialize.public.q12: Reduce group_by=[#1{l_shipmode}] aggregates=[sum(case when ((#0{o_orderpriority} = "2-HIGH") OR (#0{o_orderpriority} = "1-URGENT")) then 1 else 0 end), sum(case when ((#0{o_orderpriority} != "2-HIGH") AND (#0{o_orderpriority} != "1-URGENT")) then 1 else 0 end)] // { arity: 3 } - Project (#5, #23) // { arity: 2 } + Project (#5{o_orderpriority}, #23{l_shipmode}) // { arity: 2 } Filter (#21{l_receiptdate} >= 1994-01-01) AND (#19{l_shipdate} < #20{l_commitdate}) AND (#20{l_commitdate} < #21{l_receiptdate}) AND (date_to_timestamp(#21{l_receiptdate}) < 1995-01-01 00:00:00) AND ((#23{l_shipmode} = "MAIL") OR (#23{l_shipmode} = "SHIP")) // { arity: 25 } Join on=(#0{o_orderkey} = #9{l_orderkey}) type=differential // { arity: 25 } implementation @@ -1026,7 +1026,7 @@ materialize.public.q13: ArrangeBy keys=[[#0{c_custkey}]] // { arity: 8 } ReadIndex on=customer pk_customer_custkey=[differential join] // { arity: 8 } cte l1 = - Project (#0{c_custkey}, #8) // { arity: 2 } + Project (#0{c_custkey}, #8{o_orderkey}) // { arity: 2 } Filter NOT(like["%special%requests%"](varchar_to_text(#16{o_comment}))) // { arity: 17 } Join on=(#0{c_custkey} = #9{o_custkey}) type=differential // { arity: 17 } implementation @@ -1036,7 +1036,7 @@ materialize.public.q13: ReadIndex on=orders fk_orders_custkey=[differential join] // { arity: 9 } Return // { arity: 2 } Reduce group_by=[#0{count_o_orderkey}] aggregates=[count(*)] // { arity: 2 } - Project (#1) // { arity: 1 } + Project (#1{count_o_orderkey}) // { arity: 1 } Reduce group_by=[#0{c_custkey}] aggregates=[count(#1{o_orderkey})] // { arity: 2 } Union // { arity: 2 } Map (null) // { arity: 2 } @@ -1086,7 +1086,7 @@ materialize.public.q14: With cte l0 = Reduce aggregates=[sum(case when like["PROMO%"](varchar_to_text(#2{p_type})) then (#0{l_extendedprice} * (1 - #1{l_discount})) else 0 end), sum((#0{l_extendedprice} * (1 - #1{l_discount})))] // { arity: 2 } - Project (#5, #6, #20) // { arity: 3 } + Project (#5{l_extendedprice}, #6{l_discount}, #20{p_type}) // { arity: 3 } Filter (#10{l_shipdate} >= 1995-09-01) AND (date_to_timestamp(#10{l_shipdate}) 
< 1995-10-01 00:00:00) // { arity: 25 } Join on=(#1{l_partkey} = #16{p_partkey}) type=differential // { arity: 25 } implementation @@ -1158,11 +1158,11 @@ materialize.public.q15: With cte l0 = Reduce group_by=[#0{l_suppkey}] aggregates=[sum((#1{l_extendedprice} * (1 - #2{l_discount})))] // { arity: 2 } - Project (#2{l_discount}, #5, #6) // { arity: 3 } + Project (#2{l_suppkey}, #5{l_extendedprice}, #6{l_discount}) // { arity: 3 } Filter (#10{l_shipdate} >= 1996-01-01) AND (date_to_timestamp(#10{l_shipdate}) < 1996-04-01 00:00:00) // { arity: 16 } ReadIndex on=lineitem pk_lineitem_orderkey_linenumber=[*** full scan ***] // { arity: 16 } Return // { arity: 5 } - Project (#0{s_suppkey}..=#2{s_address}, #4{sum}, #8) // { arity: 5 } + Project (#0{s_suppkey}..=#2{s_address}, #4{s_phone}, #8{sum}) // { arity: 5 } Join on=(#0{s_suppkey} = #7{l_suppkey} AND #8{sum} = #9{max_sum}) type=delta // { arity: 10 } implementation %0:supplier » %1:l0[#0]UKA » %2[#0]UK @@ -1174,7 +1174,7 @@ materialize.public.q15: Get l0 // { arity: 2 } ArrangeBy keys=[[#0{max_sum}]] // { arity: 1 } Reduce aggregates=[max(#0{sum})] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{sum}) // { arity: 1 } Get l0 // { arity: 2 } Used Indexes: @@ -1227,7 +1227,7 @@ ORDER BY materialize.public.q16: With cte l0 = - Project (#1{p_brand}, #8..=#10) // { arity: 4 } + Project (#1{ps_suppkey}, #8{p_brand}..=#10{p_size}) // { arity: 4 } Filter (#8{p_brand} != "Brand#45") AND NOT(like["MEDIUM POLISHED%"](varchar_to_text(#9{p_type}))) AND ((#10{p_size} = 3) OR (#10{p_size} = 9) OR (#10{p_size} = 14) OR (#10{p_size} = 19) OR (#10{p_size} = 23) OR (#10{p_size} = 36) OR (#10{p_size} = 45) OR (#10{p_size} = 49)) // { arity: 14 } Join on=(#0{ps_partkey} = #5{p_partkey}) type=differential // { arity: 14 } implementation @@ -1303,7 +1303,7 @@ materialize.public.q17: ArrangeBy keys=[[#1{l_partkey}]] // { arity: 16 } ReadIndex on=lineitem fk_lineitem_partkey=[differential join] // { arity: 16 } cte l1 = - Project (#1{l_quantity}, #4, #5) // { arity: 3 } + Project (#1{l_partkey}, #4{l_quantity}, #5{l_extendedprice}) // { arity: 3 } Filter (#19{p_brand} = "Brand#23") AND (#22{p_container} = "MED BOX") // { arity: 25 } Join on=(#1{l_partkey} = #16{p_partkey}) type=differential // { arity: 25 } implementation @@ -1313,7 +1313,7 @@ materialize.public.q17: ReadIndex on=part pk_part_partkey=[differential join] // { arity: 9 } cte l2 = Reduce aggregates=[sum(#0{l_extendedprice})] // { arity: 1 } - Project (#2) // { arity: 1 } + Project (#2{l_extendedprice}) // { arity: 1 } Filter (#1{l_quantity} < (0.2 * (#4{sum_l_quantity} / bigint_to_numeric(case when (#5{count} = 0) then null else #5{count} end)))) // { arity: 6 } Join on=(#0{l_partkey} = #3{l_partkey}) type=differential // { arity: 6 } implementation @@ -1322,7 +1322,7 @@ materialize.public.q17: Get l1 // { arity: 3 } ArrangeBy keys=[[#0{l_partkey}]] // { arity: 3 } Reduce group_by=[#0{l_partkey}] aggregates=[sum(#1{l_quantity}), count(*)] // { arity: 3 } - Project (#0{l_partkey}, #5) // { arity: 2 } + Project (#0{l_partkey}, #5{l_quantity}) // { arity: 2 } Join on=(#0{l_partkey} = #2{l_partkey}) type=differential // { arity: 17 } implementation %0[#0]UKA » %1:l0[#1]KA @@ -1396,7 +1396,7 @@ materialize.public.q18: ArrangeBy keys=[[#0{l_orderkey}]] // { arity: 16 } ReadIndex on=lineitem fk_lineitem_orderkey=[differential join, delta join lookup] // { arity: 16 } cte l1 = - Project (#0{c_custkey}, #1{c_name}, #8, #11, #12, #21) // { arity: 6 } + Project (#0{c_custkey}, #1{c_name}, 
#8{o_orderkey}, #11{o_totalprice}, #12{o_orderdate}, #21{l_quantity}) // { arity: 6 } Join on=(#0{c_custkey} = #9{o_custkey} AND #8{o_orderkey} = #17{l_orderkey}) type=delta // { arity: 33 } implementation %0:customer » %1:orders[#1]KA » %2:l0[#0]KA @@ -1418,13 +1418,13 @@ materialize.public.q18: Get l1 // { arity: 6 } ArrangeBy keys=[[#0{o_orderkey}]] // { arity: 2 } Reduce group_by=[#0{o_orderkey}] aggregates=[sum(#1{l_quantity})] // { arity: 2 } - Project (#0{o_orderkey}, #5) // { arity: 2 } + Project (#0{o_orderkey}, #5{l_quantity}) // { arity: 2 } Join on=(#0{o_orderkey} = #1{l_orderkey}) type=differential // { arity: 17 } implementation %0[#0]UKA » %1:l0[#0]KA ArrangeBy keys=[[#0{o_orderkey}]] // { arity: 1 } Distinct project=[#0{o_orderkey}] // { arity: 1 } - Project (#2) // { arity: 1 } + Project (#2{o_orderkey}) // { arity: 1 } Get l1 // { arity: 6 } Get l0 // { arity: 16 } @@ -1483,7 +1483,7 @@ materialize.public.q19: With cte l0 = Reduce aggregates=[sum((#0{l_extendedprice} * (1 - #1{l_discount})))] // { arity: 1 } - Project (#5, #6) // { arity: 2 } + Project (#5{l_extendedprice}, #6{l_discount}) // { arity: 2 } Filter (#13{l_shipinstruct} = "DELIVER IN PERSON") AND (#21{p_size} >= 1) AND ((#14{l_shipmode} = "AIR") OR (#14{l_shipmode} = "AIR REG")) AND ((#25 AND #26) OR (#27 AND #28) OR (#29 AND #30)) AND ((#31 AND #32 AND #33) OR (#34 AND #35 AND #36) OR (#37 AND #38 AND #39)) AND ((#25 AND #26 AND #34 AND #35 AND #36) OR (#27 AND #28 AND #37 AND #38 AND #39) OR (#29 AND #30 AND #31 AND #32 AND #33)) // { arity: 40 } Map ((#4{l_quantity} <= 20), (#4{l_quantity} >= 10), (#4{l_quantity} <= 30), (#4{l_quantity} >= 20), (#4{l_quantity} <= 11), (#4{l_quantity} >= 1), (#19{p_brand} = "Brand#12"), (#21{p_size} <= 5), ((#22{p_container} = "SM BOX") OR (#22{p_container} = "SM PKG") OR (#22{p_container} = "SM CASE") OR (#22{p_container} = "SM PACK")), (#19{p_brand} = "Brand#23"), (#21{p_size} <= 10), ((#22{p_container} = "MED BAG") OR (#22{p_container} = "MED BOX") OR (#22{p_container} = "MED PKG") OR (#22{p_container} = "MED PACK")), (#19{p_brand} = "Brand#34"), (#21{p_size} <= 15), ((#22{p_container} = "LG BOX") OR (#22{p_container} = "LG PKG") OR (#22{p_container} = "LG CASE") OR (#22{p_container} = "LG PACK"))) // { arity: 40 } Join on=(#1{l_partkey} = #16{p_partkey}) type=differential // { arity: 25 } @@ -1587,7 +1587,7 @@ materialize.public.q20: Filter (#0{p_partkey}) IS NOT NULL AND like["forest%"](varchar_to_text(#1{p_name})) // { arity: 9 } ReadIndex on=part pk_part_partkey=[*** full scan ***] // { arity: 9 } Return // { arity: 2 } - Project (#1{s_address}, #2) // { arity: 2 } + Project (#1{s_name}, #2{s_address}) // { arity: 2 } Join on=(#0{s_suppkey} = #3{s_suppkey}) type=differential // { arity: 4 } implementation %1[#0]UKA » %0:l0[#0]K @@ -1601,21 +1601,21 @@ materialize.public.q20: implementation %1[#1, #0]UKK » %0:l1[#0, #1]KKf ArrangeBy keys=[[#0{s_suppkey}, #1{ps_partkey}]] // { arity: 3 } - Project (#0{s_suppkey}, #1{ps_partkey}, #3) // { arity: 3 } + Project (#0{s_suppkey}, #1{ps_partkey}, #3{ps_availqty}) // { arity: 3 } Filter (#0{s_suppkey} = #2{ps_suppkey}) // { arity: 4 } Get l1 // { arity: 4 } ArrangeBy keys=[[#1{ps_suppkey}, #0{ps_partkey}]] // { arity: 3 } Project (#0{ps_partkey}, #1{ps_suppkey}, #3) // { arity: 3 } Map ((0.5 * #2{sum_l_quantity})) // { arity: 4 } Reduce group_by=[#0{ps_partkey}, #1{ps_suppkey}] aggregates=[sum(#2{l_quantity})] // { arity: 3 } - Project (#0{ps_partkey}, #1{ps_suppkey}, #6) // { arity: 3 } + Project (#0{ps_partkey}, 
#1{ps_suppkey}, #6{l_quantity}) // { arity: 3 } Filter (#12{l_shipdate} >= 1995-01-01) AND (date_to_timestamp(#12{l_shipdate}) < 1996-01-01 00:00:00) // { arity: 18 } Join on=(#0{ps_partkey} = #3{l_partkey} AND #1{ps_suppkey} = #4{l_suppkey}) type=differential // { arity: 18 } implementation %0[#0, #1]UKKA » %1:lineitem[#1, #2]KKAiif ArrangeBy keys=[[#0{ps_partkey}, #1{ps_suppkey}]] // { arity: 2 } Distinct project=[#0{ps_partkey}, #1{ps_suppkey}] // { arity: 2 } - Project (#1{ps_suppkey}, #2) // { arity: 2 } + Project (#1{ps_partkey}, #2{ps_suppkey}) // { arity: 2 } Get l1 // { arity: 4 } ArrangeBy keys=[[#1{l_partkey}, #2{l_suppkey}]] // { arity: 16 } ReadIndex on=lineitem fk_lineitem_partsuppkey=[differential join] // { arity: 16 } @@ -1679,7 +1679,7 @@ ORDER BY materialize.public.q21: With cte l0 = - Project (#0{s_suppkey}, #1{s_name}, #7) // { arity: 3 } + Project (#0{s_suppkey}, #1{s_name}, #7{l_orderkey}) // { arity: 3 } Filter (#25{o_orderstatus} = "F") AND (#33{n_name} = "SAUDI ARABIA") AND (#19{l_receiptdate} > #18{l_commitdate}) // { arity: 36 } Join on=(#0{s_suppkey} = #9{l_suppkey} AND #3{s_nationkey} = #32{n_nationkey} AND #7{l_orderkey} = #23{o_orderkey}) type=delta // { arity: 36 } implementation @@ -1714,16 +1714,16 @@ materialize.public.q21: %1:l1[#0]KA » %0[#0]K ArrangeBy keys=[[#0{l_orderkey}]] // { arity: 2 } Distinct project=[#1{l_orderkey}, #0{s_suppkey}] // { arity: 2 } - Project (#0{s_suppkey}, #2) // { arity: 2 } + Project (#0{s_suppkey}, #2{l_orderkey}) // { arity: 2 } Get l0 // { arity: 3 } Get l1 // { arity: 16 } cte l3 = Distinct project=[#1{l_orderkey}, #0{s_suppkey}] // { arity: 2 } - Project (#0{s_suppkey}, #2) // { arity: 2 } + Project (#0{s_suppkey}, #2{l_orderkey}) // { arity: 2 } Get l2 // { arity: 3 } Return // { arity: 2 } Reduce group_by=[#0{s_name}] aggregates=[count(*)] // { arity: 2 } - Project (#1) // { arity: 1 } + Project (#1{s_name}) // { arity: 1 } Join on=(#0{s_suppkey} = #4{s_suppkey} AND #2{l_orderkey} = #3{l_orderkey}) type=differential // { arity: 5 } implementation %0:l2[#2, #0]KK » %1[#0, #1]KK @@ -1811,8 +1811,9 @@ ORDER BY materialize.public.q22: With cte l0 = - Map (substr(char_to_text(#4{c_phone}), 1, 2)) // { arity: 9 } - ReadIndex on=customer pk_customer_custkey=[*** full scan ***] // { arity: 8 } + Project (#0{c_custkey}, #4{c_phone}, #5{c_acctbal}, #8) // { arity: 4 } + Map (substr(char_to_text(#4{c_phone}), 1, 2)) // { arity: 9 } + ReadIndex on=customer pk_customer_custkey=[*** full scan ***] // { arity: 8 } cte l1 = Project (#0{c_custkey}..=#2{c_acctbal}) // { arity: 3 } Filter (#2{c_acctbal} > (#3{sum_c_acctbal} / bigint_to_numeric(case when (#4{count} = 0) then null else #4{count} end))) // { arity: 5 } @@ -1820,21 +1821,21 @@ materialize.public.q22: implementation %1[×]UA » %0:l0[×]ef ArrangeBy keys=[[]] // { arity: 3 } - Project (#0{c_custkey}, #4, #5) // { arity: 3 } - Filter ((#8 = "13") OR (#8 = "17") OR (#8 = "18") OR (#8 = "23") OR (#8 = "29") OR (#8 = "30") OR (#8 = "31")) // { arity: 9 } - Get l0 // { arity: 9 } + Project (#0{c_custkey}..=#2{c_acctbal}) // { arity: 3 } + Filter ((#3 = "13") OR (#3 = "17") OR (#3 = "18") OR (#3 = "23") OR (#3 = "29") OR (#3 = "30") OR (#3 = "31")) // { arity: 4 } + Get l0 // { arity: 4 } ArrangeBy keys=[[]] // { arity: 2 } Reduce aggregates=[sum(#0{c_acctbal}), count(*)] // { arity: 2 } - Project (#5) // { arity: 1 } - Filter (#5{c_acctbal} > 0) AND ((#8 = "13") OR (#8 = "17") OR (#8 = "18") OR (#8 = "23") OR (#8 = "29") OR (#8 = "30") OR (#8 = "31")) // { arity: 9 } - Get l0 // { 
arity: 9 } + Project (#2{c_acctbal}) // { arity: 1 } + Filter (#2{c_acctbal} > 0) AND ((#3 = "13") OR (#3 = "17") OR (#3 = "18") OR (#3 = "23") OR (#3 = "29") OR (#3 = "30") OR (#3 = "31")) // { arity: 4 } + Get l0 // { arity: 4 } cte l2 = Distinct project=[#0{c_custkey}] // { arity: 1 } Project (#0{c_custkey}) // { arity: 1 } Get l1 // { arity: 3 } Return // { arity: 3 } Reduce group_by=[substr(char_to_text(#0{c_phone}), 1, 2)] aggregates=[count(*), sum(#1{c_acctbal})] // { arity: 3 } - Project (#1{c_acctbal}, #2) // { arity: 2 } + Project (#1{c_phone}, #2{c_acctbal}) // { arity: 2 } Join on=(#0{c_custkey} = #3{c_custkey}) type=differential // { arity: 4 } implementation %0:l1[#0]K » %1[#0]K @@ -1851,7 +1852,7 @@ materialize.public.q22: Get l2 // { arity: 1 } ArrangeBy keys=[[#0{o_custkey}]] // { arity: 1 } Distinct project=[#0{o_custkey}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{o_custkey}) // { arity: 1 } ReadIndex on=orders pk_orders_orderkey=[*** full scan ***] // { arity: 9 } Get l2 // { arity: 1 } diff --git a/test/sqllogictest/tpch_select.slt b/test/sqllogictest/tpch_select.slt index 608bed2adc0a0..21f6d32767d58 100644 --- a/test/sqllogictest/tpch_select.slt +++ b/test/sqllogictest/tpch_select.slt @@ -190,10 +190,10 @@ ORDER BY ---- Explained Query: Finish order_by=[#0{l_returnflag} asc nulls_last, #1{l_linestatus} asc nulls_last] output=[#0..=#9] - Project (#0{l_returnflag}..=#5{sum}, #9{count}..=#11, #6) // { arity: 10 } + Project (#0{l_returnflag}..=#5{sum}, #9..=#11, #6{count}) // { arity: 10 } Map (bigint_to_numeric(case when (#6{count} = 0) then null else #6{count} end), (#2{sum_l_quantity} / #8), (#3{sum_l_extendedprice} / #8), (#7{sum_l_discount} / #8)) // { arity: 12 } Reduce group_by=[#4{l_returnflag}, #5{l_linestatus}] aggregates=[sum(#0{l_quantity}), sum(#1{l_extendedprice}), sum((#1{l_extendedprice} * (1 - #2{l_discount}))), sum(((#1{l_extendedprice} * (1 - #2{l_discount})) * (1 + #3{l_tax}))), count(*), sum(#2{l_discount})] // { arity: 8 } - Project (#4{l_returnflag}..=#9) // { arity: 6 } + Project (#4{l_quantity}..=#9{l_linestatus}) // { arity: 6 } Filter (date_to_timestamp(#10{l_shipdate}) <= 1998-10-02 00:00:00) // { arity: 16 } ReadIndex on=lineitem pk_lineitem_orderkey_linenumber=[*** full scan ***] // { arity: 16 } @@ -259,7 +259,7 @@ Explained Query: ArrangeBy keys=[[#0{r_regionkey}]] // { arity: 3 } ReadIndex on=region pk_region_regionkey=[delta join lookup] // { arity: 3 } cte l4 = - Project (#0{p_partkey}, #2{s_name}, #10, #11, #13..=#15, #19, #22) // { arity: 9 } + Project (#0{p_partkey}, #2{p_mfgr}, #10{s_name}, #11{s_address}, #13{s_phone}..=#15{s_comment}, #19{ps_supplycost}, #22{n_name}) // { arity: 9 } Filter (#5{p_size} = 15) AND (#26{r_name} = "EUROPE") AND like["%BRASS"](varchar_to_text(#4{p_type})) // { arity: 28 } Join on=(#0{p_partkey} = #16{ps_partkey} AND #9{s_suppkey} = #17{ps_suppkey} AND #12{s_nationkey} = #21{n_nationkey} AND #23{n_regionkey} = #25{r_regionkey}) type=delta // { arity: 28 } implementation @@ -275,7 +275,7 @@ Explained Query: Get l2 // { arity: 4 } Get l3 // { arity: 3 } Return // { arity: 8 } - Project (#5{s_address}, #2{n_name}, #8, #0{s_acctbal}, #1{s_name}, #3{p_partkey}, #4{p_mfgr}, #6{s_phone}) // { arity: 8 } + Project (#5{s_acctbal}, #2{s_name}, #8{n_name}, #0{p_partkey}, #1{p_mfgr}, #3{s_address}, #4{s_phone}, #6{s_comment}) // { arity: 8 } Join on=(#0{p_partkey} = #9{p_partkey} AND #7{ps_supplycost} = #10{min_ps_supplycost}) type=differential // { arity: 11 } implementation %1[#0, #1]UKK » 
%0:l4[#0, #7]KK @@ -283,7 +283,7 @@ Explained Query: Get l4 // { arity: 9 } ArrangeBy keys=[[#0{p_partkey}, #1{min_ps_supplycost}]] // { arity: 2 } Reduce group_by=[#0{p_partkey}] aggregates=[min(#1{ps_supplycost})] // { arity: 2 } - Project (#0{p_partkey}, #4) // { arity: 2 } + Project (#0{p_partkey}, #4{ps_supplycost}) // { arity: 2 } Filter (#18{r_name} = "EUROPE") // { arity: 20 } Join on=(#0{p_partkey} = #1{ps_partkey} AND #2{ps_suppkey} = #6{s_suppkey} AND #9{s_nationkey} = #13{n_nationkey} AND #15{n_regionkey} = #17{r_regionkey}) type=delta // { arity: 20 } implementation @@ -344,9 +344,9 @@ ORDER BY ---- Explained Query: Finish order_by=[#1{sum} desc nulls_first, #2{o_orderdate} asc nulls_last] output=[#0..=#3] - Project (#0{o_orderkey}, #3{o_shippriority}, #1{sum}, #2{o_orderdate}) // { arity: 4 } + Project (#0{o_orderkey}, #3{sum}, #1{o_orderdate}, #2{o_shippriority}) // { arity: 4 } Reduce group_by=[#0{o_orderkey}..=#2{o_shippriority}] aggregates=[sum((#3{l_extendedprice} * (1 - #4{l_discount})))] // { arity: 4 } - Project (#8, #12, #15, #22, #23) // { arity: 5 } + Project (#8{o_orderkey}, #12{o_orderdate}, #15{o_shippriority}, #22{l_extendedprice}, #23{l_discount}) // { arity: 5 } Filter (#6{c_mktsegment} = "BUILDING") AND (#12{o_orderdate} < 1995-03-15) AND (#27{l_shipdate} > 1995-03-15) // { arity: 33 } Join on=(#0{c_custkey} = #9{o_custkey} AND #8{o_orderkey} = #17{l_orderkey}) type=delta // { arity: 33 } implementation @@ -399,7 +399,7 @@ ORDER BY Explained Query: Finish order_by=[#0{o_orderpriority} asc nulls_last] output=[#0, #1] Reduce group_by=[#0{o_orderpriority}] aggregates=[count(*)] // { arity: 2 } - Project (#5) // { arity: 1 } + Project (#5{o_orderpriority}) // { arity: 1 } Filter (#4{o_orderdate} >= 1993-07-01) AND (date_to_timestamp(#4{o_orderdate}) < 1993-10-01 00:00:00) // { arity: 10 } Join on=(#0{o_orderkey} = #9{l_orderkey}) type=differential // { arity: 10 } implementation @@ -452,7 +452,7 @@ ORDER BY Explained Query: Finish order_by=[#1{sum} desc nulls_first] output=[#0, #1] Reduce group_by=[#2{n_name}] aggregates=[sum((#0{l_extendedprice} * (1 - #1{l_discount})))] // { arity: 2 } - Project (#19, #20, #24) // { arity: 3 } + Project (#19{l_extendedprice}, #20{l_discount}, #24{n_name}) // { arity: 3 } Filter (#28{r_name} = "ASIA") AND (#12{o_orderdate} < 1995-01-01) AND (#12{o_orderdate} >= 1994-01-01) // { arity: 30 } Join on=(#0{c_custkey} = #9{o_custkey} AND #3{c_nationkey} = #22{s_nationkey} = #23{n_nationkey} AND #8{o_orderkey} = #17{l_orderkey} AND #18{l_suppkey} = #21{s_suppkey} AND #25{n_regionkey} = #27{r_regionkey}) type=delta // { arity: 30 } implementation @@ -467,10 +467,10 @@ Explained Query: ArrangeBy keys=[[#0{o_orderkey}], [#1{o_custkey}]] // { arity: 9 } ReadIndex on=orders pk_orders_orderkey=[delta join lookup] fk_orders_custkey=[delta join lookup] // { arity: 9 } ArrangeBy keys=[[#0{l_orderkey}], [#0{l_orderkey}, #1{l_suppkey}]] // { arity: 4 } - Project (#0{l_orderkey}, #2{l_extendedprice}, #5, #6) // { arity: 4 } + Project (#0{l_orderkey}, #2{l_suppkey}, #5{l_extendedprice}, #6{l_discount}) // { arity: 4 } ReadIndex on=lineitem pk_lineitem_orderkey_linenumber=[*** full scan ***] // { arity: 16 } ArrangeBy keys=[[#0{s_suppkey}, #1{s_nationkey}]] // { arity: 2 } - Project (#0{s_suppkey}, #3) // { arity: 2 } + Project (#0{s_suppkey}, #3{s_nationkey}) // { arity: 2 } Filter (#0{s_suppkey}) IS NOT NULL // { arity: 7 } ReadIndex on=supplier pk_supplier_suppkey=[*** full scan ***] // { arity: 7 } ArrangeBy keys=[[#0{n_nationkey}], 
[#2{n_regionkey}]] // { arity: 4 } @@ -511,7 +511,7 @@ Explained Query: With cte l0 = Reduce aggregates=[sum((#0{l_extendedprice} * #1{l_discount}))] // { arity: 1 } - Project (#5, #6) // { arity: 2 } + Project (#5{l_extendedprice}, #6{l_discount}) // { arity: 2 } Filter (#4{l_quantity} < 24) AND (#6{l_discount} <= 0.07) AND (#6{l_discount} >= 0.05) AND (#10{l_shipdate} >= 1994-01-01) AND (date_to_timestamp(#10{l_shipdate}) < 1995-01-01 00:00:00) // { arity: 16 } ReadIndex on=lineitem pk_lineitem_orderkey_linenumber=[*** full scan ***] // { arity: 16 } Return // { arity: 1 } @@ -584,7 +584,7 @@ Explained Query: ReadIndex on=nation pk_nation_nationkey=[delta join lookup] // { arity: 4 } Return // { arity: 4 } Reduce group_by=[#3{n_name}, #4{n_name}, extract_year_d(#2{l_shipdate})] aggregates=[sum((#0{l_extendedprice} * (1 - #1{l_discount})))] // { arity: 4 } - Project (#12, #13, #17, #41, #45) // { arity: 5 } + Project (#12{l_extendedprice}, #13{l_discount}, #17{l_shipdate}, #41{n_name}, #45{n_name}) // { arity: 5 } Filter (#17{l_shipdate} <= 1996-12-31) AND (#17{l_shipdate} >= 1995-01-01) AND (#48 OR #49) AND (#50 OR #51) AND ((#48 AND #51) OR (#49 AND #50)) // { arity: 52 } Map ((#41{n_name} = "FRANCE"), (#41{n_name} = "GERMANY"), (#45{n_name} = "FRANCE"), (#45{n_name} = "GERMANY")) // { arity: 52 } Join on=(#0{s_suppkey} = #9{l_suppkey} AND #3{s_nationkey} = #40{n_nationkey} AND #7{l_orderkey} = #23{o_orderkey} AND #24{o_custkey} = #32{c_custkey} AND #35{c_nationkey} = #44{n_nationkey}) type=delta // { arity: 48 } @@ -668,7 +668,7 @@ Explained Query: Project (#0, #3) // { arity: 2 } Map ((#1{sum} / #2{sum})) // { arity: 4 } Reduce group_by=[extract_year_d(#2{o_orderdate})] aggregates=[sum(case when (#3{n_name} = "BRAZIL") then (#0{l_extendedprice} * (1 - #1{l_discount})) else 0 end), sum((#0{l_extendedprice} * (1 - #1{l_discount})))] // { arity: 3 } - Project (#21, #22, #36, #54) // { arity: 4 } + Project (#21{l_extendedprice}, #22{l_discount}, #36{o_orderdate}, #54{n_name}) // { arity: 4 } Filter (#58{r_name} = "AMERICA") AND (#36{o_orderdate} <= 1996-12-31) AND (#36{o_orderdate} >= 1995-01-01) AND ("ECONOMY ANODIZED STEEL" = varchar_to_text(#4{p_type})) // { arity: 60 } Join on=(#0{p_partkey} = #17{l_partkey} AND #9{s_suppkey} = #18{l_suppkey} AND #12{s_nationkey} = #53{n_nationkey} AND #16{l_orderkey} = #32{o_orderkey} AND #33{o_custkey} = #41{c_custkey} AND #44{c_nationkey} = #49{n_nationkey} AND #51{n_regionkey} = #57{r_regionkey}) type=delta // { arity: 60 } implementation @@ -756,7 +756,7 @@ ORDER BY Explained Query: Finish order_by=[#0{n_name} asc nulls_last, #1 desc nulls_first] output=[#0..=#2] Reduce group_by=[#5{n_name}, extract_year_d(#4{o_orderdate})] aggregates=[sum(((#1{l_extendedprice} * (1 - #2{l_discount})) - (#3{ps_supplycost} * #0{l_quantity})))] // { arity: 3 } - Project (#20..=#22, #35, #41, #47) // { arity: 6 } + Project (#20{l_quantity}..=#22{l_discount}, #35{ps_supplycost}, #41{o_orderdate}, #47{n_name}) // { arity: 6 } Filter like["%green%"](varchar_to_text(#1{p_name})) // { arity: 50 } Join on=(#0{p_partkey} = #17{l_partkey} = #32{ps_partkey} AND #9{s_suppkey} = #18{l_suppkey} = #33{ps_suppkey} AND #12{s_nationkey} = #46{n_nationkey} AND #16{l_orderkey} = #37{o_orderkey}) type=delta // { arity: 50 } implementation @@ -834,9 +834,9 @@ ORDER BY ---- Explained Query: Finish order_by=[#2{sum} desc nulls_first] output=[#0..=#7] - Project (#0{c_custkey}, #1{c_name}, #7{c_comment}, #2{sum}, #4{n_name}, #5{c_address}, #3{c_acctbal}, #6{c_phone}) // { arity: 8 } + 
Project (#0{c_custkey}, #1{c_name}, #7{sum}, #2{c_acctbal}, #4{n_name}, #5{c_address}, #3{c_phone}, #6{c_comment}) // { arity: 8 } Reduce group_by=[#0{c_custkey}, #1{c_name}, #4{c_acctbal}, #3{c_phone}, #8{n_name}, #2{c_address}, #5{c_comment}] aggregates=[sum((#6{l_extendedprice} * (1 - #7{l_discount})))] // { arity: 8 } - Project (#0{c_custkey}..=#2{c_address}, #4{c_acctbal}, #5{c_comment}, #7{l_discount}, #22, #23, #34) // { arity: 9 } + Project (#0{c_custkey}..=#2{c_address}, #4{c_phone}, #5{c_acctbal}, #7{c_comment}, #22{l_extendedprice}, #23{l_discount}, #34{n_name}) // { arity: 9 } Filter (#25{l_returnflag} = "R") AND (#12{o_orderdate} < 1994-01-01) AND (#12{o_orderdate} >= 1993-10-01) AND (date_to_timestamp(#12{o_orderdate}) < 1994-01-01 00:00:00) // { arity: 37 } Join on=(#0{c_custkey} = #9{o_custkey} AND #3{c_nationkey} = #33{n_nationkey} AND #8{o_orderkey} = #17{l_orderkey}) type=delta // { arity: 37 } implementation @@ -901,7 +901,7 @@ Explained Query: Finish order_by=[#1{sum} desc nulls_first] output=[#0, #1] With cte l0 = - Project (#0{ps_partkey}, #2{ps_supplycost}, #3) // { arity: 3 } + Project (#0{ps_partkey}, #2{ps_availqty}, #3{ps_supplycost}) // { arity: 3 } Filter (#13{n_name} = "GERMANY") // { arity: 16 } Join on=(#1{ps_suppkey} = #5{s_suppkey} AND #8{s_nationkey} = #12{n_nationkey}) type=delta // { arity: 16 } implementation @@ -925,7 +925,7 @@ Explained Query: Get l0 // { arity: 3 } ArrangeBy keys=[[]] // { arity: 1 } Reduce aggregates=[sum((#1{ps_supplycost} * integer_to_numeric(#0{ps_availqty})))] // { arity: 1 } - Project (#1{ps_supplycost}, #2) // { arity: 2 } + Project (#1{ps_availqty}, #2{ps_supplycost}) // { arity: 2 } Get l0 // { arity: 3 } Used Indexes: @@ -974,7 +974,7 @@ ORDER BY Explained Query: Finish order_by=[#0{l_shipmode} asc nulls_last] output=[#0..=#2] Reduce group_by=[#1{l_shipmode}] aggregates=[sum(case when ((#0{o_orderpriority} = "2-HIGH") OR (#0{o_orderpriority} = "1-URGENT")) then 1 else 0 end), sum(case when ((#0{o_orderpriority} != "2-HIGH") AND (#0{o_orderpriority} != "1-URGENT")) then 1 else 0 end)] // { arity: 3 } - Project (#5, #23) // { arity: 2 } + Project (#5{o_orderpriority}, #23{l_shipmode}) // { arity: 2 } Filter (#21{l_receiptdate} >= 1994-01-01) AND (#19{l_shipdate} < #20{l_commitdate}) AND (#20{l_commitdate} < #21{l_receiptdate}) AND (date_to_timestamp(#21{l_receiptdate}) < 1995-01-01 00:00:00) AND ((#23{l_shipmode} = "MAIL") OR (#23{l_shipmode} = "SHIP")) // { arity: 25 } Join on=(#0{o_orderkey} = #9{l_orderkey}) type=differential // { arity: 25 } implementation @@ -1024,7 +1024,7 @@ Explained Query: ArrangeBy keys=[[#0{c_custkey}]] // { arity: 8 } ReadIndex on=customer pk_customer_custkey=[differential join] // { arity: 8 } cte l1 = - Project (#0{c_custkey}, #8) // { arity: 2 } + Project (#0{c_custkey}, #8{o_orderkey}) // { arity: 2 } Filter NOT(like["%special%requests%"](varchar_to_text(#16{o_comment}))) // { arity: 17 } Join on=(#0{c_custkey} = #9{o_custkey}) type=differential // { arity: 17 } implementation @@ -1034,7 +1034,7 @@ Explained Query: ReadIndex on=orders fk_orders_custkey=[differential join] // { arity: 9 } Return // { arity: 2 } Reduce group_by=[#0{count_o_orderkey}] aggregates=[count(*)] // { arity: 2 } - Project (#1) // { arity: 1 } + Project (#1{count_o_orderkey}) // { arity: 1 } Reduce group_by=[#0{c_custkey}] aggregates=[count(#1{o_orderkey})] // { arity: 2 } Union // { arity: 2 } Map (null) // { arity: 2 } @@ -1083,7 +1083,7 @@ Explained Query: With cte l0 = Reduce aggregates=[sum(case when 
like["PROMO%"](varchar_to_text(#2{p_type})) then (#0{l_extendedprice} * (1 - #1{l_discount})) else 0 end), sum((#0{l_extendedprice} * (1 - #1{l_discount})))] // { arity: 2 } - Project (#5, #6, #20) // { arity: 3 } + Project (#5{l_extendedprice}, #6{l_discount}, #20{p_type}) // { arity: 3 } Filter (#10{l_shipdate} >= 1995-09-01) AND (date_to_timestamp(#10{l_shipdate}) < 1995-10-01 00:00:00) // { arity: 25 } Join on=(#1{l_partkey} = #16{p_partkey}) type=differential // { arity: 25 } implementation @@ -1155,11 +1155,11 @@ Explained Query: With cte l0 = Reduce group_by=[#0{l_suppkey}] aggregates=[sum((#1{l_extendedprice} * (1 - #2{l_discount})))] // { arity: 2 } - Project (#2{l_discount}, #5, #6) // { arity: 3 } + Project (#2{l_suppkey}, #5{l_extendedprice}, #6{l_discount}) // { arity: 3 } Filter (#10{l_shipdate} >= 1996-01-01) AND (date_to_timestamp(#10{l_shipdate}) < 1996-04-01 00:00:00) // { arity: 16 } ReadIndex on=lineitem pk_lineitem_orderkey_linenumber=[*** full scan ***] // { arity: 16 } Return // { arity: 5 } - Project (#0{s_suppkey}..=#2{s_address}, #4{sum}, #8) // { arity: 5 } + Project (#0{s_suppkey}..=#2{s_address}, #4{s_phone}, #8{sum}) // { arity: 5 } Join on=(#0{s_suppkey} = #7{l_suppkey} AND #8{sum} = #9{max_sum}) type=delta // { arity: 10 } implementation %0:supplier » %1:l0[#0]UKA » %2[#0]UK @@ -1171,7 +1171,7 @@ Explained Query: Get l0 // { arity: 2 } ArrangeBy keys=[[#0{max_sum}]] // { arity: 1 } Reduce aggregates=[max(#0{sum})] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{sum}) // { arity: 1 } Get l0 // { arity: 2 } Used Indexes: @@ -1224,7 +1224,7 @@ Explained Query: Finish order_by=[#3{count_ps_suppkey} desc nulls_first, #0{p_brand} asc nulls_last, #1{p_type} asc nulls_last, #2{p_size} asc nulls_last] output=[#0..=#3] With cte l0 = - Project (#1{p_brand}, #8..=#10) // { arity: 4 } + Project (#1{ps_suppkey}, #8{p_brand}..=#10{p_size}) // { arity: 4 } Filter (#8{p_brand} != "Brand#45") AND NOT(like["MEDIUM POLISHED%"](varchar_to_text(#9{p_type}))) AND ((#10{p_size} = 3) OR (#10{p_size} = 9) OR (#10{p_size} = 14) OR (#10{p_size} = 19) OR (#10{p_size} = 23) OR (#10{p_size} = 36) OR (#10{p_size} = 45) OR (#10{p_size} = 49)) // { arity: 14 } Join on=(#0{ps_partkey} = #5{p_partkey}) type=differential // { arity: 14 } implementation @@ -1299,7 +1299,7 @@ Explained Query: ArrangeBy keys=[[#1{l_partkey}]] // { arity: 16 } ReadIndex on=lineitem fk_lineitem_partkey=[differential join] // { arity: 16 } cte l1 = - Project (#1{l_quantity}, #4, #5) // { arity: 3 } + Project (#1{l_partkey}, #4{l_quantity}, #5{l_extendedprice}) // { arity: 3 } Filter (#19{p_brand} = "Brand#23") AND (#22{p_container} = "MED BOX") // { arity: 25 } Join on=(#1{l_partkey} = #16{p_partkey}) type=differential // { arity: 25 } implementation @@ -1309,7 +1309,7 @@ Explained Query: ReadIndex on=part pk_part_partkey=[differential join] // { arity: 9 } cte l2 = Reduce aggregates=[sum(#0{l_extendedprice})] // { arity: 1 } - Project (#2) // { arity: 1 } + Project (#2{l_extendedprice}) // { arity: 1 } Filter (#1{l_quantity} < (0.2 * (#4{sum_l_quantity} / bigint_to_numeric(case when (#5{count} = 0) then null else #5{count} end)))) // { arity: 6 } Join on=(#0{l_partkey} = #3{l_partkey}) type=differential // { arity: 6 } implementation @@ -1318,7 +1318,7 @@ Explained Query: Get l1 // { arity: 3 } ArrangeBy keys=[[#0{l_partkey}]] // { arity: 3 } Reduce group_by=[#0{l_partkey}] aggregates=[sum(#1{l_quantity}), count(*)] // { arity: 3 } - Project (#0{l_partkey}, #5) // { arity: 2 } + Project (#0{l_partkey}, 
#5{l_quantity}) // { arity: 2 } Join on=(#0{l_partkey} = #2{l_partkey}) type=differential // { arity: 17 } implementation %0[#0]UKA » %1:l0[#1]KA @@ -1392,7 +1392,7 @@ Explained Query: ArrangeBy keys=[[#0{l_orderkey}]] // { arity: 16 } ReadIndex on=lineitem fk_lineitem_orderkey=[differential join, delta join lookup] // { arity: 16 } cte l1 = - Project (#0{c_custkey}, #1{c_name}, #8, #11, #12, #21) // { arity: 6 } + Project (#0{c_custkey}, #1{c_name}, #8{o_orderkey}, #11{o_totalprice}, #12{o_orderdate}, #21{l_quantity}) // { arity: 6 } Join on=(#0{c_custkey} = #9{o_custkey} AND #8{o_orderkey} = #17{l_orderkey}) type=delta // { arity: 33 } implementation %0:customer » %1:orders[#1]KA » %2:l0[#0]KA @@ -1414,13 +1414,13 @@ Explained Query: Get l1 // { arity: 6 } ArrangeBy keys=[[#0{o_orderkey}]] // { arity: 2 } Reduce group_by=[#0{o_orderkey}] aggregates=[sum(#1{l_quantity})] // { arity: 2 } - Project (#0{o_orderkey}, #5) // { arity: 2 } + Project (#0{o_orderkey}, #5{l_quantity}) // { arity: 2 } Join on=(#0{o_orderkey} = #1{l_orderkey}) type=differential // { arity: 17 } implementation %0[#0]UKA » %1:l0[#0]KA ArrangeBy keys=[[#0{o_orderkey}]] // { arity: 1 } Distinct project=[#0{o_orderkey}] // { arity: 1 } - Project (#2) // { arity: 1 } + Project (#2{o_orderkey}) // { arity: 1 } Get l1 // { arity: 6 } Get l0 // { arity: 16 } @@ -1478,7 +1478,7 @@ Explained Query: With cte l0 = Reduce aggregates=[sum((#0{l_extendedprice} * (1 - #1{l_discount})))] // { arity: 1 } - Project (#5, #6) // { arity: 2 } + Project (#5{l_extendedprice}, #6{l_discount}) // { arity: 2 } Filter (#13{l_shipinstruct} = "DELIVER IN PERSON") AND (#21{p_size} >= 1) AND ((#14{l_shipmode} = "AIR") OR (#14{l_shipmode} = "AIR REG")) AND ((#25 AND #26) OR (#27 AND #28) OR (#29 AND #30)) AND ((#31 AND #32 AND #33) OR (#34 AND #35 AND #36) OR (#37 AND #38 AND #39)) AND ((#25 AND #26 AND #34 AND #35 AND #36) OR (#27 AND #28 AND #37 AND #38 AND #39) OR (#29 AND #30 AND #31 AND #32 AND #33)) // { arity: 40 } Map ((#4{l_quantity} <= 20), (#4{l_quantity} >= 10), (#4{l_quantity} <= 30), (#4{l_quantity} >= 20), (#4{l_quantity} <= 11), (#4{l_quantity} >= 1), (#19{p_brand} = "Brand#12"), (#21{p_size} <= 5), ((#22{p_container} = "SM BOX") OR (#22{p_container} = "SM PKG") OR (#22{p_container} = "SM CASE") OR (#22{p_container} = "SM PACK")), (#19{p_brand} = "Brand#23"), (#21{p_size} <= 10), ((#22{p_container} = "MED BAG") OR (#22{p_container} = "MED BOX") OR (#22{p_container} = "MED PKG") OR (#22{p_container} = "MED PACK")), (#19{p_brand} = "Brand#34"), (#21{p_size} <= 15), ((#22{p_container} = "LG BOX") OR (#22{p_container} = "LG PKG") OR (#22{p_container} = "LG CASE") OR (#22{p_container} = "LG PACK"))) // { arity: 40 } Join on=(#1{l_partkey} = #16{p_partkey}) type=differential // { arity: 25 } @@ -1582,7 +1582,7 @@ Explained Query: Filter (#0{p_partkey}) IS NOT NULL AND like["forest%"](varchar_to_text(#1{p_name})) // { arity: 9 } ReadIndex on=part pk_part_partkey=[*** full scan ***] // { arity: 9 } Return // { arity: 2 } - Project (#1{s_address}, #2) // { arity: 2 } + Project (#1{s_name}, #2{s_address}) // { arity: 2 } Join on=(#0{s_suppkey} = #3{s_suppkey}) type=differential // { arity: 4 } implementation %1[#0]UKA » %0:l0[#0]K @@ -1596,21 +1596,21 @@ Explained Query: implementation %1[#1, #0]UKK » %0:l1[#0, #1]KKf ArrangeBy keys=[[#0{s_suppkey}, #1{ps_partkey}]] // { arity: 3 } - Project (#0{s_suppkey}, #1{ps_partkey}, #3) // { arity: 3 } + Project (#0{s_suppkey}, #1{ps_partkey}, #3{ps_availqty}) // { arity: 3 } Filter (#0{s_suppkey} = 
#2{ps_suppkey}) // { arity: 4 } Get l1 // { arity: 4 } ArrangeBy keys=[[#1{ps_suppkey}, #0{ps_partkey}]] // { arity: 3 } Project (#0{ps_partkey}, #1{ps_suppkey}, #3) // { arity: 3 } Map ((0.5 * #2{sum_l_quantity})) // { arity: 4 } Reduce group_by=[#0{ps_partkey}, #1{ps_suppkey}] aggregates=[sum(#2{l_quantity})] // { arity: 3 } - Project (#0{ps_partkey}, #1{ps_suppkey}, #6) // { arity: 3 } + Project (#0{ps_partkey}, #1{ps_suppkey}, #6{l_quantity}) // { arity: 3 } Filter (#12{l_shipdate} >= 1995-01-01) AND (date_to_timestamp(#12{l_shipdate}) < 1996-01-01 00:00:00) // { arity: 18 } Join on=(#0{ps_partkey} = #3{l_partkey} AND #1{ps_suppkey} = #4{l_suppkey}) type=differential // { arity: 18 } implementation %0[#0, #1]UKKA » %1:lineitem[#1, #2]KKAiif ArrangeBy keys=[[#0{ps_partkey}, #1{ps_suppkey}]] // { arity: 2 } Distinct project=[#0{ps_partkey}, #1{ps_suppkey}] // { arity: 2 } - Project (#1{ps_suppkey}, #2) // { arity: 2 } + Project (#1{ps_partkey}, #2{ps_suppkey}) // { arity: 2 } Get l1 // { arity: 4 } ArrangeBy keys=[[#1{l_partkey}, #2{l_suppkey}]] // { arity: 16 } ReadIndex on=lineitem fk_lineitem_partsuppkey=[differential join] // { arity: 16 } @@ -1674,7 +1674,7 @@ Explained Query: Finish order_by=[#1{count} desc nulls_first, #0{s_name} asc nulls_last] output=[#0, #1] With cte l0 = - Project (#0{s_suppkey}, #1{s_name}, #7) // { arity: 3 } + Project (#0{s_suppkey}, #1{s_name}, #7{l_orderkey}) // { arity: 3 } Filter (#25{o_orderstatus} = "F") AND (#33{n_name} = "SAUDI ARABIA") AND (#19{l_receiptdate} > #18{l_commitdate}) // { arity: 36 } Join on=(#0{s_suppkey} = #9{l_suppkey} AND #3{s_nationkey} = #32{n_nationkey} AND #7{l_orderkey} = #23{o_orderkey}) type=delta // { arity: 36 } implementation @@ -1709,16 +1709,16 @@ Explained Query: %1:l1[#0]KA » %0[#0]K ArrangeBy keys=[[#0{l_orderkey}]] // { arity: 2 } Distinct project=[#1{l_orderkey}, #0{s_suppkey}] // { arity: 2 } - Project (#0{s_suppkey}, #2) // { arity: 2 } + Project (#0{s_suppkey}, #2{l_orderkey}) // { arity: 2 } Get l0 // { arity: 3 } Get l1 // { arity: 16 } cte l3 = Distinct project=[#1{l_orderkey}, #0{s_suppkey}] // { arity: 2 } - Project (#0{s_suppkey}, #2) // { arity: 2 } + Project (#0{s_suppkey}, #2{l_orderkey}) // { arity: 2 } Get l2 // { arity: 3 } Return // { arity: 2 } Reduce group_by=[#0{s_name}] aggregates=[count(*)] // { arity: 2 } - Project (#1) // { arity: 1 } + Project (#1{s_name}) // { arity: 1 } Join on=(#0{s_suppkey} = #4{s_suppkey} AND #2{l_orderkey} = #3{l_orderkey}) type=differential // { arity: 5 } implementation %0:l2[#2, #0]KK » %1[#0, #1]KK @@ -1806,8 +1806,9 @@ Explained Query: Finish order_by=[#0 asc nulls_last] output=[#0..=#2] With cte l0 = - Map (substr(char_to_text(#4{c_phone}), 1, 2)) // { arity: 9 } - ReadIndex on=customer pk_customer_custkey=[*** full scan ***] // { arity: 8 } + Project (#0{c_custkey}, #4{c_phone}, #5{c_acctbal}, #8) // { arity: 4 } + Map (substr(char_to_text(#4{c_phone}), 1, 2)) // { arity: 9 } + ReadIndex on=customer pk_customer_custkey=[*** full scan ***] // { arity: 8 } cte l1 = Project (#0{c_custkey}..=#2{c_acctbal}) // { arity: 3 } Filter (#2{c_acctbal} > (#3{sum_c_acctbal} / bigint_to_numeric(case when (#4{count} = 0) then null else #4{count} end))) // { arity: 5 } @@ -1815,21 +1816,21 @@ Explained Query: implementation %1[×]UA » %0:l0[×]ef ArrangeBy keys=[[]] // { arity: 3 } - Project (#0{c_custkey}, #4, #5) // { arity: 3 } - Filter ((#8 = "13") OR (#8 = "17") OR (#8 = "18") OR (#8 = "23") OR (#8 = "29") OR (#8 = "30") OR (#8 = "31")) // { arity: 9 } - Get l0 // { arity: 9 
} + Project (#0{c_custkey}..=#2{c_acctbal}) // { arity: 3 } + Filter ((#3 = "13") OR (#3 = "17") OR (#3 = "18") OR (#3 = "23") OR (#3 = "29") OR (#3 = "30") OR (#3 = "31")) // { arity: 4 } + Get l0 // { arity: 4 } ArrangeBy keys=[[]] // { arity: 2 } Reduce aggregates=[sum(#0{c_acctbal}), count(*)] // { arity: 2 } - Project (#5) // { arity: 1 } - Filter (#5{c_acctbal} > 0) AND ((#8 = "13") OR (#8 = "17") OR (#8 = "18") OR (#8 = "23") OR (#8 = "29") OR (#8 = "30") OR (#8 = "31")) // { arity: 9 } - Get l0 // { arity: 9 } + Project (#2{c_acctbal}) // { arity: 1 } + Filter (#2{c_acctbal} > 0) AND ((#3 = "13") OR (#3 = "17") OR (#3 = "18") OR (#3 = "23") OR (#3 = "29") OR (#3 = "30") OR (#3 = "31")) // { arity: 4 } + Get l0 // { arity: 4 } cte l2 = Distinct project=[#0{c_custkey}] // { arity: 1 } Project (#0{c_custkey}) // { arity: 1 } Get l1 // { arity: 3 } Return // { arity: 3 } Reduce group_by=[substr(char_to_text(#0{c_phone}), 1, 2)] aggregates=[count(*), sum(#1{c_acctbal})] // { arity: 3 } - Project (#1{c_acctbal}, #2) // { arity: 2 } + Project (#1{c_phone}, #2{c_acctbal}) // { arity: 2 } Join on=(#0{c_custkey} = #3{c_custkey}) type=differential // { arity: 4 } implementation %0:l1[#0]K » %1[#0]K @@ -1846,7 +1847,7 @@ Explained Query: Get l2 // { arity: 1 } ArrangeBy keys=[[#0{o_custkey}]] // { arity: 1 } Distinct project=[#0{o_custkey}] // { arity: 1 } - Project (#1) // { arity: 1 } + Project (#1{o_custkey}) // { arity: 1 } ReadIndex on=orders pk_orders_orderkey=[*** full scan ***] // { arity: 9 } Get l2 // { arity: 1 } diff --git a/test/sqllogictest/transactions.slt b/test/sqllogictest/transactions.slt index 57a6e269097ca..e3a3bf1822521 100644 --- a/test/sqllogictest/transactions.slt +++ b/test/sqllogictest/transactions.slt @@ -988,8 +988,9 @@ Explained Query: Filter (#2) IS NULL AND (#1 = "view") ReadStorage mz_internal.mz_comments cte l1 = - Filter (#2 = "u3") - ReadStorage mz_catalog.mz_views + Project (#0, #3) + Filter (#2 = "u3") + ReadStorage mz_catalog.mz_views Return Project (#0, #2) Map (coalesce(#1, "")) @@ -1000,13 +1001,12 @@ Explained Query: Project (#1) Join on=(#0 = #2) type=differential ArrangeBy keys=[[#0]] - Project (#0, #3) - Get l1 + Get l1 ArrangeBy keys=[[#0]] Distinct project=[#0] Project (#0) Get l0 - Project (#3) + Project (#1) Get l1 Project (#2, #3) Filter (#1 = "u3") diff --git a/test/sqllogictest/transform/join_fusion.slt b/test/sqllogictest/transform/join_fusion.slt index 0e86d23ed2de5..a4fe7499d6aaa 100644 --- a/test/sqllogictest/transform/join_fusion.slt +++ b/test/sqllogictest/transform/join_fusion.slt @@ -151,7 +151,8 @@ EXPLAIN WITH(arity, join implementations) SELECT MIN( o_orderkey ) Explained Query: With cte l0 = - ReadIndex on=materialize.public.lineitem fk_lineitem_orderkey=[lookup value=(38)] // { arity: 9 } + Project (#0) // { arity: 1 } + ReadIndex on=materialize.public.lineitem fk_lineitem_orderkey=[lookup value=(38)] // { arity: 9 } cte l1 = Reduce aggregates=[min(#0)] // { arity: 1 } Project (#1) // { arity: 1 } @@ -170,14 +171,13 @@ Explained Query: ReadIndex on=orders pk_orders_orderkey=[*** full scan ***] // { arity: 4 } ArrangeBy keys=[[#0]] // { arity: 1 } Union // { arity: 1 } - Project (#0) // { arity: 1 } - Get l0 // { arity: 9 } + Get l0 // { arity: 1 } Map (error("more than one record produced in subquery")) // { arity: 1 } Project () // { arity: 0 } Filter error("more than one record produced in subquery") AND (#0 > 1) // { arity: 1 } Reduce aggregates=[count(*)] // { arity: 1 } Project () // { arity: 0 } - Get l0 // { arity: 9 } + 
Get l0 // { arity: 1 } Return // { arity: 1 } Union // { arity: 1 } Get l1 // { arity: 1 } diff --git a/test/sqllogictest/transform/join_index.slt b/test/sqllogictest/transform/join_index.slt index bca551d722641..faa20b0fec499 100644 --- a/test/sqllogictest/transform/join_index.slt +++ b/test/sqllogictest/transform/join_index.slt @@ -741,7 +741,8 @@ UNION ALL Explained Query: With cte l0 = - ReadIndex on=materialize.public.big big_idx_a=[lookup value=(5)] // { arity: 15 } + Project (#0) // { arity: 1 } + ReadIndex on=materialize.public.big big_idx_a=[lookup value=(5)] // { arity: 15 } Return // { arity: 1 } Union // { arity: 1 } CrossJoin type=delta // { arity: 1 } @@ -757,9 +758,8 @@ Explained Query: ReadIndex on=big big_idx_a=[*** full scan ***] // { arity: 14 } ArrangeBy keys=[[]] // { arity: 0 } Project () // { arity: 0 } - Get l0 // { arity: 15 } - Project (#0) // { arity: 1 } - Get l0 // { arity: 15 } + Get l0 // { arity: 1 } + Get l0 // { arity: 1 } Used Indexes: - materialize.public.big_idx_a (*** full scan ***, lookup) diff --git a/test/sqllogictest/transform/literal_lifting.slt b/test/sqllogictest/transform/literal_lifting.slt index edb5f22cd0898..bfc0a8c3304df 100644 --- a/test/sqllogictest/transform/literal_lifting.slt +++ b/test/sqllogictest/transform/literal_lifting.slt @@ -1012,7 +1012,7 @@ WITH MUTUALLY RECURSIVE SELECT * FROM c0 UNION ALL SELECT * FROM c1 ---- Explained Query: - With Mutually Recursive + With cte l0 = Distinct project=[#0] // { arity: 1 } Union // { arity: 1 } @@ -1020,18 +1020,20 @@ Explained Query: ReadStorage materialize.public.t1 // { arity: 2 } Project (#1) // { arity: 1 } ReadStorage materialize.public.t1 // { arity: 2 } - cte l1 = - Map (42) // { arity: 2 } - Distinct project=[#0] // { arity: 1 } - Union // { arity: 1 } - Get l0 // { arity: 1 } - Project (#0) // { arity: 1 } - Get l1 // { arity: 2 } Return // { arity: 2 } - Union // { arity: 2 } - Map (42) // { arity: 2 } - Get l0 // { arity: 1 } - Get l1 // { arity: 2 } + With Mutually Recursive + cte l1 = + Map (42) // { arity: 2 } + Distinct project=[#0] // { arity: 1 } + Union // { arity: 1 } + Get l0 // { arity: 1 } + Project (#0) // { arity: 1 } + Get l1 // { arity: 2 } + Return // { arity: 2 } + Union // { arity: 2 } + Map (42) // { arity: 2 } + Get l0 // { arity: 1 } + Get l1 // { arity: 2 } Source materialize.public.t1 diff --git a/test/sqllogictest/transform/normalize_lets.slt b/test/sqllogictest/transform/normalize_lets.slt index db6754d01f29c..6e6aea033b8e8 100644 --- a/test/sqllogictest/transform/normalize_lets.slt +++ b/test/sqllogictest/transform/normalize_lets.slt @@ -542,3 +542,91 @@ where (~ (select "replication_factor" from mz_catalog.mz_clusters limit 1 offset end ) limit 117; + +## Ensure that we hoist WMR-invariant Let bindings, to avoid a `raw` modifier on arrangements +## that can be accessed through keys (and which do not otherwise require linear work). + +statement ok +create table potato (a TEXT, b TEXT); + +statement ok +create index on potato(a); + +## The only thing that needs to stay true about what follows is that `potato` is used only +## as indexed access, and has `raw = false` to avoid decanting its contents. 
+query T multiline +EXPLAIN PHYSICAL PLAN AS TEXT FOR WITH MUTUALLY RECURSIVE +walk(a TEXT, b TEXT) AS ( + SELECT a, b + FROM potato + WHERE a = 'russet' + + UNION + + SELECT potato.a, potato.b + FROM potato + INNER JOIN walk + ON potato.a = walk.b +) +select * from walk; +---- +Explained Query: + Return + Return + Get::PassArrangements l1 + raw=true + With Mutually Recursive + cte l1 = + ArrangeBy + input_key=[#0, #1] + raw=true + Reduce::Distinct + val_plan + project=() + key_plan=id + Union + Join::Linear + linear_stage[0] + lookup={ relation=0, key=[#0] } + stream={ key=[#0], thinning=() } + source={ relation=1, key=[#0] } + Get::PassArrangements l0 + raw=false + arrangements[0]={ key=[#0], permutation=id, thinning=(#1) } + types=[text?, text?] + ArrangeBy + raw=true + arrangements[0]={ key=[#0], permutation=id, thinning=() } + types=[text] + Constant + - ("russet") + Join::Linear + linear_stage[0] + lookup={ relation=1, key=[#0] } + stream={ key=[#0], thinning=(#1) } + source={ relation=0, key=[#0] } + Get::PassArrangements l0 + raw=false + arrangements[0]={ key=[#0], permutation=id, thinning=(#1) } + types=[text?, text?] + ArrangeBy + raw=true + arrangements[0]={ key=[#0], permutation=id, thinning=() } + types=[text] + Get::Collection l1 + project=(#1) + filter=((#1) IS NOT NULL) + raw=true + With + cte l0 = + Get::PassArrangements materialize.public.potato + raw=false + arrangements[0]={ key=[#0], permutation=id, thinning=(#1) } + types=[text?, text?] + +Used Indexes: + - materialize.public.potato_a_idx (differential join, lookup) + +Target cluster: quickstart + +EOF diff --git a/test/sqllogictest/transform/projection_lifting.slt b/test/sqllogictest/transform/projection_lifting.slt index f8dc41ebd2397..50a03ab6ee047 100644 --- a/test/sqllogictest/transform/projection_lifting.slt +++ b/test/sqllogictest/transform/projection_lifting.slt @@ -92,25 +92,27 @@ WITH MUTUALLY RECURSIVE SELECT * FROM triangle_cycles; ---- Explained Query: - With Mutually Recursive + With cte l0 = Filter (#0) IS NOT NULL AND (#1) IS NOT NULL ReadStorage materialize.public.edges - cte l1 = - Distinct project=[#0..=#2] - Union - Project (#1, #3, #0) - Join on=(#0 = #5 AND #1 = #2 AND #3 = #4) type=differential - ArrangeBy keys=[[#1]] - Get l0 - ArrangeBy keys=[[#0]] - Get l0 - ArrangeBy keys=[[#0, #1]] - Get l0 - Project (#2, #0, #1) - Get l1 Return - Get l1 + With Mutually Recursive + cte l1 = + Distinct project=[#0..=#2] + Union + Project (#1, #3, #0) + Join on=(#0 = #5 AND #1 = #2 AND #3 = #4) type=differential + ArrangeBy keys=[[#1]] + Get l0 + ArrangeBy keys=[[#0]] + Get l0 + ArrangeBy keys=[[#0, #1]] + Get l0 + Project (#2, #0, #1) + Get l1 + Return + Get l1 Source materialize.public.edges filter=((#0) IS NOT NULL AND (#1) IS NOT NULL) diff --git a/test/sqllogictest/transform/reduction_pushdown.slt b/test/sqllogictest/transform/reduction_pushdown.slt index 2f3ad8ddbba2a..d5349ddeb2e06 100644 --- a/test/sqllogictest/transform/reduction_pushdown.slt +++ b/test/sqllogictest/transform/reduction_pushdown.slt @@ -129,3 +129,88 @@ Source materialize.public.pk1 Target cluster: quickstart EOF + +## Regression test for https://github.com/MaterializeInc/database-issues/issues/9013 +query RRR +SELECT + (a1.f1) AS c1, + (a2.f1) AS c2, + (a1.f2) AS c3 +FROM ( + SELECT + a1.f2 AS f1, + a1.f2 + a1.f1 AS f2 + FROM pk1 AS a1 + ORDER BY 1, 2 + LIMIT 1 OFFSET 1 +) AS a1 +LEFT JOIN ( + SELECT a1.f2 AS f1 + FROM t2 AS a1 + ORDER BY 1 + LIMIT 1 OFFSET 7 +) AS a2 +ON (a1.f2 != 4) +WHERE + a2.f1 IS NULL + AND a1.f1 + a1.f2 = 
NULLIF(a1.f1, a2.f1) +GROUP BY 1, 2, 3; +---- + +query T multiline +EXPLAIN WITH (arity, humanized expressions) +SELECT + (a1.f1) AS c1, + (a2.f1) AS c2, + (a1.f2) AS c3 +FROM ( + SELECT + a1.f2 AS f1, + a1.f2 + a1.f1 AS f2 + FROM pk1 AS a1 + ORDER BY 1, 2 + LIMIT 1 OFFSET 1 +) AS a1 +LEFT JOIN ( + SELECT a1.f2 AS f1 + FROM t2 AS a1 + ORDER BY 1 + LIMIT 1 OFFSET 7 +) AS a2 +ON (a1.f2 != 4) +WHERE + a2.f1 IS NULL + AND a1.f1 + a1.f2 = NULLIF(a1.f1, a2.f1) +GROUP BY 1, 2, 3; +---- +Explained Query: + With + cte l0 = + Filter (#0{f2} = (#0{f2} + #1)) // { arity: 2 } + TopK order_by=[#0{f2} asc nulls_last, #1 asc nulls_last] limit=1 offset=1 // { arity: 2 } + Project (#1{f2}, #2) // { arity: 2 } + Map ((#1{f2} + #0{f1})) // { arity: 3 } + ReadStorage materialize.public.pk1 // { arity: 2 } + Return // { arity: 3 } + Project (#0{f2}, #3, #2) // { arity: 3 } + Map (null) // { arity: 4 } + Distinct project=[#0{f2}, (#0{f2} + #1), #1] // { arity: 3 } + Union // { arity: 2 } + Negate // { arity: 2 } + CrossJoin type=differential // { arity: 2 } + ArrangeBy keys=[[]] // { arity: 2 } + Filter (#1 != 4) // { arity: 2 } + Get l0 // { arity: 2 } + ArrangeBy keys=[[]] // { arity: 0 } + Project () // { arity: 0 } + TopK order_by=[#0{f2} asc nulls_last] limit=1 offset=7 // { arity: 1 } + Project (#1{f2}) // { arity: 1 } + ReadStorage materialize.public.t2 // { arity: 2 } + Get l0 // { arity: 2 } + +Source materialize.public.t2 +Source materialize.public.pk1 + +Target cluster: quickstart + +EOF diff --git a/test/sqllogictest/transform/redundant_join.slt b/test/sqllogictest/transform/redundant_join.slt index 83ad60cec5ad2..9185f0ca3f5e1 100644 --- a/test/sqllogictest/transform/redundant_join.slt +++ b/test/sqllogictest/transform/redundant_join.slt @@ -140,31 +140,28 @@ SELECT * FROM ( ); ---- Explained Query: - With Mutually Recursive + With cte l0 = Distinct project=[(#0 % 2)] // { arity: 1 } Project (#0) // { arity: 1 } ReadStorage materialize.public.t2 // { arity: 2 } - cte l1 = - Distinct project=[#0..=#2] // { arity: 3 } - Union // { arity: 3 } + Return // { arity: 1 } + With Mutually Recursive + cte l1 = + Distinct project=[#0..=#2] // { arity: 3 } + Union // { arity: 3 } + Get l1 // { arity: 3 } + Project (#0, #0, #0) // { arity: 3 } + Get l0 // { arity: 1 } + Map ((#0 % 2)) // { arity: 3 } + ReadStorage materialize.public.t2 // { arity: 2 } + Return // { arity: 1 } + Union // { arity: 1 } + Project (#2) // { arity: 1 } Get l1 // { arity: 3 } - Project (#0, #0, #0) // { arity: 3 } + Project (#1) // { arity: 1 } + Map (42) // { arity: 2 } Get l0 // { arity: 1 } - Join on=(#2 = (#0 % 2)) type=differential // { arity: 3 } - implementation - %1:l0[#0]UK » %0:t2[(#0 % 2)]K - ArrangeBy keys=[[(#0 % 2)]] // { arity: 2 } - ReadStorage materialize.public.t2 // { arity: 2 } - ArrangeBy keys=[[#0]] // { arity: 1 } - Get l0 // { arity: 1 } - Return // { arity: 1 } - Union // { arity: 1 } - Project (#2) // { arity: 1 } - Get l1 // { arity: 3 } - Project (#1) // { arity: 1 } - Map (42) // { arity: 2 } - Get l0 // { arity: 1 } Source materialize.public.t2 diff --git a/test/sqllogictest/transform/relation_cse.slt b/test/sqllogictest/transform/relation_cse.slt index 8bae59b34cf30..ca22ce8d1f180 100644 --- a/test/sqllogictest/transform/relation_cse.slt +++ b/test/sqllogictest/transform/relation_cse.slt @@ -136,17 +136,17 @@ EXPLAIN WITH(arity, join implementations) SELECT * FROM t1 AS a1 LEFT JOIN t1 AS Explained Query: With cte l0 = - ReadIndex on=materialize.public.t1 i1=[lookup value=(1)] // { arity: 3 } + Project 
(#0, #1) // { arity: 2 } + ReadIndex on=materialize.public.t1 i1=[lookup value=(1)] // { arity: 3 } Return // { arity: 3 } CrossJoin type=differential // { arity: 3 } implementation %0:l0[×]e » %1:l0[×]e ArrangeBy keys=[[]] // { arity: 2 } - Project (#0, #1) // { arity: 2 } - Get l0 // { arity: 3 } + Get l0 // { arity: 2 } ArrangeBy keys=[[]] // { arity: 1 } Project (#1) // { arity: 1 } - Get l0 // { arity: 3 } + Get l0 // { arity: 2 } Used Indexes: - materialize.public.i1 (lookup) @@ -202,18 +202,18 @@ EXPLAIN WITH(arity, join implementations) SELECT * FROM t1 WHERE f1 = (SELECT f1 Explained Query: With cte l0 = - ReadIndex on=materialize.public.t1 i1=[lookup value=(1)] // { arity: 3 } + Project (#0) // { arity: 1 } + ReadIndex on=materialize.public.t1 i1=[lookup value=(1)] // { arity: 3 } cte l1 = ArrangeBy keys=[[#0]] // { arity: 1 } Union // { arity: 1 } - Project (#0) // { arity: 1 } - Get l0 // { arity: 3 } + Get l0 // { arity: 1 } Map (error("more than one record produced in subquery")) // { arity: 1 } Project () // { arity: 0 } Filter (#0 > 1) // { arity: 1 } Reduce aggregates=[count(*)] // { arity: 1 } Project () // { arity: 0 } - Get l0 // { arity: 3 } + Get l0 // { arity: 1 } Return // { arity: 2 } Project (#0, #1) // { arity: 2 } Join on=(#0 = #2 AND #1 = #3) type=delta // { arity: 4 } @@ -379,19 +379,18 @@ AND a2.f2 = 3 Explained Query: With cte l0 = - ReadIndex on=materialize.public.t1 i1=[lookup value=(1)] // { arity: 3 } + Project (#0, #1) // { arity: 2 } + ReadIndex on=materialize.public.t1 i1=[lookup value=(1)] // { arity: 3 } Return // { arity: 4 } CrossJoin type=differential // { arity: 4 } implementation %0:l0[×]ef » %1:l0[×]ef ArrangeBy keys=[[]] // { arity: 2 } - Project (#0, #1) // { arity: 2 } - Filter (#1 = 2) // { arity: 3 } - Get l0 // { arity: 3 } + Filter (#1 = 2) // { arity: 2 } + Get l0 // { arity: 2 } ArrangeBy keys=[[]] // { arity: 2 } - Project (#0, #1) // { arity: 2 } - Filter (#1 = 3) // { arity: 3 } - Get l0 // { arity: 3 } + Filter (#1 = 3) // { arity: 2 } + Get l0 // { arity: 2 } Used Indexes: - materialize.public.i1 (lookup) @@ -459,17 +458,17 @@ EXPLAIN WITH(arity, join implementations) SELECT (SELECT f1 FROM t1 WHERE f1 = 1 Explained Query: With cte l0 = - ReadIndex on=materialize.public.t1 i1=[lookup value=(1)] // { arity: 3 } + Project (#0) // { arity: 1 } + ReadIndex on=materialize.public.t1 i1=[lookup value=(1)] // { arity: 3 } cte l1 = Union // { arity: 1 } - Project (#0) // { arity: 1 } - Get l0 // { arity: 3 } + Get l0 // { arity: 1 } Map (error("more than one record produced in subquery")) // { arity: 1 } Project () // { arity: 0 } Filter (#0 > 1) // { arity: 1 } Reduce aggregates=[count(*)] // { arity: 1 } Project () // { arity: 0 } - Get l0 // { arity: 3 } + Get l0 // { arity: 1 } Return // { arity: 2 } Project (#0, #0) // { arity: 2 } CrossJoin type=differential // { arity: 1 } @@ -503,17 +502,17 @@ EXPLAIN WITH(arity, join implementations) SELECT MIN((SELECT f1 FROM t1 WHERE f1 Explained Query: With cte l0 = - ReadIndex on=materialize.public.t1 i1=[lookup value=(1)] // { arity: 3 } + Project (#0) // { arity: 1 } + ReadIndex on=materialize.public.t1 i1=[lookup value=(1)] // { arity: 3 } cte l1 = Union // { arity: 1 } - Project (#0) // { arity: 1 } - Get l0 // { arity: 3 } + Get l0 // { arity: 1 } Map (error("more than one record produced in subquery")) // { arity: 1 } Project () // { arity: 0 } Filter (#0 > 1) // { arity: 1 } Reduce aggregates=[count(*)] // { arity: 1 } Project () // { arity: 0 } - Get l0 // { arity: 3 } + Get l0 // { 
arity: 1 } cte l2 = ArrangeBy keys=[[]] // { arity: 1 } Union // { arity: 1 } @@ -566,14 +565,14 @@ EXPLAIN WITH(arity, join implementations) SELECT (SELECT f1 FROM t1 WHERE f1 = 1 Explained Query: With cte l0 = - ReadIndex on=materialize.public.t1 i1=[lookup value=(1)] // { arity: 3 } + Project (#0) // { arity: 1 } + ReadIndex on=materialize.public.t1 i1=[lookup value=(1)] // { arity: 3 } cte l1 = Project () // { arity: 0 } - Get l0 // { arity: 3 } + Get l0 // { arity: 1 } cte l2 = Union // { arity: 1 } - Project (#0) // { arity: 1 } - Get l0 // { arity: 3 } + Get l0 // { arity: 1 } Map (error("more than one record produced in subquery")) // { arity: 1 } Project () // { arity: 0 } Filter (#0 > 1) // { arity: 1 } @@ -618,19 +617,17 @@ SELECT f1 FROM t1 WHERE f1 = 1 Explained Query: With cte l0 = - ReadIndex on=materialize.public.t1 i1=[lookup value=(1)] // { arity: 3 } - cte l1 = Project (#0) // { arity: 1 } - Get l0 // { arity: 3 } - cte l2 = + ReadIndex on=materialize.public.t1 i1=[lookup value=(1)] // { arity: 3 } + cte l1 = Union // { arity: 1 } - Get l1 // { arity: 1 } + Get l0 // { arity: 1 } Map (error("more than one record produced in subquery")) // { arity: 1 } Project () // { arity: 0 } Filter (#0 > 1) // { arity: 1 } Reduce aggregates=[count(*)] // { arity: 1 } Project () // { arity: 0 } - Get l0 // { arity: 3 } + Get l0 // { arity: 1 } Return // { arity: 1 } Union // { arity: 1 } CrossJoin type=differential // { arity: 1 } @@ -641,16 +638,16 @@ Explained Query: ReadIndex on=t1 i1=[*** full scan ***] // { arity: 2 } ArrangeBy keys=[[]] // { arity: 1 } Union // { arity: 1 } - Get l2 // { arity: 1 } + Get l1 // { arity: 1 } Map (null) // { arity: 1 } Union // { arity: 0 } Negate // { arity: 0 } Distinct project=[] // { arity: 0 } Project () // { arity: 0 } - Get l2 // { arity: 1 } + Get l1 // { arity: 1 } Constant // { arity: 0 } - () - Get l1 // { arity: 1 } + Get l0 // { arity: 1 } Used Indexes: - materialize.public.i1 (*** full scan ***, lookup) @@ -755,9 +752,11 @@ Explained Query: ArrangeBy keys=[[#0]] // { arity: 2 } ReadIndex on=t1 i1=[lookup] // { arity: 2 } cte l1 = - ReadIndex on=materialize.public.t1 i1=[lookup value=(1)] // { arity: 3 } + Project (#0) // { arity: 1 } + ReadIndex on=materialize.public.t1 i1=[lookup value=(1)] // { arity: 3 } cte l2 = - ReadIndex on=materialize.public.t1 i1=[lookup value=(2)] // { arity: 3 } + Project (#0) // { arity: 1 } + ReadIndex on=materialize.public.t1 i1=[lookup value=(2)] // { arity: 3 } Return // { arity: 1 } Union // { arity: 1 } CrossJoin type=differential // { arity: 1 } @@ -765,19 +764,17 @@ Explained Query: %0:l1[×]e » %1:l1[×]e ArrangeBy keys=[[]] // { arity: 0 } Project () // { arity: 0 } - Get l1 // { arity: 3 } + Get l1 // { arity: 1 } ArrangeBy keys=[[]] // { arity: 1 } - Project (#0) // { arity: 1 } - Get l1 // { arity: 3 } + Get l1 // { arity: 1 } CrossJoin type=differential // { arity: 1 } implementation %0:l2[×]e » %1:l2[×]e ArrangeBy keys=[[]] // { arity: 0 } Project () // { arity: 0 } - Get l2 // { arity: 3 } + Get l2 // { arity: 1 } ArrangeBy keys=[[]] // { arity: 1 } - Project (#0) // { arity: 1 } - Get l2 // { arity: 3 } + Get l2 // { arity: 1 } Used Indexes: - materialize.public.i1 (lookup) @@ -796,15 +793,15 @@ WHERE s1.f1 = 1 AND s2.f1 = 1 Explained Query: With cte l0 = - ReadIndex on=materialize.public.t1 i1=[lookup value=(1)] // { arity: 3 } + Project (#0) // { arity: 1 } + ReadIndex on=materialize.public.t1 i1=[lookup value=(1)] // { arity: 3 } cte l1 = ArrangeBy keys=[[]] // { arity: 0 } Project () // { 
arity: 0 } - Get l0 // { arity: 3 } + Get l0 // { arity: 1 } cte l2 = ArrangeBy keys=[[]] // { arity: 1 } - Project (#0) // { arity: 1 } - Get l0 // { arity: 3 } + Get l0 // { arity: 1 } Return // { arity: 2 } CrossJoin type=delta // { arity: 2 } implementation @@ -837,9 +834,11 @@ Explained Query: ArrangeBy keys=[[#0]] // { arity: 2 } ReadIndex on=t1 i1=[lookup] // { arity: 2 } cte l1 = - ReadIndex on=materialize.public.t1 i1=[lookup value=(1)] // { arity: 3 } + Project (#0) // { arity: 1 } + ReadIndex on=materialize.public.t1 i1=[lookup value=(1)] // { arity: 3 } cte l2 = - ReadIndex on=materialize.public.t1 i1=[lookup value=(2)] // { arity: 3 } + Project (#0) // { arity: 1 } + ReadIndex on=materialize.public.t1 i1=[lookup value=(2)] // { arity: 3 } Return // { arity: 2 } CrossJoin type=delta // { arity: 2 } implementation @@ -849,16 +848,14 @@ Explained Query: %3:l2 » %0:l1[×]e » %1:l1[×]e » %2:l2[×]e ArrangeBy keys=[[]] // { arity: 0 } Project () // { arity: 0 } - Get l1 // { arity: 3 } + Get l1 // { arity: 1 } ArrangeBy keys=[[]] // { arity: 1 } - Project (#0) // { arity: 1 } - Get l1 // { arity: 3 } + Get l1 // { arity: 1 } ArrangeBy keys=[[]] // { arity: 0 } Project () // { arity: 0 } - Get l2 // { arity: 3 } + Get l2 // { arity: 1 } ArrangeBy keys=[[]] // { arity: 1 } - Project (#0) // { arity: 1 } - Get l2 // { arity: 3 } + Get l2 // { arity: 1 } Used Indexes: - materialize.public.i1 (lookup) @@ -968,7 +965,8 @@ Explained Query: ArrangeBy keys=[[#0]] // { arity: 2 } ReadIndex on=t1 i1=[differential join, lookup] // { arity: 2 } cte l1 = - ReadIndex on=materialize.public.t1 i1=[lookup value=(1)] // { arity: 3 } + Project (#0) // { arity: 1 } + ReadIndex on=materialize.public.t1 i1=[lookup value=(1)] // { arity: 3 } cte l2 = Project (#0, #1) // { arity: 2 } Join on=(#0 = #2) type=differential // { arity: 3 } @@ -977,14 +975,13 @@ Explained Query: Get l0 // { arity: 2 } ArrangeBy keys=[[#0]] // { arity: 1 } Union // { arity: 1 } - Project (#0) // { arity: 1 } - Get l1 // { arity: 3 } + Get l1 // { arity: 1 } Map (error("more than one record produced in subquery")) // { arity: 1 } Project () // { arity: 0 } Filter (#0 > 1) // { arity: 1 } Reduce aggregates=[count(*)] // { arity: 1 } Project () // { arity: 0 } - Get l1 // { arity: 3 } + Get l1 // { arity: 1 } Return // { arity: 2 } Union // { arity: 2 } Get l2 // { arity: 2 } @@ -1338,13 +1335,14 @@ SELECT f2 FROM t1 WHERE f1 = 1 Explained Query: With cte l0 = - ReadIndex on=materialize.public.t1 i1=[lookup value=(1)] // { arity: 3 } + Project (#0, #1) // { arity: 2 } + ReadIndex on=materialize.public.t1 i1=[lookup value=(1)] // { arity: 3 } Return // { arity: 1 } Union // { arity: 1 } Project (#0) // { arity: 1 } - Get l0 // { arity: 3 } + Get l0 // { arity: 2 } Project (#1) // { arity: 1 } - Get l0 // { arity: 3 } + Get l0 // { arity: 2 } Used Indexes: - materialize.public.i1 (lookup) @@ -1396,9 +1394,11 @@ Explained Query: ArrangeBy keys=[[#0]] // { arity: 2 } ReadIndex on=t1 i1=[lookup] // { arity: 2 } cte l1 = - ReadIndex on=materialize.public.t1 i1=[lookup value=(1)] // { arity: 3 } + Project (#0) // { arity: 1 } + ReadIndex on=materialize.public.t1 i1=[lookup value=(1)] // { arity: 3 } cte l2 = - ReadIndex on=materialize.public.t1 i1=[lookup value=(2)] // { arity: 3 } + Project (#0) // { arity: 1 } + ReadIndex on=materialize.public.t1 i1=[lookup value=(2)] // { arity: 3 } Return // { arity: 1 } Union // { arity: 1 } CrossJoin type=differential // { arity: 1 } @@ -1406,19 +1406,17 @@ Explained Query: %0:l1[×]e » %1:l1[×]e 
ArrangeBy keys=[[]] // { arity: 0 } Project () // { arity: 0 } - Get l1 // { arity: 3 } + Get l1 // { arity: 1 } ArrangeBy keys=[[]] // { arity: 1 } - Project (#0) // { arity: 1 } - Get l1 // { arity: 3 } + Get l1 // { arity: 1 } CrossJoin type=differential // { arity: 1 } implementation %0:l2[×]e » %1:l2[×]e ArrangeBy keys=[[]] // { arity: 0 } Project () // { arity: 0 } - Get l2 // { arity: 3 } + Get l2 // { arity: 1 } ArrangeBy keys=[[]] // { arity: 1 } - Project (#0) // { arity: 1 } - Get l2 // { arity: 3 } + Get l2 // { arity: 1 } Used Indexes: - materialize.public.i1 (lookup) @@ -1580,7 +1578,7 @@ UNION ALL SELECT * FROM c2 WHERE f1 > 7 ---- Explained Query: - With Mutually Recursive + With cte l0 = Union // { arity: 2 } ReadIndex on=t1 i1=[*** full scan ***] // { arity: 2 } @@ -1588,28 +1586,30 @@ Explained Query: cte l1 = Filter (#1 > 7) // { arity: 2 } Get l0 // { arity: 2 } - cte l2 = - Union // { arity: 2 } - Get l1 // { arity: 2 } - Get l2 // { arity: 2 } - Get l2 // { arity: 2 } - Get l3 // { arity: 2 } - Get l3 // { arity: 2 } - cte l3 = - Union // { arity: 2 } - Get l1 // { arity: 2 } - Get l2 // { arity: 2 } - Get l2 // { arity: 2 } - Get l3 // { arity: 2 } - Get l3 // { arity: 2 } Return // { arity: 2 } - Union // { arity: 2 } - Filter (#0 > 7) // { arity: 2 } - Get l0 // { arity: 2 } - Filter (#0 > 7) // { arity: 2 } - Get l2 // { arity: 2 } - Filter (#0 > 7) // { arity: 2 } - Get l3 // { arity: 2 } + With Mutually Recursive + cte l2 = + Union // { arity: 2 } + Get l1 // { arity: 2 } + Get l2 // { arity: 2 } + Get l2 // { arity: 2 } + Get l3 // { arity: 2 } + Get l3 // { arity: 2 } + cte l3 = + Union // { arity: 2 } + Get l1 // { arity: 2 } + Get l2 // { arity: 2 } + Get l2 // { arity: 2 } + Get l3 // { arity: 2 } + Get l3 // { arity: 2 } + Return // { arity: 2 } + Union // { arity: 2 } + Filter (#0 > 7) // { arity: 2 } + Get l0 // { arity: 2 } + Filter (#0 > 7) // { arity: 2 } + Get l2 // { arity: 2 } + Filter (#0 > 7) // { arity: 2 } + Get l3 // { arity: 2 } Used Indexes: - materialize.public.i1 (*** full scan ***) diff --git a/test/terraform/aws-persistent/main.tf b/test/terraform/aws-persistent/main.tf new file mode 100644 index 0000000000000..3b94f379611da --- /dev/null +++ b/test/terraform/aws-persistent/main.tf @@ -0,0 +1,134 @@ +# Copyright Materialize, Inc. and contributors. All rights reserved. +# +# Use of this software is governed by the Business Source License +# included in the LICENSE file at the root of this repository. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0. 
+ +provider "aws" { + region = "us-east-1" +} + +resource "random_password" "db_password" { + length = 32 + special = false +} + +variable "operator_version" { + type = string + default = "v25.2.0-beta.1" +} + +variable "orchestratord_version" { + type = string + default = "v0.130.3" +} + +module "materialize_infrastructure" { + source = "git::https://github.com/MaterializeInc/terraform-aws-materialize.git?ref=v0.2.5" + + # Basic settings + namespace = "aws-persistent" + environment = "dev" + install_materialize_operator = true + use_local_chart = true + helm_chart = "materialize-operator-v25.2.0-beta.1.tgz" + operator_version = var.operator_version + orchestratord_version = var.orchestratord_version + + # TODO: Doesn't seem to work yet + # helm_values = { + # operator = { + # clusters = { + # defaultReplicationFactor = { + # system = 1 + # probe = 1 + # support = 1 + # analytics = 1 + # } + # } + # } + # } + + # VPC Configuration + vpc_cidr = "10.0.0.0/16" + availability_zones = ["us-east-1a", "us-east-1b"] + private_subnet_cidrs = ["10.0.1.0/24", "10.0.2.0/24"] + public_subnet_cidrs = ["10.0.101.0/24", "10.0.102.0/24"] + single_nat_gateway = true + + # EKS Configuration + cluster_version = "1.31" + node_group_instance_types = ["r8g.2xlarge"] + node_group_desired_size = 2 + node_group_min_size = 1 + node_group_max_size = 3 + node_group_capacity_type = "ON_DEMAND" + + # Storage Configuration + bucket_force_destroy = true + + # For testing purposes, we are disabling encryption and versioning to allow for easier cleanup + # This should be enabled in production environments for security and data integrity + enable_bucket_versioning = false + enable_bucket_encryption = false + + # Database Configuration + database_password = random_password.db_password.result + postgres_version = "15" + db_instance_class = "db.t3.micro" + db_allocated_storage = 20 + database_name = "materialize" + database_username = "materialize" + db_multi_az = false + + # Basic monitoring + enable_monitoring = true + metrics_retention_days = 30 + + # Tags + tags = { + Environment = "dev" + Project = "aws-persistent" + Terraform = "true" + } +} + +# Generate random suffix for unique S3 bucket name +resource "random_id" "suffix" { + byte_length = 4 +} + +# outputs.tf +output "eks_cluster_endpoint" { + description = "EKS cluster endpoint" + value = module.materialize_infrastructure.eks_cluster_endpoint +} + +output "database_endpoint" { + description = "RDS instance endpoint" + value = module.materialize_infrastructure.database_endpoint +} + +output "s3_bucket_name" { + description = "Name of the S3 bucket" + value = module.materialize_infrastructure.s3_bucket_name +} + +output "materialize_s3_role_arn" { + description = "The ARN of the IAM role for Materialize" + value = module.materialize_infrastructure.materialize_s3_role_arn +} + +output "metadata_backend_url" { + description = "PostgreSQL connection URL in the format required by Materialize" + value = module.materialize_infrastructure.metadata_backend_url + sensitive = true +} + +output "persist_backend_url" { + description = "S3 connection URL in the format required by Materialize using IRSA" + value = module.materialize_infrastructure.persist_backend_url +} diff --git a/test/terraform/aws-persistent/simple.tf b/test/terraform/aws-persistent/simple.tf new file mode 100644 index 0000000000000..971001cd570dc --- /dev/null +++ b/test/terraform/aws-persistent/simple.tf @@ -0,0 +1,14 @@ +terraform { + required_version = ">= 1.0" + + required_providers { + aws = { + source = 
"hashicorp/aws" + version = ">= 5.76.0" + } + random = { + source = "hashicorp/random" + version = ">= 3.0" + } + } +} diff --git a/test/terraform/aws-temporary/main.tf b/test/terraform/aws-temporary/main.tf new file mode 100644 index 0000000000000..e776df467d80e --- /dev/null +++ b/test/terraform/aws-temporary/main.tf @@ -0,0 +1,136 @@ +# Copyright Materialize, Inc. and contributors. All rights reserved. +# +# Use of this software is governed by the Business Source License +# included in the LICENSE file at the root of this repository. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0. + +provider "aws" { + region = "us-east-1" +} + +resource "random_password" "db_password" { + length = 32 + special = false +} + +variable "operator_version" { + type = string + default = "v25.2.0-beta.1" +} + +variable "orchestratord_version" { + type = string + default = "v0.130.3" +} + +module "materialize_infrastructure" { + source = "git::https://github.com/MaterializeInc/terraform-aws-materialize.git?ref=v0.2.5" + + # Basic settings + # The namespace and environment variables are used to construct the names of the resources + # e.g. ${namespace}-${environment}-eks, etc. + namespace = "aws-test" + environment = "dev" + install_materialize_operator = true + use_local_chart = true + helm_chart = "materialize-operator-v25.2.0-beta.1.tgz" + operator_version = var.operator_version + orchestratord_version = var.orchestratord_version + + # TODO: Doesn't seem to work yet + # helm_values = { + # operator = { + # clusters = { + # defaultReplicationFactor = { + # system = 1 + # probe = 1 + # support = 1 + # analytics = 1 + # } + # } + # } + # } + + # VPC Configuration + vpc_cidr = "10.0.0.0/16" + availability_zones = ["us-east-1a", "us-east-1b"] + private_subnet_cidrs = ["10.0.1.0/24", "10.0.2.0/24"] + public_subnet_cidrs = ["10.0.101.0/24", "10.0.102.0/24"] + single_nat_gateway = true + + # EKS Configuration + cluster_version = "1.31" + node_group_instance_types = ["r8g.2xlarge"] + node_group_desired_size = 2 + node_group_min_size = 1 + node_group_max_size = 3 + node_group_capacity_type = "ON_DEMAND" + + # Storage Configuration + bucket_force_destroy = true + + # For testing purposes, we are disabling encryption and versioning to allow for easier cleanup + # This should be enabled in production environments for security and data integrity + enable_bucket_versioning = false + enable_bucket_encryption = false + + # Database Configuration + database_password = random_password.db_password.result + postgres_version = "15" + db_instance_class = "db.t3.micro" + db_allocated_storage = 20 + database_name = "materialize" + database_username = "materialize" + db_multi_az = false + + # Basic monitoring + enable_monitoring = true + metrics_retention_days = 7 + + # Tags + tags = { + Environment = "dev" + Project = "aws-test" + Terraform = "true" + } +} + +# Generate random suffix for unique S3 bucket name +resource "random_id" "suffix" { + byte_length = 4 +} + +# outputs.tf +output "eks_cluster_endpoint" { + description = "EKS cluster endpoint" + value = module.materialize_infrastructure.eks_cluster_endpoint +} + +output "database_endpoint" { + description = "RDS instance endpoint" + value = module.materialize_infrastructure.database_endpoint +} + +output "s3_bucket_name" { + description = "Name of the S3 bucket" + value = module.materialize_infrastructure.s3_bucket_name +} + +output 
"materialize_s3_role_arn" { + description = "The ARN of the IAM role for Materialize" + value = module.materialize_infrastructure.materialize_s3_role_arn +} + +output "metadata_backend_url" { + description = "PostgreSQL connection URL in the format required by Materialize" + value = module.materialize_infrastructure.metadata_backend_url + sensitive = true +} + +output "persist_backend_url" { + description = "S3 connection URL in the format required by Materialize using IRSA" + value = module.materialize_infrastructure.persist_backend_url +} diff --git a/test/terraform/aws-temporary/simple.tf b/test/terraform/aws-temporary/simple.tf new file mode 100644 index 0000000000000..971001cd570dc --- /dev/null +++ b/test/terraform/aws-temporary/simple.tf @@ -0,0 +1,14 @@ +terraform { + required_version = ">= 1.0" + + required_providers { + aws = { + source = "hashicorp/aws" + version = ">= 5.76.0" + } + random = { + source = "hashicorp/random" + version = ">= 3.0" + } + } +} diff --git a/test/terraform/aws/main.tf b/test/terraform/aws/main.tf index 703491507aa99..86d1721685239 100644 --- a/test/terraform/aws/main.tf +++ b/test/terraform/aws/main.tf @@ -11,15 +11,45 @@ provider "aws" { region = "us-east-1" } +resource "random_password" "db_password" { + length = 32 + special = false +} + +variable "operator_version" { + type = string + default = "v25.2.0-beta.1" +} + +variable "orchestratord_version" { + type = string + default = "v0.130.3" +} + module "materialize_infrastructure" { - source = "git::https://github.com/MaterializeInc/terraform-aws-materialize.git?ref=v0.1.4" + source = "git::https://github.com/MaterializeInc/terraform-aws-materialize.git?ref=v0.2.5" # Basic settings environment = "dev" - vpc_name = "terraform-aws-test-vpc" - cluster_name = "terraform-aws-test-cluster" - mz_iam_service_account_name = "terraform-aws-test-user" - mz_iam_role_name = "terraform-aws-test-s3-role" + install_materialize_operator = true + use_local_chart = true + helm_chart = "materialize-operator-v25.2.0-beta.1.tgz" + operator_version = var.operator_version + orchestratord_version = var.orchestratord_version + + # TODO: Doesn't seem to work yet + # helm_values = { + # operator = { + # clusters = { + # defaultReplicationFactor = { + # system = 1 + # probe = 1 + # support = 1 + # analytics = 1 + # } + # } + # } + # } # VPC Configuration vpc_cidr = "10.0.0.0/16" @@ -30,7 +60,7 @@ module "materialize_infrastructure" { # EKS Configuration cluster_version = "1.31" - node_group_instance_types = ["c7a.2xlarge"] + node_group_instance_types = ["r8g.2xlarge"] node_group_desired_size = 2 node_group_min_size = 1 node_group_max_size = 3 diff --git a/test/terraform/azure-temporary/main.tf b/test/terraform/azure-temporary/main.tf new file mode 100644 index 0000000000000..75a2ebc908a86 --- /dev/null +++ b/test/terraform/azure-temporary/main.tf @@ -0,0 +1,113 @@ +# Copyright Materialize, Inc. and contributors. All rights reserved. +# +# Use of this software is governed by the Business Source License +# included in the LICENSE file at the root of this repository. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0. 
+ +provider "azurerm" { + subscription_id = "9bc1ad3f-3401-42a3-99cd-7faeeb51e059" + + features { + resource_group { + prevent_deletion_if_contains_resources = false + } + key_vault { + purge_soft_delete_on_destroy = true + recover_soft_deleted_key_vaults = false + } + + } +} + +resource "random_password" "pass" { + length = 20 + special = false +} + +resource "azurerm_resource_group" "materialize" { + name = "tf-test-rg" + location = "eastus2" + tags = {} +} + +module "materialize" { + # TODO: Use ref when v0.1.3 is released + # source = "git::https://github.com/MaterializeInc/terraform-azurerm-materialize.git?ref=v0.1.3" + source = "git::https://github.com/MaterializeInc/terraform-azurerm-materialize.git?ref=c751b1f1345c961156b9253622b27774dd1d8d93" + resource_group_name = azurerm_resource_group.materialize.name + location = "eastus2" + prefix = "tf-test" + install_materialize_operator = true + + materialize_instances = var.materialize_instances + + database_config = { + sku_name = "GP_Standard_D2s_v3" + version = "15" + password = random_password.pass.result + } + + tags = { + environment = "dev" + managed_by = "terraform" + } + + providers = { + azurerm = azurerm + } +} + +variable "location" { + description = "Azure region" + type = string + default = "eastus2" +} + +variable "tags" { + description = "Tags to apply to resources" + type = map(string) + default = {} +} + +variable "materialize_instances" { + description = "Configuration for Materialize instances" + type = list(object({ + name = string + namespace = optional(string) + database_name = string + cpu_request = optional(string, "1") + memory_request = optional(string, "1Gi") + memory_limit = optional(string, "1Gi") + create_database = optional(bool, true) + in_place_rollout = optional(bool, false) + request_rollout = optional(string) + force_rollout = optional(string) + })) + default = [] +} + +# Output the Materialize instance details +output "aks_cluster" { + description = "AKS cluster details" + value = module.materialize.aks_cluster + sensitive = true +} + +output "connection_strings" { + description = "Connection strings for Materialize" + value = module.materialize.connection_strings + sensitive = true +} + +output "kube_config" { + description = "The kube_config for the AKS cluster" + value = module.materialize.kube_config + sensitive = true +} + +output "resource_group_name" { + value = azurerm_resource_group.materialize.name +} diff --git a/test/terraform/azure-temporary/requirements.txt b/test/terraform/azure-temporary/requirements.txt new file mode 100644 index 0000000000000..4970d5fc4df02 --- /dev/null +++ b/test/terraform/azure-temporary/requirements.txt @@ -0,0 +1,5 @@ +azure-cli==2.69.0 +azure-mgmt-storage==21.2.0 +azure-storage-blob==12.24.1 +azure-identity==1.19.0 +azure-keyvault==4.2.0 diff --git a/test/terraform/azure-temporary/simple.tf b/test/terraform/azure-temporary/simple.tf new file mode 100644 index 0000000000000..8917d7bef2a50 --- /dev/null +++ b/test/terraform/azure-temporary/simple.tf @@ -0,0 +1,18 @@ +terraform { + required_version = ">= 1.0" + + required_providers { + azurerm = { + source = "hashicorp/azurerm" + version = ">= 3.75.0" + } + azuread = { + source = "hashicorp/azuread" + version = ">= 2.45.0" + } + random = { + source = "hashicorp/random" + version = ">= 3.1.0" + } + } +} diff --git a/test/terraform/gcp-temporary/main.tf b/test/terraform/gcp-temporary/main.tf new file mode 100644 index 0000000000000..d847f6c76ac7a --- /dev/null +++ b/test/terraform/gcp-temporary/main.tf @@ -0,0 
+1,92 @@ +# Copyright Materialize, Inc. and contributors. All rights reserved. +# +# Use of this software is governed by the Business Source License +# included in the LICENSE file at the root of this repository. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0. + +terraform { + required_version = ">= 1.0" + + required_providers { + google = { + source = "hashicorp/google" + version = ">= 6.0" + } + } +} + +provider "google" { + project = var.project_id + region = var.region +} + +module "materialize" { + source = "github.com/MaterializeInc/terraform-google-materialize?ref=v0.1.1" + + project_id = var.project_id + region = var.region + prefix = "tf-gcp-test" + + database_config = { + tier = "db-custom-2-4096" + version = "POSTGRES_15" + password = var.database_password + } + + labels = { + environment = "simple" + example = "true" + } + + install_materialize_operator = true + + helm_values = { + clusters = { + defaultReplicationFactor = { + system = 1 + probe = 1 + support = 1 + analytics = 1 + } + } + } +} + +variable "project_id" { + description = "GCP Project ID" + type = string + default = "materialize-ci" +} + +variable "region" { + description = "GCP Region" + type = string + default = "us-east1" +} + +variable "database_password" { + description = "Password for Cloud SQL database user" + default = "your-strong-password" + type = string + sensitive = true +} + +output "gke_cluster" { + description = "GKE cluster details" + value = module.materialize.gke_cluster + sensitive = true +} + +output "service_accounts" { + description = "Service account details" + value = module.materialize.service_accounts +} + +output "connection_strings" { + description = "Connection strings for metadata and persistence backends" + value = module.materialize.connection_strings + sensitive = true +} diff --git a/test/terraform/gcp/main.tf b/test/terraform/gcp/main.tf new file mode 100644 index 0000000000000..d847f6c76ac7a --- /dev/null +++ b/test/terraform/gcp/main.tf @@ -0,0 +1,92 @@ +# Copyright Materialize, Inc. and contributors. All rights reserved. +# +# Use of this software is governed by the Business Source License +# included in the LICENSE file at the root of this repository. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0. 
+ +terraform { + required_version = ">= 1.0" + + required_providers { + google = { + source = "hashicorp/google" + version = ">= 6.0" + } + } +} + +provider "google" { + project = var.project_id + region = var.region +} + +module "materialize" { + source = "github.com/MaterializeInc/terraform-google-materialize?ref=v0.1.1" + + project_id = var.project_id + region = var.region + prefix = "tf-gcp-test" + + database_config = { + tier = "db-custom-2-4096" + version = "POSTGRES_15" + password = var.database_password + } + + labels = { + environment = "simple" + example = "true" + } + + install_materialize_operator = true + + helm_values = { + clusters = { + defaultReplicationFactor = { + system = 1 + probe = 1 + support = 1 + analytics = 1 + } + } + } +} + +variable "project_id" { + description = "GCP Project ID" + type = string + default = "materialize-ci" +} + +variable "region" { + description = "GCP Region" + type = string + default = "us-east1" +} + +variable "database_password" { + description = "Password for Cloud SQL database user" + default = "your-strong-password" + type = string + sensitive = true +} + +output "gke_cluster" { + description = "GKE cluster details" + value = module.materialize.gke_cluster + sensitive = true +} + +output "service_accounts" { + description = "Service account details" + value = module.materialize.service_accounts +} + +output "connection_strings" { + description = "Connection strings for metadata and persistence backends" + value = module.materialize.connection_strings + sensitive = true +} diff --git a/test/terraform/mzcompose.py b/test/terraform/mzcompose.py index 1d851b92d5b2d..a4545e0db56c0 100644 --- a/test/terraform/mzcompose.py +++ b/test/terraform/mzcompose.py @@ -11,6 +11,7 @@ """ import argparse +import json import os import signal import subprocess @@ -31,7 +32,71 @@ from materialize.mzcompose.services.testdrive import Testdrive SERVICES = [ - Testdrive(), # Overridden below + Testdrive(), # overridden below +] + + +COMPATIBLE_TESTDRIVE_FILES = [ + "array.td", + "cancel-subscribe.td", + "char-varchar-distinct.td", + "char-varchar-joins.td", + "char-varchar-multibyte.td", + "constants.td", + "coordinator-multiplicities.td", + "create-views.td", + "date_func.td", + "decimal-distinct.td", + "decimal-join.td", + "decimal-order.td", + "decimal-overflow.td", + "decimal-sum.td", + "decimal-zero.td", + "delete-using.td", + "drop.td", + "duplicate-table-names.td", + "failpoints.td", + "fetch-tail-as-of.td", + "fetch-tail-large-diff.td", + "fetch-tail-limit-timeout.td", + "fetch-tail-timestamp-zero.td", + "fetch-timeout.td", + "float_sum.td", + "get-started.td", + "github-11563.td", + "github-1947.td", + "github-3281.td", + "github-5502.td", + "github-5774.td", + "github-5873.td", + "github-5983.td", + "github-5984.td", + "github-6335.td", + "github-6744.td", + "github-6950.td", + "github-7171.td", + "github-7191.td", + "github-795.td", + "joins.td", + "jsonb.td", + "list.td", + "logging.td", + "map.td", + "multijoins.td", + "numeric-sum.td", + "numeric.td", + "oid.td", + "orms.td", + "pg-catalog.td", + "runtime-errors.td", + "search_path.td", + "self-test.td", + "string.td", + "subquery-scalar-errors.td", + "system-functions.td", + "test-skip-if.td", + "type_char_quoted.td", + "version.td", ] @@ -39,15 +104,1011 @@ def run_ignore_error( args: Sequence[Path | str], cwd: Path | None = None, stdin: None | int | IO[bytes] | bytes = None, + env: dict[str, str] | None = None, ): try: - spawn.runv(args, cwd=cwd, stdin=stdin) + spawn.runv(args, cwd=cwd, 
stdin=stdin, env=env) except subprocess.CalledProcessError: pass -def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: - """To run locally use `aws sso login` first.""" +def testdrive(no_reset: bool) -> Testdrive: + return Testdrive( + materialize_url="postgres://materialize@127.0.0.1:6875/materialize", + materialize_url_internal="postgres://mz_system:materialize@127.0.0.1:6877/materialize", + materialize_use_https=True, + no_consistency_checks=True, + network_mode="host", + volume_workdir="../testdrive:/workdir", + no_reset=no_reset, + # For full testdrive support we'll need: + # kafka_url=... + # schema_registry_url=... + # aws_endpoint=... + ) + + +def get_tag(tag: str) -> str: + return tag or f"v{ci_util.get_mz_version()}--pr.g{git.rev_parse('HEAD')}" + + +def mz_self_managed_debug(env: dict[str, str] | None = None) -> None: + run_ignore_error( + [ + "cargo", + "run", + "--bin", + "mz-self-managed-debug", + "--", + "--k8s-namespace", + "materialize-environment", + "--k8s-namespace", + "materialize", + ], + cwd=MZ_ROOT, + env=env, + ) + + +class AWS: + materialize_environment: dict | None + path: Path + environmentd_port_forward_process: subprocess.Popen[bytes] | None + balancerd_port_forward_process: subprocess.Popen[bytes] | None + + def __init__(self, path: Path): + self.materialize_environment = None + self.path = path + self.environmentd_port_forward_process = None + self.balancerd_port_forward_process = None + + def setup(self, prefix: str, setup: bool, tag: str) -> None: + if not setup: + spawn.runv( + [ + "aws", + "eks", + "update-kubeconfig", + "--name", + f"{prefix}-dev-eks", + "--region", + "us-east-1", + ] + ) + return + + vars = [ + "-var", + "operator_version=v25.2.0-beta.1", + "-var", + f"orchestratord_version={tag}", + ] + + print("--- Setup") + spawn.runv( + ["helm", "package", "../../../misc/helm-charts/operator/"], cwd=self.path + ) + spawn.runv(["terraform", "init"], cwd=self.path) + spawn.runv(["terraform", "validate"], cwd=self.path) + spawn.runv(["terraform", "plan", *vars], cwd=self.path) + spawn.runv(["terraform", "apply", "-auto-approve", *vars], cwd=self.path) + + metadata_backend_url = spawn.capture( + ["terraform", "output", "-raw", "metadata_backend_url"], cwd=self.path + ).strip() + persist_backend_url = spawn.capture( + ["terraform", "output", "-raw", "persist_backend_url"], cwd=self.path + ).strip() + spawn.capture( + ["terraform", "output", "-raw", "materialize_s3_role_arn"], cwd=self.path + ).strip() + + spawn.runv( + [ + "aws", + "eks", + "update-kubeconfig", + "--name", + f"{prefix}-dev-eks", + "--region", + "us-east-1", + ] + ) + + spawn.runv(["kubectl", "get", "nodes"]) + # Not working yet? 
+ # spawn.runv( + # ["helm", "repo", "add", "openebs", "https://openebs.github.io/openebs"] + # ) + # spawn.runv(["helm", "repo", "update"]) + # spawn.runv( + # [ + # "helm", + # "install", + # "openebs", + # "--namespace", + # "openebs", + # "openebs/openebs", + # "--set", + # "engines.replicated.mayastor.enabled=false", + # "--create-namespace", + # ] + # ) + # spawn.runv( + # ["kubectl", "get", "pods", "-n", "openebs", "-l", "role=openebs-lvm"] + # ) + + spawn.capture( + [ + "aws", + "sts", + "get-caller-identity", + "--query", + "Account", + "--output", + "text", + ] + ).strip() + + for i in range(60): + try: + spawn.runv( + ["kubectl", "get", "pods", "-n", "materialize"], + cwd=self.path, + ) + status = spawn.capture( + [ + "kubectl", + "get", + "pods", + "-n", + "materialize", + "-o", + "jsonpath={.items[0].status.phase}", + ], + cwd=self.path, + ) + if status == "Running": + break + except subprocess.CalledProcessError: + time.sleep(1) + else: + raise ValueError("Never completed") + + spawn.runv(["kubectl", "create", "namespace", "materialize-environment"]) + + materialize_backend_secret = { + "apiVersion": "v1", + "kind": "Secret", + "metadata": { + "name": "materialize-backend", + "namespace": "materialize-environment", + }, + "stringData": { + "metadata_backend_url": metadata_backend_url, + "persist_backend_url": persist_backend_url, + }, + } + + spawn.runv( + ["kubectl", "apply", "-f", "-"], + cwd=self.path, + stdin=yaml.dump(materialize_backend_secret).encode(), + ) + + self.materialize_environment = { + "apiVersion": "materialize.cloud/v1alpha1", + "kind": "Materialize", + "metadata": { + "name": "12345678-1234-1234-1234-123456789012", + "namespace": "materialize-environment", + }, + "spec": { + "environmentdImageRef": f"materialize/environmentd:{tag}", + "environmentdResourceRequirements": { + "limits": {"memory": "4Gi"}, + "requests": {"cpu": "2", "memory": "4Gi"}, + }, + "balancerdResourceRequirements": { + "limits": {"memory": "256Mi"}, + "requests": {"cpu": "100m", "memory": "256Mi"}, + }, + "backendSecretName": "materialize-backend", + }, + } + + spawn.runv( + ["kubectl", "apply", "-f", "-"], + cwd=self.path, + stdin=yaml.dump(self.materialize_environment).encode(), + ) + for i in range(60): + try: + spawn.runv( + [ + "kubectl", + "get", + "materializes", + "-n", + "materialize-environment", + ], + cwd=self.path, + ) + break + except subprocess.CalledProcessError: + time.sleep(1) + else: + raise ValueError("Never completed") + for i in range(240): + try: + spawn.runv( + ["kubectl", "get", "pods", "-n", "materialize-environment"], + cwd=self.path, + ) + status = spawn.capture( + [ + "kubectl", + "get", + "pods", + "-l", + "app=environmentd", + "-n", + "materialize-environment", + "-o", + "jsonpath={.items[0].status.phase}", + ], + cwd=self.path, + ) + if status == "Running": + break + except subprocess.CalledProcessError: + time.sleep(1) + else: + raise ValueError("Never completed") + + # Can take a while for balancerd to come up + for i in range(300): + try: + status = spawn.capture( + [ + "kubectl", + "get", + "pods", + "-l", + "app=balancerd", + "-n", + "materialize-environment", + "-o", + "jsonpath={.items[0].status.phase}", + ], + cwd=self.path, + ) + if status == "Running": + break + except subprocess.CalledProcessError: + time.sleep(1) + else: + raise ValueError("Never completed") + + def connect(self, c: Composition) -> None: + environmentd_name = spawn.capture( + [ + "kubectl", + "get", + "pods", + "-l", + "app=environmentd", + "-n", + "materialize-environment", 
+ "-o", + "jsonpath={.items[*].metadata.name}", + ], + cwd=self.path, + ) + + balancerd_name = spawn.capture( + [ + "kubectl", + "get", + "pods", + "-l", + "app=balancerd", + "-n", + "materialize-environment", + "-o", + "jsonpath={.items[*].metadata.name}", + ], + cwd=self.path, + ) + # error: arguments in resource/name form must have a single resource and name + print(f"Got balancerd name: {balancerd_name}") + + self.environmentd_port_forward_process = subprocess.Popen( + [ + "kubectl", + "port-forward", + f"pod/{environmentd_name}", + "-n", + "materialize-environment", + "6877:6877", + "6878:6878", + ], + preexec_fn=os.setpgrp, + ) + self.balancerd_port_forward_process = subprocess.Popen( + [ + "kubectl", + "port-forward", + f"pod/{balancerd_name}", + "-n", + "materialize-environment", + "6875:6875", + "6876:6876", + ], + preexec_fn=os.setpgrp, + ) + time.sleep(10) + + with psycopg.connect( + "postgres://mz_system:materialize@127.0.0.1:6877/materialize", + autocommit=True, + ) as conn: + with conn.cursor() as cur: + # Required for some testdrive tests + cur.execute("ALTER CLUSTER mz_system SET (REPLICATION FACTOR 1)") + + c.up("testdrive", persistent=True) + c.testdrive( + dedent( + """ + > SELECT 1 + 1 + """ + ) + ) + + def cleanup(self) -> None: + if self.environmentd_port_forward_process: + os.killpg( + os.getpgid(self.environmentd_port_forward_process.pid), signal.SIGTERM + ) + if self.balancerd_port_forward_process: + os.killpg( + os.getpgid(self.balancerd_port_forward_process.pid), signal.SIGTERM + ) + + def destroy(self) -> None: + print("--- Destroying") + if self.materialize_environment: + run_ignore_error( + ["kubectl", "delete", "-f", "-"], + cwd=self.path, + stdin=yaml.dump(self.materialize_environment).encode(), + ) + run_ignore_error( + [ + "kubectl", + "delete", + "materialize.materialize.cloud/12345678-1234-1234-1234-123456789012", + "-n" "materialize-environment", + ] + ) + run_ignore_error(["kubectl", "delete", "namespace", "materialize-environment"]) + run_ignore_error(["kubectl", "delete", "namespace", "materialize"]) + spawn.runv(["terraform", "destroy", "-auto-approve"], cwd=self.path) + + +def workflow_aws_temporary(c: Composition, parser: WorkflowArgumentParser) -> None: + """To run locally use `aws sso login` first.""" + parser.add_argument( + "--setup", + default=True, + action=argparse.BooleanOptionalAction, + help="Run setup steps", + ) + parser.add_argument( + "--cleanup", + default=True, + action=argparse.BooleanOptionalAction, + help="Destroy the region at the end of the workflow.", + ) + parser.add_argument( + "--tag", + type=str, + help="Custom version tag to use", + ) + parser.add_argument( + "files", + nargs="*", + default=[ + "array.td", + "cancel-subscribe.td", + "char-varchar-distinct.td", + "char-varchar-joins.td", + "char-varchar-multibyte.td", + "constants.td", + "coordinator-multiplicities.td", + "create-views.td", + "date_func.td", + "decimal-distinct.td", + "decimal-join.td", + "decimal-order.td", + "decimal-overflow.td", + "decimal-sum.td", + "decimal-zero.td", + "delete-using.td", + "drop.td", + "duplicate-table-names.td", + "failpoints.td", + "fetch-tail-as-of.td", + "fetch-tail-large-diff.td", + "fetch-tail-limit-timeout.td", + "fetch-tail-timestamp-zero.td", + "fetch-timeout.td", + "float_sum.td", + "get-started.td", + "github-11563.td", + "github-1947.td", + "github-3281.td", + "github-5502.td", + "github-5774.td", + "github-5873.td", + "github-5983.td", + "github-5984.td", + "github-6335.td", + "github-6744.td", + "github-6950.td", + 
"github-7171.td", + "github-7191.td", + "github-795.td", + "joins.td", + "jsonb.td", + "list.td", + "logging.td", + "map.td", + "multijoins.td", + "numeric-sum.td", + "numeric.td", + "oid.td", + "orms.td", + "pg-catalog.td", + "runtime-errors.td", + "search_path.td", + "self-test.td", + "string.td", + "subquery-scalar-errors.td", + "system-functions.td", + "test-skip-if.td", + "type_char_quoted.td", + "version.td", + ], + help="run against the specified files", + ) + + args = parser.parse_args() + + tag = get_tag(args.tag) + path = MZ_ROOT / "test" / "terraform" / "aws-temporary" + aws = AWS(path) + try: + aws.setup("aws-test", args.setup, tag) + print("--- Running tests") + with c.override(testdrive(no_reset=False)): + aws.connect(c) + + with psycopg.connect( + "postgres://materialize@127.0.0.1:6875/materialize" + ) as conn: + with conn.cursor() as cur: + cur.execute("SELECT 1") + results = cur.fetchall() + assert results == [(1,)], results + cur.execute("SELECT mz_version()") + version = cur.fetchall()[0][0] + assert version.startswith(tag.split("--")[0] + " ") + with open( + MZ_ROOT / "misc" / "helm-charts" / "operator" / "Chart.yaml" + ) as f: + content = yaml.load(f, Loader=yaml.Loader) + helm_chart_version = content["version"] + assert version.endswith( + f", helm chart: {helm_chart_version})" + ), f"Actual version: {version}, expected to contain {helm_chart_version}" + + c.run_testdrive_files(*args.files) + finally: + aws.cleanup() + + mz_self_managed_debug() + + if args.cleanup: + aws.destroy() + + +PATH_AWS_PERSISTENT = MZ_ROOT / "test" / "terraform" / "aws-persistent" +PREFIX_AWS_PERSISTENT = "aws-persistent" + + +def workflow_aws_persistent_setup( + c: Composition, parser: WorkflowArgumentParser +) -> None: + """Setup the AWS persistent Terraform and Helm Chart""" + parser.add_argument( + "--tag", + type=str, + help="Custom version tag to use", + ) + + args = parser.parse_args() + + tag = get_tag(args.tag) + aws = AWS(PATH_AWS_PERSISTENT) + try: + aws.setup(PREFIX_AWS_PERSISTENT, True, tag) + with c.override(testdrive(no_reset=True)): + aws.connect(c) + c.testdrive( + dedent( + """ + > CREATE SOURCE counter FROM LOAD GENERATOR COUNTER + > CREATE TABLE table (c INT) + > CREATE MATERIALIZED VIEW mv AS SELECT count(*) FROM table + """ + ) + ) + finally: + aws.cleanup() + + +def workflow_aws_persistent_test( + c: Composition, parser: WorkflowArgumentParser +) -> None: + """Run a test workload against the AWS persistent setup""" + parser.add_argument( + "--tag", + type=str, + help="Custom version tag to use", + ) + + parser.add_argument("--runtime", default=600, type=int, help="Runtime in seconds") + + args = parser.parse_args() + + start_time = time.time() + + tag = get_tag(args.tag) + aws = AWS(PATH_AWS_PERSISTENT) + try: + aws.setup(PREFIX_AWS_PERSISTENT, False, tag) + with c.override(testdrive(no_reset=True)): + aws.connect(c) + + count = 1 + + c.testdrive( + dedent( + """ + > DELETE FROM table + """ + ) + ) + + while time.time() - start_time < args.runtime: + c.testdrive( + dedent( + f""" + > SELECT 1 + 1 + + > INSERT INTO table VALUES ({count}) + + > SELECT count(*) FROM table + {count} + + > SELECT * FROM mv + {count} + + > DROP VIEW IF EXISTS temp + + > CREATE VIEW temp AS SELECT * FROM mv + + > SELECT * FROM temp + {count} + """ + ) + ) + + count += 1 + + with psycopg.connect( + "postgres://materialize@127.0.0.1:6875/materialize", autocommit=True + ) as conn: + with conn.cursor() as cur: + cur.execute("SELECT max(counter) FROM counter") + old_max = cur.fetchall()[0][0] + 
time.sleep(5) + with conn.cursor() as cur: + cur.execute("SELECT max(counter) FROM counter") + new_max = cur.fetchall()[0][0] + assert new_max > old_max, f"{new_max} should be greater than {old_max}" + finally: + aws.cleanup() + + +def workflow_aws_persistent_destroy( + c: Composition, parser: WorkflowArgumentParser +) -> None: + """Setup the AWS persistent Terraform and Helm Chart""" + aws = AWS(PATH_AWS_PERSISTENT) + aws.destroy() + + +def workflow_gcp_temporary(c: Composition, parser: WorkflowArgumentParser) -> None: + parser.add_argument( + "--setup", + default=True, + action=argparse.BooleanOptionalAction, + help="Run setup steps", + ) + parser.add_argument( + "--cleanup", + default=True, + action=argparse.BooleanOptionalAction, + help="Destroy the region at the end of the workflow.", + ) + parser.add_argument( + "--tag", + type=str, + help="Custom version tag to use", + ) + parser.add_argument( + "files", + nargs="*", + default=COMPATIBLE_TESTDRIVE_FILES, + help="run against the specified files", + ) + + args = parser.parse_args() + + tag = args.tag or f"v{ci_util.get_mz_version()}--pr.g{git.rev_parse('HEAD')}" + materialize_environment = None + environmentd_port_forward_process = None + balancerd_port_forward_process = None + + path = MZ_ROOT / "test" / "terraform" / "gcp-temporary" + + gcp_service_account_json = os.getenv("GCP_SERVICE_ACCOUNT_JSON") + assert ( + gcp_service_account_json + ), "GCP_SERVICE_ACCOUNT_JSON environment variable has to be set" + gcloud_creds_path = path / "gcp.json" + with open(gcloud_creds_path, "w") as f: + f.write(gcp_service_account_json) + os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = str(gcloud_creds_path) + + try: + spawn.runv(["gcloud", "config", "set", "project", "materialize-ci"]) + + spawn.runv( + [ + "gcloud", + "auth", + "activate-service-account", + f"--key-file={gcloud_creds_path}", + ], + ) + + if args.setup: + print("--- Setup") + spawn.runv(["terraform", "init"], cwd=path) + spawn.runv(["terraform", "validate"], cwd=path) + spawn.runv(["terraform", "plan"], cwd=path) + spawn.runv(["terraform", "apply", "-auto-approve"], cwd=path) + + gke_cluster = json.loads( + spawn.capture( + ["terraform", "output", "-json", "gke_cluster"], cwd=path + ).strip() + ) + connection_strings = json.loads( + spawn.capture( + ["terraform", "output", "-json", "connection_strings"], cwd=path + ).strip() + ) + + spawn.runv( + [ + "gcloud", + "container", + "clusters", + "get-credentials", + gke_cluster["name"], + "--region", + gke_cluster["location"], + "--project", + "materialize-ci", + ] + ) + + spawn.runv(["kubectl", "get", "nodes"]) + + if args.setup: + for i in range(60): + try: + spawn.runv( + ["kubectl", "get", "pods", "-n", "materialize"], + cwd=path, + ) + status = spawn.capture( + [ + "kubectl", + "get", + "pods", + "-n", + "materialize", + "-o", + "jsonpath={.items[0].status.phase}", + ], + cwd=path, + ) + if status == "Running": + break + except subprocess.CalledProcessError: + time.sleep(1) + else: + raise ValueError("Never completed") + + spawn.runv(["kubectl", "create", "namespace", "materialize-environment"]) + + materialize_backend_secret = { + "apiVersion": "v1", + "kind": "Secret", + "metadata": { + "name": "materialize-backend", + "namespace": "materialize-environment", + }, + "stringData": { + "metadata_backend_url": connection_strings["metadata_backend_url"], + "persist_backend_url": connection_strings["persist_backend_url"], + }, + } + + spawn.runv( + ["kubectl", "apply", "-f", "-"], + cwd=path, + 
stdin=yaml.dump(materialize_backend_secret).encode(), + ) + + materialize_environment = { + "apiVersion": "materialize.cloud/v1alpha1", + "kind": "Materialize", + "metadata": { + "name": "12345678-1234-1234-1234-123456789012", + "namespace": "materialize-environment", + }, + "spec": { + "environmentdImageRef": f"materialize/environmentd:{tag}", + "environmentdResourceRequirements": { + "limits": {"memory": "4Gi"}, + "requests": {"cpu": "2", "memory": "4Gi"}, + }, + "balancerdResourceRequirements": { + "limits": {"memory": "256Mi"}, + "requests": {"cpu": "100m", "memory": "256Mi"}, + }, + "backendSecretName": "materialize-backend", + }, + } + + spawn.runv( + ["kubectl", "apply", "-f", "-"], + cwd=path, + stdin=yaml.dump(materialize_environment).encode(), + ) + for i in range(60): + try: + spawn.runv( + [ + "kubectl", + "get", + "materializes", + "-n", + "materialize-environment", + ], + cwd=path, + ) + break + except subprocess.CalledProcessError: + time.sleep(1) + else: + raise ValueError("Never completed") + for i in range(180): + try: + spawn.runv( + ["kubectl", "get", "pods", "-n", "materialize-environment"], + cwd=path, + ) + status = spawn.capture( + [ + "kubectl", + "get", + "pods", + "-l", + "app=environmentd", + "-n", + "materialize-environment", + "-o", + "jsonpath={.items[0].status.phase}", + ], + cwd=path, + ) + if status == "Running": + break + except subprocess.CalledProcessError: + time.sleep(1) + else: + raise ValueError("Never completed") + + # Can take a while for balancerd to come up + for i in range(240): + try: + status = spawn.capture( + [ + "kubectl", + "get", + "pods", + "-l", + "app=balancerd", + "-n", + "materialize-environment", + "-o", + "jsonpath={.items[0].status.phase}", + ], + cwd=path, + ) + if status == "Running": + break + except subprocess.CalledProcessError: + time.sleep(1) + else: + raise ValueError("Never completed") + + print("--- Running tests") + environmentd_name = spawn.capture( + [ + "kubectl", + "get", + "pods", + "-l", + "app=environmentd", + "-n", + "materialize-environment", + "-o", + "jsonpath={.items[*].metadata.name}", + ], + cwd=path, + ) + + balancerd_name = spawn.capture( + [ + "kubectl", + "get", + "pods", + "-l", + "app=balancerd", + "-n", + "materialize-environment", + "-o", + "jsonpath={.items[*].metadata.name}", + ], + cwd=path, + ) + # error: arguments in resource/name form must have a single resource and name + print(f"Got balancerd name: {balancerd_name}") + + environmentd_port_forward_process = subprocess.Popen( + [ + "kubectl", + "port-forward", + f"pod/{environmentd_name}", + "-n", + "materialize-environment", + "6877:6877", + "6878:6878", + ], + preexec_fn=os.setpgrp, + ) + balancerd_port_forward_process = subprocess.Popen( + [ + "kubectl", + "port-forward", + f"pod/{balancerd_name}", + "-n", + "materialize-environment", + "6875:6875", + "6876:6876", + ], + preexec_fn=os.setpgrp, + ) + time.sleep(10) + + with c.override( + Testdrive( + materialize_url="postgres://materialize@127.0.0.1:6875/materialize", + materialize_url_internal="postgres://mz_system:materialize@127.0.0.1:6877/materialize", + materialize_use_https=True, + no_consistency_checks=True, + network_mode="host", + volume_workdir="../testdrive:/workdir", + # For full testdrive support we'll need: + # kafka_url=... + # schema_registry_url=... + # aws_endpoint=... 
+ ) + ): + c.up("testdrive", persistent=True) + c.testdrive( + dedent( + """ + > SELECT 1 + 1 + """ + ) + ) + + with psycopg.connect( + "postgres://materialize@127.0.0.1:6875/materialize" + ) as conn: + with conn.cursor() as cur: + cur.execute("SELECT 1") + results = cur.fetchall() + assert results == [(1,)], results + cur.execute("SELECT mz_version()") + version = cur.fetchall()[0][0] + assert version.startswith(tag.split("--")[0] + " ") + with open( + MZ_ROOT / "misc" / "helm-charts" / "operator" / "Chart.yaml" + ) as f: + content = yaml.load(f, Loader=yaml.Loader) + content["version"] + # TODO: Reenable when we can pass the helm-chart path in directly + # assert version.endswith( + # f", helm chart: {helm_chart_version})" + # ), f"Actual version: {version}, expected to contain {helm_chart_version}" + + c.run_testdrive_files(*args.files) + finally: + if environmentd_port_forward_process: + os.killpg(os.getpgid(environmentd_port_forward_process.pid), signal.SIGTERM) + if balancerd_port_forward_process: + os.killpg(os.getpgid(balancerd_port_forward_process.pid), signal.SIGTERM) + + mz_self_managed_debug() + + if args.cleanup: + print("--- Cleaning up") + if materialize_environment: + run_ignore_error( + ["kubectl", "delete", "-f", "-"], + cwd=path, + stdin=yaml.dump(materialize_environment).encode(), + ) + run_ignore_error( + [ + "kubectl", + "delete", + "materialize.materialize.cloud/12345678-1234-1234-1234-123456789012", + "-n" "materialize-environment", + ] + ) + run_ignore_error( + ["kubectl", "delete", "namespace", "materialize-environment"] + ) + run_ignore_error(["kubectl", "delete", "namespace", "materialize"]) + spawn.runv(["terraform", "destroy", "-auto-approve"], cwd=path) + + +def workflow_azure_temporary(c: Composition, parser: WorkflowArgumentParser) -> None: parser.add_argument( "--setup", default=True, @@ -68,68 +1129,7 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: parser.add_argument( "files", nargs="*", - default=[ - "array.td", - "cancel-subscribe.td", - "char-varchar-distinct.td", - "char-varchar-joins.td", - "char-varchar-multibyte.td", - "constants.td", - "coordinator-multiplicities.td", - "create-views.td", - "date_func.td", - "decimal-distinct.td", - "decimal-join.td", - "decimal-order.td", - "decimal-overflow.td", - "decimal-sum.td", - "decimal-zero.td", - "delete-using.td", - "drop.td", - "duplicate-table-names.td", - "failpoints.td", - "fetch-tail-as-of.td", - "fetch-tail-large-diff.td", - "fetch-tail-limit-timeout.td", - "fetch-tail-timestamp-zero.td", - "fetch-timeout.td", - "float_sum.td", - "get-started.td", - "github-11563.td", - "github-1947.td", - "github-3281.td", - "github-5502.td", - "github-5774.td", - "github-5873.td", - "github-5983.td", - "github-5984.td", - "github-6335.td", - "github-6744.td", - "github-6950.td", - "github-7171.td", - "github-7191.td", - "github-795.td", - "joins.td", - "jsonb.td", - "list.td", - "logging.td", - "map.td", - "multijoins.td", - "numeric-sum.td", - "numeric.td", - "oid.td", - "orms.td", - "pg-catalog.td", - "runtime-errors.td", - "search_path.td", - "self-test.td", - "string.td", - "subquery-scalar-errors.td", - "system-functions.td", - "test-skip-if.td", - "type_char_quoted.td", - "version.td", - ], + default=COMPATIBLE_TESTDRIVE_FILES, help="run against the specified files", ) @@ -140,121 +1140,99 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: environmentd_port_forward_process = None balancerd_port_forward_process = None - path = MZ_ROOT / "test" 
/ "terraform" / "aws" - try: - if args.setup: - print("--- Setup") - spawn.runv(["terraform", "init"], cwd=path) - spawn.runv(["terraform", "validate"], cwd=path) - spawn.runv(["terraform", "plan"], cwd=path) - spawn.runv(["terraform", "apply", "-auto-approve"], cwd=path) + path = MZ_ROOT / "test" / "terraform" / "azure-temporary" - metadata_backend_url = spawn.capture( - ["terraform", "output", "-raw", "metadata_backend_url"], cwd=path - ).strip() - persist_backend_url = spawn.capture( - ["terraform", "output", "-raw", "persist_backend_url"], cwd=path - ).strip() - materialize_s3_role_arn = spawn.capture( - ["terraform", "output", "-raw", "materialize_s3_role_arn"], cwd=path - ).strip() + spawn.runv(["bin/ci-builder", "run", "stable", "uv", "venv", str(path / "venv")]) + venv_env = os.environ.copy() + venv_env["PATH"] = f"{path/'venv'/'bin'}:{os.getenv('PATH')}" + venv_env["VIRTUAL_ENV"] = str(path / "venv") + spawn.runv( + ["uv", "pip", "install", "-r", "requirements.txt", "--prerelease=allow"], + cwd=path, + env=venv_env, + ) + try: + if os.getenv("CI"): + username = os.getenv("AZURE_SERVICE_ACCOUNT_USERNAME") + password = os.getenv("AZURE_SERVICE_ACCOUNT_PASSWORD") + tenant = os.getenv("AZURE_SERVICE_ACCOUNT_TENANT") + assert username, "AZURE_SERVICE_ACCOUNT_USERNAME has to be set" + assert password, "AZURE_SERVICE_ACCOUNT_PASSWORD has to be set" + assert tenant, "AZURE_SERVICE_ACOUNT_TENANT has to be set" spawn.runv( [ - "aws", - "eks", - "update-kubeconfig", - "--name", - "terraform-aws-test-cluster", - "--region", - "us-east-1", - ] + "az", + "login", + "--service-principal", + "--username", + username, + "--password", + password, + "--tenant", + tenant, + ], + env=venv_env, ) - spawn.runv(["kubectl", "get", "nodes"]) - # Not working yet? - # spawn.runv( - # ["helm", "repo", "add", "openebs", "https://openebs.github.io/openebs"] - # ) - # spawn.runv(["helm", "repo", "update"]) - # spawn.runv( - # [ - # "helm", - # "install", - # "openebs", - # "--namespace", - # "openebs", - # "openebs/openebs", - # "--set", - # "engines.replicated.mayastor.enabled=false", - # "--create-namespace", - # ] - # ) - # spawn.runv( - # ["kubectl", "get", "pods", "-n", "openebs", "-l", "role=openebs-lvm"] - # ) - - aws_account_id = spawn.capture( - [ - "aws", - "sts", - "get-caller-identity", - "--query", - "Account", - "--output", - "text", - ] + if args.setup: + print("--- Setup") + spawn.runv(["terraform", "init"], cwd=path, env=venv_env) + spawn.runv(["terraform", "validate"], cwd=path, env=venv_env) + spawn.runv(["terraform", "plan"], cwd=path, env=venv_env) + spawn.runv(["terraform", "apply", "-auto-approve"], cwd=path, env=venv_env) + + aks_cluster = json.loads( + spawn.capture( + ["terraform", "output", "-json", "aks_cluster"], cwd=path, env=venv_env + ).strip() + ) + connection_strings = json.loads( + spawn.capture( + ["terraform", "output", "-json", "connection_strings"], + cwd=path, + env=venv_env, ).strip() - public_ip_address = spawn.capture( - ["curl", "http://checkip.amazonaws.com"] + ) + json.loads( + spawn.capture( + ["terraform", "output", "-json", "kube_config"], cwd=path, env=venv_env ).strip() + ) + resource_group_name = spawn.capture( + ["terraform", "output", "-raw", "resource_group_name"], + cwd=path, + env=venv_env, + ).strip() - materialize_values = { - "operator": { - "image": {"tag": tag}, - "cloudProvider": { - "type": "aws", - "region": "us-east-1", - "providers": { - "aws": { - "enabled": True, - "accountID": aws_account_id, - "iam": { - "roles": {"environment": 
materialize_s3_role_arn} - }, - } - }, - }, - }, - "rbac": {"enabled": False}, - "networkPolicies": { - "enabled": True, - "egress": {"enabled": True, "cidrs": ["0.0.0.0/0"]}, - "ingress": {"enabled": True, "cidrs": [f"{public_ip_address}/24"]}, - "internal": {"enabled": True}, - }, - } + # kube_config_path = path / "kubeconfig.json" + # with open(kube_config_path, "w") as f: + # json.dump(kube_config, f) + # kubectl_env = os.environ.copy() + # kubectl_env["KUBECONFIG"] = str(kube_config_path) + spawn.runv( + [ + "az", + "aks", + "get-credentials", + "--overwrite-existing", + "--resource-group", + resource_group_name, + "--name", + aks_cluster["name"], + ], + env=venv_env, + ) - spawn.runv( - [ - "helm", - "install", - "materialize-operator", - "misc/helm-charts/operator", - "--namespace", - "materialize", - "--create-namespace", - "-f", - "-", - ], - cwd=MZ_ROOT, - stdin=yaml.dump(materialize_values).encode(), - ) + spawn.runv(["kubectl", "get", "nodes"], env=venv_env) + + if args.setup: for i in range(60): try: spawn.runv( ["kubectl", "get", "pods", "-n", "materialize"], cwd=path, + env=venv_env, ) status = spawn.capture( [ @@ -267,6 +1245,7 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: "jsonpath={.items[0].status.phase}", ], cwd=path, + env=venv_env, ) if status == "Running": break @@ -275,7 +1254,10 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: else: raise ValueError("Never completed") - spawn.runv(["kubectl", "create", "namespace", "materialize-environment"]) + spawn.runv( + ["kubectl", "create", "namespace", "materialize-environment"], + env=venv_env, + ) materialize_backend_secret = { "apiVersion": "v1", @@ -285,8 +1267,8 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: "namespace": "materialize-environment", }, "stringData": { - "metadata_backend_url": metadata_backend_url, - "persist_backend_url": persist_backend_url, + "metadata_backend_url": connection_strings["metadata_backend_url"], + "persist_backend_url": connection_strings["persist_backend_url"], }, } @@ -294,6 +1276,7 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: ["kubectl", "apply", "-f", "-"], cwd=path, stdin=yaml.dump(materialize_backend_secret).encode(), + env=venv_env, ) materialize_environment = { @@ -321,6 +1304,7 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: ["kubectl", "apply", "-f", "-"], cwd=path, stdin=yaml.dump(materialize_environment).encode(), + env=venv_env, ) for i in range(60): try: @@ -333,6 +1317,7 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: "materialize-environment", ], cwd=path, + env=venv_env, ) break except subprocess.CalledProcessError: @@ -344,6 +1329,7 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: spawn.runv( ["kubectl", "get", "pods", "-n", "materialize-environment"], cwd=path, + env=venv_env, ) status = spawn.capture( [ @@ -358,6 +1344,7 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: "jsonpath={.items[0].status.phase}", ], cwd=path, + env=venv_env, ) if status == "Running": break @@ -382,6 +1369,7 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: "jsonpath={.items[0].status.phase}", ], cwd=path, + env=venv_env, ) if status == "Running": break @@ -389,18 +1377,6 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: time.sleep(1) else: raise ValueError("Never 
completed") - else: - spawn.runv( - [ - "aws", - "eks", - "update-kubeconfig", - "--name", - "terraform-aws-test-cluster", - "--region", - "us-east-1", - ] - ) print("--- Running tests") environmentd_name = spawn.capture( @@ -416,6 +1392,7 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: "jsonpath={.items[*].metadata.name}", ], cwd=path, + env=venv_env, ) balancerd_name = spawn.capture( @@ -431,8 +1408,10 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: "jsonpath={.items[*].metadata.name}", ], cwd=path, + env=venv_env, ) # error: arguments in resource/name form must have a single resource and name + print(f"Got environmentd name: {environmentd_name}") print(f"Got balancerd name: {balancerd_name}") environmentd_port_forward_process = subprocess.Popen( @@ -446,6 +1425,7 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: "6878:6878", ], preexec_fn=os.setpgrp, + env=venv_env, ) balancerd_port_forward_process = subprocess.Popen( [ @@ -458,6 +1438,7 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: "6876:6876", ], preexec_fn=os.setpgrp, + env=venv_env, ) time.sleep(10) @@ -499,8 +1480,11 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: MZ_ROOT / "misc" / "helm-charts" / "operator" / "Chart.yaml" ) as f: content = yaml.load(f, Loader=yaml.Loader) - helm_chart_version = content["version"] - assert version.endswith(f", helm chart: {helm_chart_version})") + content["version"] + # TODO: Reenable when we can pass the helm-chart path in directly + # assert version.endswith( + # f", helm chart: {helm_chart_version})" + # ), f"Actual version: {version}, expected to contain {helm_chart_version}" c.run_testdrive_files(*args.files) finally: @@ -509,6 +1493,8 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: if balancerd_port_forward_process: os.killpg(os.getpgid(balancerd_port_forward_process.pid), signal.SIGTERM) + mz_self_managed_debug(env=venv_env) + if args.cleanup: print("--- Cleaning up") if materialize_environment: @@ -516,6 +1502,7 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: ["kubectl", "delete", "-f", "-"], cwd=path, stdin=yaml.dump(materialize_environment).encode(), + env=venv_env, ) run_ignore_error( [ @@ -523,14 +1510,23 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: "delete", "materialize.materialize.cloud/12345678-1234-1234-1234-123456789012", "-n" "materialize-environment", - ] + ], + env=venv_env, ) run_ignore_error( - ["kubectl", "delete", "namespace", "materialize-environment"] + ["kubectl", "delete", "namespace", "materialize-environment"], + env=venv_env, ) run_ignore_error( - ["helm", "uninstall", "materialize-operator"], - cwd=path, + ["kubectl", "delete", "namespace", "materialize"], env=venv_env ) - run_ignore_error(["kubectl", "delete", "namespace", "materialize"]) - spawn.runv(["terraform", "destroy", "-auto-approve"], cwd=path) + try: + spawn.runv( + ["terraform", "destroy", "-auto-approve"], cwd=path, env=venv_env + ) + except: + # TODO: Remove this when https://github.com/MaterializeInc/terraform-azurerm-materialize/pull/10 lands and we use it, currently required since subnet deletion does not work on first try + time.sleep(15) + spawn.runv( + ["terraform", "destroy", "-auto-approve"], cwd=path, env=venv_env + ) diff --git a/test/testdrive-old-kafka-src-syntax/mzcompose.py b/test/testdrive-old-kafka-src-syntax/mzcompose.py 
index c946b1a2de564..77fd29709bf9d 100644 --- a/test/testdrive-old-kafka-src-syntax/mzcompose.py +++ b/test/testdrive-old-kafka-src-syntax/mzcompose.py @@ -18,6 +18,7 @@ from materialize import ci_util from materialize.mzcompose import get_default_system_parameters from materialize.mzcompose.composition import Composition, WorkflowArgumentParser +from materialize.mzcompose.services.azure import Azurite from materialize.mzcompose.services.fivetran_destination import FivetranDestination from materialize.mzcompose.services.kafka import Kafka from materialize.mzcompose.services.materialized import Materialized @@ -37,9 +38,10 @@ Postgres(), MySql(), Minio(setup_materialize=True, additional_directories=["copytos3"]), - Materialized(external_minio=True), + Azurite(), + Materialized(external_blob_store=True), FivetranDestination(volumes_extra=["tmp:/share/tmp"]), - Testdrive(external_minio=True), + Testdrive(external_blob_store=True), ] @@ -88,6 +90,10 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: help="Rewrite results, disables junit reports", ) + parser.add_argument( + "--azurite", action="store_true", help="Use Azurite as blob store instead of S3" + ) + parser.add_argument( "files", nargs="*", @@ -98,7 +104,6 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: dependencies = [ "fivetran-destination", - "minio", "materialized", "postgres", "mysql", @@ -133,7 +138,8 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: materialized = Materialized( default_size=args.default_size, - external_minio=True, + external_blob_store=True, + blob_store_is_azure=args.azurite, additional_system_parameter_defaults=additional_system_parameter_defaults, ) @@ -144,7 +150,8 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: validate_catalog_store=True, default_timeout=args.default_timeout, volumes_extra=["mzdata:/mzdata"], - external_minio=True, + external_blob_store=True, + blob_store_is_azure=args.azurite, fivetran_destination=True, fivetran_destination_files_path="/share/tmp", entrypoint_extra=[ diff --git a/test/testdrive-old-kafka-src-syntax/source-linear-operators.td b/test/testdrive-old-kafka-src-syntax/source-linear-operators.td index 0b0a041f1b545..a616a5afe29ed 100644 --- a/test/testdrive-old-kafka-src-syntax/source-linear-operators.td +++ b/test/testdrive-old-kafka-src-syntax/source-linear-operators.td @@ -45,16 +45,6 @@ $ set-regex match=u\d+ replacement=UID > CREATE VIEW v as SELECT * from data where a = 1 and d = 3; -? EXPLAIN SELECT * FROM v; -Explained Query: - Filter (#0 = 1) AND (#3 = 3) - ReadStorage materialize.public.data - -Source materialize.public.data - filter=((#0 = 1) AND (#3 = 3)) - -Target cluster: quickstart - > CREATE DEFAULT INDEX ON v; > SELECT * FROM v 1 2 2 3 @@ -65,17 +55,6 @@ Target cluster: quickstart > CREATE VIEW v as SELECT b from data where b = 1; -? EXPLAIN SELECT * FROM v; -Explained Query: - Project (#1) - Filter (#1 = 1) - ReadStorage materialize.public.data - -Source materialize.public.data - filter=((#1 = 1)) - -Target cluster: quickstart - > CREATE DEFAULT INDEX ON v; > SELECT * FROM v 1 @@ -90,17 +69,6 @@ Target cluster: quickstart > CREATE VIEW v as SELECT b from inner_view where a = 1 -? 
EXPLAIN SELECT * FROM v; -Explained Query: - Project (#1) - Filter (#0 = 1) AND (#3 = 4) - ReadStorage materialize.public.data - -Source materialize.public.data - filter=((#0 = 1) AND (#3 = 4)) - -Target cluster: quickstart - > CREATE DEFAULT INDEX ON v; > SELECT * FROM v 1 @@ -111,17 +79,6 @@ Target cluster: quickstart > CREATE VIEW v as SELECT d from inner_view where a = 1; -? EXPLAIN SELECT * FROM v; -Explained Query: - Project (#3) - Filter (#0 = 1) AND (#3 = 4) - ReadStorage materialize.public.data - -Source materialize.public.data - filter=((#0 = 1) AND (#3 = 4)) - -Target cluster: quickstart - > CREATE DEFAULT INDEX ON v; > SELECT * FROM v 4 @@ -130,23 +87,6 @@ Target cluster: quickstart > CREATE VIEW v as SELECT s1.a from data s1, data s2 where s1.a = s2.b and s2.d = 4; -? EXPLAIN SELECT * FROM v; -Explained Query: - Project (#0) - Join on=(#0 = #1) type=differential - ArrangeBy keys=[[#0]] - Project (#0) - Filter (#0) IS NOT NULL - ReadStorage materialize.public.data - ArrangeBy keys=[[#0]] - Project (#1) - Filter (#3 = 4) AND (#1) IS NOT NULL - ReadStorage materialize.public.data - -Source materialize.public.data - -Target cluster: quickstart - > CREATE DEFAULT INDEX ON v; > SELECT * FROM v 1 @@ -160,24 +100,6 @@ Target cluster: quickstart > CREATE VIEW v as SELECT s2.a from data s1, data s2 where s1.a = s2.b and s2.d = 4 and s1.d = 4; -? EXPLAIN SELECT * FROM v; -Explained Query: - Project (#1) - Join on=(#0 = #2) type=differential - ArrangeBy keys=[[#0]] - Project (#0) - Filter (#3 = 4) AND (#0) IS NOT NULL - ReadStorage materialize.public.data - ArrangeBy keys=[[#1]] - Project (#0, #1) - Filter (#3 = 4) AND (#1) IS NOT NULL - ReadStorage materialize.public.data - -Source materialize.public.data - filter=((#3 = 4)) - -Target cluster: quickstart - > CREATE DEFAULT INDEX ON v; > SELECT * FROM v 1 @@ -187,27 +109,6 @@ Target cluster: quickstart > CREATE VIEW v as SELECT s2.c from data s1, data s2 where s1.a = s2.a -? EXPLAIN SELECT * FROM v; -Explained Query: - With - cte l0 = - Filter (#0) IS NOT NULL - ReadStorage materialize.public.data - Return - Project (#2) - Join on=(#0 = #1) type=differential - ArrangeBy keys=[[#0]] - Project (#0) - Get l0 - ArrangeBy keys=[[#0]] - Project (#0, #2) - Get l0 - -Source materialize.public.data - filter=((#0) IS NOT NULL) - -Target cluster: quickstart - > CREATE DEFAULT INDEX ON v; > SELECT * FROM v 3 @@ -221,25 +122,6 @@ Target cluster: quickstart > CREATE VIEW v as SELECT * FROM (SELECT a, sum(b) FROM data GROUP BY a UNION ALL SELECT a, (a + c)::numeric FROM data) WHERE a = 1; -? EXPLAIN SELECT * FROM v; -Explained Query: - Union - Project (#1, #0) - Map (1) - Reduce aggregates=[sum(#0)] monotonic - Project (#1) - Filter (#0 = 1) - ReadStorage materialize.public.data - Project (#0, #4) - Filter (#0 = 1) - Map (bigint_to_numeric((1 + #2))) - ReadStorage materialize.public.data - -Source materialize.public.data - filter=((#0 = 1)) - -Target cluster: quickstart - > CREATE DEFAULT INDEX ON v; > SELECT * FROM v 1 3 @@ -263,23 +145,6 @@ $ kafka-ingest format=avro topic=data2 schema=${schema} > CREATE VIEW v as SELECT a, c FROM data EXCEPT ALL SELECT a, c FROM data2 where d is null -? 
EXPLAIN SELECT * FROM v; -Explained Query: - Threshold - Union - Project (#0, #2) - ReadStorage materialize.public.data - Negate - Project (#0, #2) - Filter (#3) IS NULL - ReadStorage materialize.public.data2 - -Source materialize.public.data -Source materialize.public.data2 - filter=((#3) IS NULL) - -Target cluster: quickstart - > CREATE DEFAULT INDEX ON v; > SELECT * FROM v 1 2 diff --git a/test/testdrive/cc_cluster_sizes.td b/test/testdrive/cc_cluster_sizes.td index d59cc4a3172ac..5ee1fae45ce3c 100644 --- a/test/testdrive/cc_cluster_sizes.td +++ b/test/testdrive/cc_cluster_sizes.td @@ -8,7 +8,7 @@ # by the Apache License, Version 2.0. $ postgres-execute connection=postgres://mz_system:materialize@${testdrive.materialize-internal-sql-addr} -ALTER SYSTEM SET allowed_cluster_replica_sizes = "1"; +ALTER SYSTEM SET allowed_cluster_replica_sizes = '1-no-disk'; ALTER SYSTEM SET disk_cluster_replicas_default = false; # Cannot create clusters with cc cluster size naming schemes @@ -26,7 +26,7 @@ ALTER SYSTEM SET unsafe_enable_unorchestrated_cluster_replicas = true contains:unknown cluster replica size 1cc # The existing cluster names are fine -> CREATE CLUSTER c SIZE '1'; +> CREATE CLUSTER c SIZE '1-no-disk'; # But ensure we cannot ALTER our way to a cc name either ! ALTER CLUSTER c SET (SIZE '1cc'); @@ -50,7 +50,7 @@ true > DROP CLUSTER c # Create a cluster with a legacy size with disk enabled. -> CREATE CLUSTER c SIZE '1', DISK = true +> CREATE CLUSTER c SIZE '1-no-disk', DISK = true > SELECT disk FROM mz_clusters WHERE name = 'c' true @@ -70,7 +70,7 @@ true # Same test as before, except the legacy size cluster has disk explicitly # disabled. -> CREATE CLUSTER c SIZE '1', DISK = false +> CREATE CLUSTER c SIZE '1-no-disk', DISK = false > ALTER CLUSTER c SET (SIZE = '1cc') > SELECT disk FROM mz_clusters WHERE name = 'c' true @@ -78,7 +78,7 @@ true # Same test as before, except the legacy size cluster has no disk explicitly # configured. -> CREATE CLUSTER c SIZE = '1' +> CREATE CLUSTER c SIZE = '1-no-disk' > SELECT disk FROM mz_clusters WHERE name = 'c' false > ALTER CLUSTER c SET (SIZE = '1cc') @@ -92,16 +92,16 @@ contains: DISK option not supported for modern cluster sizes because disk is alw contains: DISK option not supported for modern cluster sizes because disk is always enabled # But it's okay if you're going back to a legacy size. -> ALTER CLUSTER c SET (DISK = true, SIZE = '1') +> ALTER CLUSTER c SET (DISK = true, SIZE = '1-no-disk') > SELECT disk FROM mz_clusters WHERE name = 'c' true > DROP CLUSTER c # Ensure that altering from a legacy size to a legacy size does not enable disk. -> CREATE CLUSTER c SIZE = '1' +> CREATE CLUSTER c SIZE = '1-no-disk' > SELECT disk FROM mz_clusters WHERE name = 'c' false -> ALTER CLUSTER c SET (SIZE = '2') +> ALTER CLUSTER c SET (SIZE = '2-no-disk') > SELECT disk FROM mz_clusters WHERE name = 'c' false > DROP CLUSTER c diff --git a/test/testdrive/disk-feature-flag.td b/test/testdrive/disk-feature-flag.td index fbdb299588fa5..65b26287cfbd2 100644 --- a/test/testdrive/disk-feature-flag.td +++ b/test/testdrive/disk-feature-flag.td @@ -11,10 +11,10 @@ $ postgres-execute connection=postgres://mz_system:materialize@${testdrive.mater ALTER SYSTEM SET enable_disk_cluster_replicas = false ALTER SYSTEM SET disk_cluster_replicas_default = false -! CREATE CLUSTER no SIZE = '1', REPLICATION FACTOR 0, DISK; +! 
CREATE CLUSTER no SIZE = '1-no-disk', REPLICATION FACTOR 0, DISK; exact:`WITH (DISK)` for cluster replicas is not available -> CREATE CLUSTER no SIZE = '1', REPLICATION FACTOR 0; +> CREATE CLUSTER no SIZE = '1-no-disk', REPLICATION FACTOR 0; ! ALTER CLUSTER no SET (REPLICATION FACTOR 1, DISK); exact:`WITH (DISK)` for cluster replicas is not available @@ -28,20 +28,20 @@ ALTER SYSTEM SET enable_disk_cluster_replicas = true > DROP CLUSTER IF EXISTS c; # Can set unmanaged cluster replica options directly, mixing and matching disk -> CREATE CLUSTER c REPLICAS (r1 (SIZE '1', DISK), r2 (SIZE '1')) +> CREATE CLUSTER c REPLICAS (r1 (SIZE '1-no-disk', DISK), r2 (SIZE '1-no-disk')) > SELECT r.name, r.size, r.disk FROM mz_catalog.mz_clusters c, mz_catalog.mz_cluster_replicas r WHERE c.name = 'c' AND c.id = r.cluster_id; -r1 1 true -r2 1 false +r1 1-no-disk true +r2 1-no-disk false > DROP CLUSTER c; # Can set on managed clusters -> CREATE CLUSTER c SIZE '1', REPLICATION FACTOR = 2, DISK; +> CREATE CLUSTER c SIZE '1-no-disk', REPLICATION FACTOR = 2, DISK; > SELECT r.name, r.size, r.disk FROM mz_catalog.mz_clusters c, mz_catalog.mz_cluster_replicas r WHERE c.name = 'c' AND c.id = r.cluster_id; -r1 1 true -r2 1 true +r1 1-no-disk true +r2 1-no-disk true > DROP CLUSTER c; @@ -49,11 +49,11 @@ r2 1 true $ postgres-execute connection=postgres://mz_system:materialize@${testdrive.materialize-internal-sql-addr} ALTER SYSTEM SET disk_cluster_replicas_default = true -> CREATE CLUSTER c REPLICAS (r1 (SIZE '1', DISK), r2 (SIZE '1')) +> CREATE CLUSTER c REPLICAS (r1 (SIZE '1-no-disk', DISK), r2 (SIZE '1-no-disk')) > SELECT r.name, r.size, r.disk FROM mz_catalog.mz_clusters c, mz_catalog.mz_cluster_replicas r WHERE c.name = 'c' AND c.id = r.cluster_id; -r1 1 true -r2 1 true +r1 1-no-disk true +r2 1-no-disk true > DROP CLUSTER c; @@ -64,11 +64,11 @@ ALTER SYSTEM RESET disk_cluster_replicas_default $ postgres-execute connection=postgres://mz_system:materialize@${testdrive.materialize-internal-sql-addr} ALTER SYSTEM SET enable_disk_cluster_replicas = false -! CREATE CLUSTER c REPLICAS (dff_3 (size '1', disk)) +! CREATE CLUSTER c REPLICAS (dff_3 (size '1-no-disk', disk)) contains:`WITH (DISK)` for cluster replicas is not available # Cannot set DISK on unmanaged clusters (the option is per replica) -! CREATE CLUSTER c REPLICAS (dff_3 (size '1')), DISK; +! 
CREATE CLUSTER c REPLICAS (dff_3 (size '1-no-disk')), DISK; contains:DISK not supported for unmanaged clusters # The following test that we don't crash envd with bad parameters, and instead just fallback diff --git a/test/testdrive/mzcompose.py b/test/testdrive/mzcompose.py index 3f2159490351c..7cd31db9646f6 100644 --- a/test/testdrive/mzcompose.py +++ b/test/testdrive/mzcompose.py @@ -18,6 +18,7 @@ from materialize import ci_util from materialize.mzcompose import get_default_system_parameters from materialize.mzcompose.composition import Composition, WorkflowArgumentParser +from materialize.mzcompose.services.azure import Azurite from materialize.mzcompose.services.fivetran_destination import FivetranDestination from materialize.mzcompose.services.kafka import Kafka from materialize.mzcompose.services.materialized import Materialized @@ -36,10 +37,11 @@ Redpanda(), Postgres(), MySql(), + Azurite(), Minio(setup_materialize=True, additional_directories=["copytos3"]), - Materialized(external_minio=True), + Materialized(external_blob_store=True), FivetranDestination(volumes_extra=["tmp:/share/tmp"]), - Testdrive(external_minio=True), + Testdrive(external_blob_store=True), ] @@ -88,6 +90,10 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: help="Rewrite results, disables junit reports", ) + parser.add_argument( + "--azurite", action="store_true", help="Use Azurite as blob store instead of S3" + ) + parser.add_argument( "files", nargs="*", @@ -98,10 +104,10 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: dependencies = [ "fivetran-destination", - "minio", "materialized", "postgres", "mysql", + "minio", ] if args.redpanda: dependencies += ["redpanda"] @@ -128,7 +134,8 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: materialized = Materialized( default_size=args.default_size, - external_minio=True, + external_blob_store=True, + blob_store_is_azure=args.azurite, additional_system_parameter_defaults=additional_system_parameter_defaults, ) @@ -139,7 +146,8 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: validate_catalog_store=True, default_timeout=args.default_timeout, volumes_extra=["mzdata:/mzdata"], - external_minio=True, + external_blob_store=True, + blob_store_is_azure=args.azurite, fivetran_destination=True, fivetran_destination_files_path="/share/tmp", entrypoint_extra=[ diff --git a/test/testdrive/source-linear-operators.td b/test/testdrive/source-linear-operators.td index bd2d75e5651f0..4e74c23dbd2fb 100644 --- a/test/testdrive/source-linear-operators.td +++ b/test/testdrive/source-linear-operators.td @@ -193,8 +193,9 @@ Target cluster: quickstart Explained Query: With cte l0 = - Filter (#0) IS NOT NULL - ReadStorage materialize.public.data_tbl + Project (#0, #2) + Filter (#0) IS NOT NULL + ReadStorage materialize.public.data_tbl Return Project (#2) Join on=(#0 = #1) type=differential @@ -202,8 +203,7 @@ Explained Query: Project (#0) Get l0 ArrangeBy keys=[[#0]] - Project (#0, #2) - Get l0 + Get l0 Source materialize.public.data_tbl filter=((#0) IS NOT NULL) diff --git a/test/txn-wal-fencing/mzcompose.py b/test/txn-wal-fencing/mzcompose.py index 8661e1b133907..e080485f47d73 100644 --- a/test/txn-wal-fencing/mzcompose.py +++ b/test/txn-wal-fencing/mzcompose.py @@ -12,6 +12,7 @@ purpose of exercising fencing. 
""" +import argparse import random import time from concurrent import futures @@ -19,7 +20,8 @@ from enum import Enum from materialize import buildkite -from materialize.mzcompose.composition import Composition +from materialize.mzcompose.composition import Composition, WorkflowArgumentParser +from materialize.mzcompose.services.azure import Azurite from materialize.mzcompose.services.materialized import Materialized from materialize.mzcompose.services.minio import Minio from materialize.mzcompose.services.postgres import CockroachOrPostgresMetadata @@ -87,6 +89,7 @@ class SuccessfulCommit: SERVICES = [ Minio(setup_materialize=True), + Azurite(), CockroachOrPostgresMetadata(), # Overriden below Materialized(name="mz_first"), @@ -94,14 +97,19 @@ class SuccessfulCommit: ] -def workflow_default(c: Composition) -> None: +def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: + parser.add_argument( + "--azurite", action="store_true", help="Use Azurite as blob store instead of S3" + ) + args = parser.parse_args() + workloads = buildkite.shard_list(WORKLOADS, lambda w: w.name) print( f"Workloads in shard with index {buildkite.get_parallelism_index()}: {[w.name for w in workloads]}" ) for workload in workloads: - run_workload(c, workload) + run_workload(c, workload, args) def execute_operation( @@ -161,12 +169,12 @@ def execute_operation( ) -def run_workload(c: Composition, workload: Workload) -> None: +def run_workload(c: Composition, workload: Workload, args: argparse.Namespace) -> None: print(f"+++ Running workload {workload.name} ...") c.silent = True c.down(destroy_volumes=True) - c.up("minio", c.metadata_store()) + c.up(c.metadata_store()) mzs = { "mz_first": workload.txn_wal_first, @@ -178,7 +186,8 @@ def run_workload(c: Composition, workload: Workload) -> None: Materialized( name=mz_name, external_metadata_store=True, - external_minio=True, + external_blob_store=True, + blob_store_is_azure=args.azurite, sanity_restart=False, ) for mz_name in mzs diff --git a/test/zippy/mzcompose.py b/test/zippy/mzcompose.py index 280e6c2dba0ea..cb3475b77f26b 100644 --- a/test/zippy/mzcompose.py +++ b/test/zippy/mzcompose.py @@ -19,6 +19,7 @@ from enum import Enum from materialize.mzcompose.composition import Composition, WorkflowArgumentParser +from materialize.mzcompose.services.azure import Azurite from materialize.mzcompose.services.balancerd import Balancerd from materialize.mzcompose.services.clusterd import Clusterd from materialize.mzcompose.services.cockroach import Cockroach @@ -43,18 +44,36 @@ def create_mzs( - additional_system_parameter_defaults: dict[str, str] | None = None -) -> list[Materialized]: + azurite: bool, + transaction_isolation: bool, + additional_system_parameter_defaults: dict[str, str] | None = None, +) -> list[Testdrive | Materialized]: return [ Materialized( name=mz_name, - external_minio=True, + external_blob_store=True, + blob_store_is_azure=azurite, external_metadata_store=True, sanity_restart=False, metadata_store="cockroach", additional_system_parameter_defaults=additional_system_parameter_defaults, ) for mz_name in ["materialized", "materialized2"] + ] + [ + Testdrive( + materialize_url="postgres://materialize@balancerd:6875", + no_reset=True, + seed=1, + # Timeout increased since Large Zippy occasionally runs into them + default_timeout="1200s", + materialize_params={ + "statement_timeout": "'1800s'", + "transaction_isolation": f"'{transaction_isolation}'", + }, + metadata_store="cockroach", + external_blob_store=True, + 
blob_store_is_azure=azurite, + ), ] @@ -65,11 +84,11 @@ def create_mzs( Postgres(), Cockroach(), Minio(setup_materialize=True, additional_directories=["copytos3"]), + Azurite(), Mc(), Balancerd(), - *create_mzs(), + *create_mzs(azurite=False, transaction_isolation=False), Clusterd(name="storaged"), - Testdrive(metadata_store="cockroach"), Grafana(), Prometheus(), SshBastionHost(), @@ -157,10 +176,19 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: help="System parameters to set in Materialize, i.e. what you would set with `ALTER SYSTEM SET`", ) + parser.add_argument( + "--azurite", action="store_true", help="Use Azurite as blob store instead of S3" + ) + args = parser.parse_args() scenario_class = globals()[args.scenario] c.up("zookeeper", "redpanda", "ssh-bastion-host") + if args.azurite: + c.up("azurite") + else: + del c.compose["services"]["azurite"] + # Required for backups, even with azurite c.enable_minio_versioning() if args.observability: @@ -181,19 +209,11 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None: restart="on-failure:5", setup_materialize=True, ), - Testdrive( - materialize_url="postgres://materialize@balancerd:6875", - no_reset=True, - seed=1, - # Timeout increased since Large Zippy occasionally runs into them - default_timeout="1200s", - materialize_params={ - "statement_timeout": "'1800s'", - "transaction_isolation": f"'{args.transaction_isolation}'", - }, - metadata_store="cockroach", + *create_mzs( + args.azurite, + args.transaction_isolation, + additional_system_parameter_defaults, ), - *create_mzs(additional_system_parameter_defaults), ): c.up("materialized")
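
The AWS, GCP, and Azure workflows added above all inline the same readiness loop: poll "kubectl get pods ... -o jsonpath={.items[0].status.phase}" until the first matching pod reports Running, retrying on errors and giving up after a fixed number of attempts with ValueError("Never completed"). A minimal standalone sketch of that pattern, using plain subprocess instead of the repo's spawn wrapper; the helper name and timeout default are hypothetical, not part of this diff:

import subprocess
import time


def wait_for_pod_running(selector: str, namespace: str, timeout: int = 300) -> None:
    """Poll kubectl until the first pod matching `selector` reports phase Running."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            phase = subprocess.check_output(
                [
                    "kubectl", "get", "pods",
                    "-l", selector,
                    "-n", namespace,
                    "-o", "jsonpath={.items[0].status.phase}",
                ],
                text=True,
            ).strip()
            if phase == "Running":
                return
        except subprocess.CalledProcessError:
            # The namespace or pod may not exist yet; keep retrying until the deadline.
            pass
        time.sleep(1)
    raise ValueError(f"pod matching {selector} never reached Running")


# Hypothetical usage, mirroring the waits performed in the workflows above:
# wait_for_pod_running("app=environmentd", "materialize-environment", timeout=240)
# wait_for_pod_running("app=balancerd", "materialize-environment", timeout=300)

The other recurring change in this diff is the optional Azurite blob store: the testdrive, txn-wal-fencing, and zippy compositions now accept an --azurite flag and forward it as blob_store_is_azure to the Materialized and Testdrive services. Assuming the usual mzcompose invocation, that path would be exercised with something like "./mzcompose run default --azurite" from the relevant test directory.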