-
Notifications
You must be signed in to change notification settings - Fork 18
add setting to define filename pattern for part exports #1512
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
8163fb0
a3d35bb
c384d5b
9bae367
270499f
71a4b9a
fde576b
ed45b16
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,11 +13,14 @@ | |
| #include <Processors/QueryPlan/QueryPlan.h> | ||
| #include <Processors/QueryPlan/ExpressionStep.h> | ||
| #include <QueryPipeline/QueryPipelineBuilder.h> | ||
| #include <Common/Macros.h> | ||
| #include <Common/Exception.h> | ||
| #include <Common/ProfileEventsScope.h> | ||
| #include <Databases/DatabaseReplicated.h> | ||
| #include <Storages/MergeTree/ExportList.h> | ||
| #include <Formats/FormatFactory.h> | ||
| #include <Databases/enableAllExperimentalSettings.h> | ||
| #include <boost/algorithm/string/replace.hpp> | ||
|
|
||
| namespace ProfileEvents | ||
| { | ||
|
|
@@ -43,6 +46,7 @@ namespace Setting | |
| extern const SettingsUInt64 min_bytes_to_use_direct_io; | ||
| extern const SettingsUInt64 export_merge_tree_part_max_bytes_per_file; | ||
| extern const SettingsUInt64 export_merge_tree_part_max_rows_per_file; | ||
| extern const SettingsString export_merge_tree_part_filename_pattern; | ||
| } | ||
|
|
||
| namespace | ||
|
|
@@ -80,6 +84,33 @@ namespace | |
| plan_for_part.addStep(std::move(expression_step)); | ||
| } | ||
| } | ||
|
|
||
| String buildDestinationFilename( | ||
| const MergeTreePartExportManifest & manifest, | ||
| const StorageID & storage_id, | ||
| const ContextPtr & local_context) | ||
| { | ||
| auto filename = manifest.settings[Setting::export_merge_tree_part_filename_pattern].value; | ||
|
|
||
| boost::replace_all(filename, "{part_name}", manifest.data_part->name); | ||
| boost::replace_all(filename, "{checksum}", manifest.data_part->checksums.getTotalChecksumHex()); | ||
|
|
||
| Macros::MacroExpansionInfo macro_info; | ||
| macro_info.table_id = storage_id; | ||
|
|
||
| if (auto database = DatabaseCatalog::instance().tryGetDatabase(storage_id.database_name)) | ||
| { | ||
| if (const auto replicated = dynamic_cast<const DatabaseReplicated *>(database.get())) | ||
| { | ||
| macro_info.shard = replicated->getShardName(); | ||
| macro_info.replica = replicated->getReplicaName(); | ||
| } | ||
| } | ||
|
|
||
| filename = local_context->getMacros()->expand(filename, macro_info); | ||
|
|
||
| return filename; | ||
| } | ||
| } | ||
|
|
||
| ExportPartTask::ExportPartTask(MergeTreeData & storage_, const MergeTreePartExportManifest & manifest_) | ||
|
|
@@ -136,8 +167,10 @@ bool ExportPartTask::executeStep() | |
|
|
||
| try | ||
| { | ||
| const auto filename = buildDestinationFilename(manifest, destination_storage_id, local_context); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The new filename expansion passes Useful? React with 👍 / 👎.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. oh, port + ai completion suggestion glitch, fixed |
||
|
|
||
| sink = destination_storage->import( | ||
| manifest.data_part->name + "_" + manifest.data_part->checksums.getTotalChecksumHex(), | ||
| filename, | ||
| block_with_partition_values, | ||
| new_file_path_callback, | ||
| manifest.file_already_exists_policy == MergeTreePartExportManifest::FileAlreadyExistsPolicy::overwrite, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| <clickhouse> | ||
| <macros> | ||
| <shard>shard1</shard> | ||
| <replica>replica1</replica> | ||
| </macros> | ||
| </clickhouse> |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| <clickhouse> | ||
| <macros> | ||
| <shard>shard2</shard> | ||
| <replica>replica1</replica> | ||
| </macros> | ||
| </clickhouse> |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,16 @@ | ||
| ---- Test: Default pattern {part_name}_{checksum} | ||
| 1 2020 | ||
| 2 2020 | ||
| 3 2020 | ||
| ---- Verify filename matches 2020_1_1_0_*.1.parquet | ||
| 1 | ||
| ---- Test: Custom prefix pattern | ||
| 4 2021 | ||
| ---- Verify filename matches myprefix_2021_2_2_0.1.parquet | ||
| 1 | ||
| ---- Test: Pattern with macros | ||
| 1 2020 | ||
| 2 2020 | ||
| 3 2020 | ||
| ---- Verify macros expanded (no literal braces in parquet filenames, that's the best we can do for stateless tests) | ||
| 1 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fromJsonStringnow unconditionally readsfilename_pattern, but oldermetadata.jsonexport manifests do not contain this key. On mixed-version or restarted clusters with pre-existing export tasks,json->getValue<String>("filename_pattern")will throw during task loading and prevent those exports from being processed. This field needs an optional read with a default (matching{part_name}_{checksum}) like other backward-compatible manifest fields.Useful? React with 👍 / 👎.