|
| 1 | +/* |
| 2 | + * Licensed to the Apache Software Foundation (ASF) under one |
| 3 | + * or more contributor license agreements. See the NOTICE file |
| 4 | + * distributed with this work for additional information |
| 5 | + * regarding copyright ownership. The ASF licenses this file |
| 6 | + * to you under the Apache License, Version 2.0 (the |
| 7 | + * "License"); you may not use this file except in compliance |
| 8 | + * with the License. You may obtain a copy of the License at |
| 9 | + * |
| 10 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | + * |
| 12 | + * Unless required by applicable law or agreed to in writing, |
| 13 | + * software distributed under the License is distributed on an |
| 14 | + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 15 | + * KIND, either express or implied. See the License for the |
| 16 | + * specific language governing permissions and limitations |
| 17 | + * under the License. |
| 18 | + */ |
| 19 | + |
| 20 | +#pragma once |
| 21 | + |
| 22 | +#include <cstdint> |
| 23 | +#include <map> |
| 24 | +#include <memory> |
| 25 | +#include <optional> |
| 26 | +#include <string> |
| 27 | +#include <vector> |
| 28 | + |
| 29 | +#include "paimon/defs.h" |
| 30 | +#include "paimon/executor.h" |
| 31 | +#include "paimon/memory/memory_pool.h" |
| 32 | +#include "paimon/metrics.h" |
| 33 | +#include "paimon/result.h" |
| 34 | +#include "paimon/status.h" |
| 35 | +#include "paimon/type_fwd.h" |
| 36 | +#include "paimon/visibility.h" |
| 37 | + |
| 38 | +namespace paimon { |
| 39 | +class CommitContext; |
| 40 | +class CommitMessage; |
| 41 | + |
| 42 | +/// Interface for commit operations in a file store. |
| 43 | +/// |
| 44 | +/// The `FileStoreCommit` class declares pure virtual operations for committing and overwriting |
| 45 | +/// data, expiring old snapshots, dropping partitions, and retrieving commit metrics. |
| 46 | +class PAIMON_EXPORT FileStoreCommit { |
| 47 | + public: |
| 48 | + /// Create an instance of `FileStoreCommit`. |
| 49 | + /// |
| 50 | + /// @param context The `CommitContext` used for commit operations; ownership is passed in. |
| 51 | + /// |
| 52 | + /// @return A Result containing a unique pointer to the `FileStoreCommit` instance. |
| 53 | + static Result<std::unique_ptr<FileStoreCommit>> Create(std::unique_ptr<CommitContext> context); |
| 54 | + |
| 55 | + virtual ~FileStoreCommit() = default; |
| 56 | + |
| 57 | + /// Commit changes to the file store. |
| 58 | + /// |
| 59 | + /// @param commit_messages A vector of commit messages to be committed. |
| 60 | + /// @param commit_identifier An identifier for the commit operation. Defaults to |
| 61 | + /// `BATCH_WRITE_COMMIT_IDENTIFIER`. |
| 62 | + /// @param watermark An optional event-time watermark used to indicate the progress of data |
| 63 | + /// processing. Defaults to `std::nullopt`. |
| 64 | + /// @return Status indicating the success or failure of the commit operation. |
| 65 | + virtual Status Commit(const std::vector<std::shared_ptr<CommitMessage>>& commit_messages, |
| 66 | + int64_t commit_identifier = BATCH_WRITE_COMMIT_IDENTIFIER, |
| 67 | + std::optional<int64_t> watermark = std::nullopt) = 0; |
| 68 | + |
| 69 | + /// Filter out every group of commit messages that has already been committed and commit the |
| 70 | + /// remaining ones. |
| 71 | + /// |
| 72 | + /// Compared to `Commit()`, this method first checks whether a commit_identifier has already |
| 73 | + /// been committed, so it might be slower. A common use of this method is to retry the |
| 74 | + /// commit process after a failure. |
| 75 | + /// |
| 76 | + /// @param commit_identifier_and_messages A map containing all `CommitMessage`s in |
| 77 | + /// question. The key is the commit_identifier. |
| 78 | + /// |
| 79 | + /// @param watermark An optional event-time watermark used to indicate the progress of data |
| 80 | + /// processing. Defaults to `std::nullopt`. |
| 81 | + /// @return A Result holding the number of commit message groups that were committed. |
| 82 | + virtual Result<int32_t> FilterAndCommit( |
| 83 | + const std::map<int64_t, std::vector<std::shared_ptr<CommitMessage>>>& |
| 84 | + commit_identifier_and_messages, |
| 85 | + std::optional<int64_t> watermark = std::nullopt) = 0; |
| 86 | + |
| 87 | + /// Overwrite from manifest committable and partition. |
| 88 | + /// |
| 89 | + /// @param partitions Each element maps partition keys to partition values. Depending on the |
| 90 | + /// user-defined statement, an element might not include all partition keys. Also note that |
| 91 | + /// these partitions do not necessarily equal the partitions of the newly added key-values; |
| 92 | + /// they are just the partitions to be cleaned up. |
| 93 | + /// @param commit_messages The commit messages for this overwrite. |
| 94 | + /// @param commit_identifier An identifier for the commit operation. |
| 95 | + /// @param watermark An optional event-time watermark used to indicate the progress of data |
| 96 | + /// processing. Defaults to `std::nullopt`. |
| 97 | + /// @return Status indicating the success or failure of the overwrite operation. |
| 98 | + virtual Status Overwrite(const std::vector<std::map<std::string, std::string>>& partitions, |
| 99 | + const std::vector<std::shared_ptr<CommitMessage>>& commit_messages, |
| 100 | + int64_t commit_identifier, |
| 101 | + std::optional<int64_t> watermark = std::nullopt) = 0; |
| 102 | + |
| 103 | + /// This is a temporary interface for internal use. It will be removed in a future version. |
| 104 | + /// Please do not rely on it for long-term use. |
| 105 | + /// |
| 106 | + /// @param partitions The partitions for this operation (same type as in `Overwrite()`). |
| 107 | + /// @param commit_messages The commit messages for this operation. |
| 108 | + /// @param commit_identifier An identifier for the commit operation. |
| 109 | + /// @param watermark An optional event-time watermark used to indicate the progress of data |
| 110 | + /// processing. Defaults to `std::nullopt`. |
| 111 | + /// @return A Result holding an `int32_t`, or an error status (value meaning: TODO confirm). |
| 112 | + virtual Result<int32_t> FilterAndOverwrite( |
| 113 | + const std::vector<std::map<std::string, std::string>>& partitions, |
| 114 | + const std::vector<std::shared_ptr<CommitMessage>>& commit_messages, |
| 115 | + int64_t commit_identifier, std::optional<int64_t> watermark = std::nullopt) = 0; |
| 116 | + |
| 117 | + /// To use REST catalog commit, first enable |
| 118 | + /// `CommitContextBuilder::UseRESTCatalogCommit()`, then call `Commit()` (or |
| 119 | + /// `FilterAndCommit()`) as usual, then call this method to get the last commit table request, |
| 120 | + /// which is a JSON string that can be sent to the REST catalog server. |
| 121 | + /// |
| 122 | + /// @note Temporary interface for internal use, will be removed in the future. |
| 123 | + /// |
| 124 | + /// @return A Result containing a JSON string that includes `snapshot` and `statistics`, but |
| 125 | + /// excludes `tableId`. |
| 126 | + virtual Result<std::string> GetLastCommitTableRequest() = 0; |
| 127 | + |
| 128 | + /// Expire old snapshots in the file store. |
| 129 | + /// |
| 130 | + /// @return Result<int32_t> indicating the number of expired items or an error status. |
| 131 | + virtual Result<int32_t> Expire() = 0; |
| 132 | + |
| 133 | + /// Drop the specified partitions from the file store. |
| 134 | + /// |
| 135 | + /// @param partitions A vector of partitions to be dropped. |
| 136 | + /// @param commit_identifier An identifier for the commit operation. |
| 137 | + /// @return Status indicating the success or failure of the drop partition operation. |
| 138 | + virtual Status DropPartition(const std::vector<std::map<std::string, std::string>>& partitions, |
| 139 | + int64_t commit_identifier) = 0; |
| 140 | + |
| 141 | + /// Retrieve metrics related to commit operations. |
| 142 | + /// |
| 143 | + /// @return A shared pointer to a `Metrics` object containing commit metrics. |
| 144 | + virtual std::shared_ptr<Metrics> GetCommitMetrics() const = 0; |
| 145 | +}; |
| 146 | + |
| 147 | +} // namespace paimon |
0 commit comments