Skip to content
This repository was archived by the owner on Oct 10, 2025. It is now read-only.

Commit 7e84af4

Browse files
committed
Hash join flatten fix
1 parent c14b6d4 commit 7e84af4

File tree

2 files changed

+19
-33
lines changed

2 files changed

+19
-33
lines changed

src/include/planner/operator/logical_hash_join.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,6 @@ class LogicalHashJoin final : public LogicalOperator {
5858

5959
static bool isNodeIDOnlyJoin(const std::vector<join_condition_t>& joinConditions);
6060

61-
private:
62-
bool isJoinKeyUniqueOnBuildSide(const binder::Expression& joinNodeID);
63-
6461
private:
6562
std::vector<join_condition_t> joinConditions;
6663
common::JoinType joinType;

src/planner/operator/logical_hash_join.cpp

Lines changed: 19 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
#include "planner/operator/logical_hash_join.h"
22

3-
#include "common/cast.h"
43
#include "planner/operator/factorization/flatten_resolver.h"
54
#include "planner/operator/factorization/sink_util.h"
65
#include "planner/operator/scan/logical_scan_node_table.h"
@@ -165,6 +164,24 @@ binder::expression_vector LogicalHashJoin::getJoinNodeIDs(
165164
return result;
166165
}
167166

// File-local helper introduced by this commit. It walks down a logical plan through a fixed
// set of single-child operators and reports whether the plan bottoms out in a node-table scan
// whose node ID equals the given join node ID. requireFlatProbeKeys() negates the result for
// the build-side child (children[1]) to decide whether probe keys must be flat.
167+
class JoinNodeIDUniquenessAnalyzer {
168+
public:
// Returns true only if `op` is a SCAN_NODE_TABLE whose node ID compares equal to `joinNodeID`,
// or is one of the operators listed below whose first child (recursively) satisfies that.
// Any other operator type yields false.
// NOTE(review): `op` is dereferenced without a null check -- callers must pass a valid operator.
169+
static bool isUnique(LogicalOperator* op, const binder::Expression& joinNodeID) {
170+
switch (op->getOperatorType()) {
// These operator types are looked through: the answer is whatever child 0 reports.
// NOTE(review): presumably chosen because they never duplicate node IDs -- confirm.
171+
case LogicalOperatorType::FILTER:
172+
case LogicalOperatorType::FLATTEN:
173+
case LogicalOperatorType::LIMIT:
174+
case LogicalOperatorType::PROJECTION:
175+
case LogicalOperatorType::SEMI_MASKER:
176+
return isUnique(op->getChild(0).get(), joinNodeID);
// Base case: the scan's own node ID expression is compared against the join key with
// Expression's operator== (the removed code compared getUniqueName() strings instead).
177+
case LogicalOperatorType::SCAN_NODE_TABLE:
178+
return *op->constCast<LogicalScanNodeTable>().getNodeID() == joinNodeID;
// Every other operator type (including any with several children) is answered with false.
179+
default:
180+
return false;
181+
}
182+
}
183+
};
184+
168185
bool LogicalHashJoin::requireFlatProbeKeys() {
169186
// Flatten for multiple join keys.
170187
if (joinConditions.size() > 1) {
@@ -179,35 +196,7 @@ bool LogicalHashJoin::requireFlatProbeKeys() {
179196
if (probeKey->dataType.getLogicalTypeID() != LogicalTypeID::INTERNAL_ID) {
180197
return true;
181198
}
182-
return !isJoinKeyUniqueOnBuildSide(*buildKey);
183-
}
184-
185-
bool LogicalHashJoin::isJoinKeyUniqueOnBuildSide(const binder::Expression& joinNodeID) {
186-
auto buildSchema = children[1]->getSchema();
187-
auto numGroupsInScope = buildSchema->getGroupsPosInScope().size();
188-
bool hasProjectedOutGroups = buildSchema->getNumGroups() > numGroupsInScope;
189-
if (numGroupsInScope > 1 || hasProjectedOutGroups) {
190-
return false;
191-
}
192-
// Now there is a single factorization group, we need to further make sure joinNodeID comes from
193-
// ScanNodeID operator. Because if joinNodeID comes from a ColExtend we cannot guarantee the
194-
// reverse mapping is still many-to-one. We look for the most simple pattern where build plan is
195-
// linear.
196-
auto op = children[1].get();
197-
while (op->getNumChildren() != 0) {
198-
if (op->getNumChildren() > 1) {
199-
return false;
200-
}
201-
op = op->getChild(0).get();
202-
}
203-
if (op->getOperatorType() != LogicalOperatorType::SCAN_NODE_TABLE) {
204-
return false;
205-
}
206-
auto scan = ku_dynamic_cast<LogicalScanNodeTable*>(op);
207-
if (scan->getNodeID()->getUniqueName() != joinNodeID.getUniqueName()) {
208-
return false;
209-
}
210-
return true;
199+
return !JoinNodeIDUniquenessAnalyzer::isUnique(children[1].get(), *buildKey);
211200
}
212201

213202
} // namespace planner

0 commit comments

Comments
 (0)