11#include " planner/operator/logical_hash_join.h"
22
3- #include " common/cast.h"
43#include " planner/operator/factorization/flatten_resolver.h"
54#include " planner/operator/factorization/sink_util.h"
65#include " planner/operator/scan/logical_scan_node_table.h"
@@ -165,6 +164,24 @@ binder::expression_vector LogicalHashJoin::getJoinNodeIDs(
165164 return result;
166165}
167166
167+ class JoinNodeIDUniquenessAnalyzer {
168+ public:
169+ static bool isUnique (LogicalOperator* op, const binder::Expression& joinNodeID) {
170+ switch (op->getOperatorType ()) {
171+ case LogicalOperatorType::FILTER:
172+ case LogicalOperatorType::FLATTEN:
173+ case LogicalOperatorType::LIMIT:
174+ case LogicalOperatorType::PROJECTION:
175+ case LogicalOperatorType::SEMI_MASKER:
176+ return isUnique (op->getChild (0 ).get (), joinNodeID);
177+ case LogicalOperatorType::SCAN_NODE_TABLE:
178+ return *op->constCast <LogicalScanNodeTable>().getNodeID () == joinNodeID;
179+ default :
180+ return false ;
181+ }
182+ }
183+ };
184+
168185bool LogicalHashJoin::requireFlatProbeKeys () {
169186 // Flatten for multiple join keys.
170187 if (joinConditions.size () > 1 ) {
@@ -179,35 +196,7 @@ bool LogicalHashJoin::requireFlatProbeKeys() {
179196 if (probeKey->dataType .getLogicalTypeID () != LogicalTypeID::INTERNAL_ID) {
180197 return true ;
181198 }
182- return !isJoinKeyUniqueOnBuildSide (*buildKey);
183- }
184-
185- bool LogicalHashJoin::isJoinKeyUniqueOnBuildSide (const binder::Expression& joinNodeID) {
186- auto buildSchema = children[1 ]->getSchema ();
187- auto numGroupsInScope = buildSchema->getGroupsPosInScope ().size ();
188- bool hasProjectedOutGroups = buildSchema->getNumGroups () > numGroupsInScope;
189- if (numGroupsInScope > 1 || hasProjectedOutGroups) {
190- return false ;
191- }
192- // Now there is a single factorization group, we need to further make sure joinNodeID comes from
193- // ScanNodeID operator. Because if joinNodeID comes from a ColExtend we cannot guarantee the
194- // reverse mapping is still many-to-one. We look for the most simple pattern where build plan is
195- // linear.
196- auto op = children[1 ].get ();
197- while (op->getNumChildren () != 0 ) {
198- if (op->getNumChildren () > 1 ) {
199- return false ;
200- }
201- op = op->getChild (0 ).get ();
202- }
203- if (op->getOperatorType () != LogicalOperatorType::SCAN_NODE_TABLE) {
204- return false ;
205- }
206- auto scan = ku_dynamic_cast<LogicalScanNodeTable*>(op);
207- if (scan->getNodeID ()->getUniqueName () != joinNodeID.getUniqueName ()) {
208- return false ;
209- }
210- return true ;
199+ return !JoinNodeIDUniquenessAnalyzer::isUnique (children[1 ].get (), *buildKey);
211200}
212201
213202} // namespace planner
0 commit comments