Skip to content
This repository was archived by the owner on Oct 10, 2025. It is now read-only.

Commit 1ef39d5

Browse files
committed
update
1 parent 8d0a0bc commit 1ef39d5

File tree

9 files changed

+83
-69
lines changed

9 files changed

+83
-69
lines changed

extension/fts/src/function/create_fts_index.cpp

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -145,7 +145,7 @@ static std::string formatStrInCypher(const std::string& input) {
145145
return result;
146146
}
147147

148-
static std::string createAdvancedPatternMatchTable(const CreateFTSBindData& bindData) {
148+
static std::string createTablesForExactTermMatch(const CreateFTSBindData& bindData) {
149149
std::string query;
150150
auto appearsInfoTableName =
151151
FTSUtils::getAppearsInfoTableName(bindData.tableID, bindData.indexName);
@@ -228,9 +228,9 @@ std::string createFTSIndexQuery(ClientContext& context, const TableFuncBindData&
228228
"RETURN t.term, CAST(count(distinct t.docID) AS UINT64));",
229229
termsTableName, appearsInfoTableName);
230230

231-
// If the advanced_pattern_match is enabled, we need to create two additional tables.
232-
if (ftsBindData->createFTSConfig.advancedPatternMatch) {
233-
query += createAdvancedPatternMatchTable(*ftsBindData);
231+
// If exact_term_match is enabled, we need to create additional tables.
232+
if (ftsBindData->createFTSConfig.exactTermMatch) {
233+
query += createTablesForExactTermMatch(*ftsBindData);
234234
}
235235

236236
auto appearsInTableName = FTSUtils::getAppearsInTableName(tableID, indexName);
@@ -258,8 +258,8 @@ std::string createFTSIndexQuery(ClientContext& context, const TableFuncBindData&
258258
params += stringFormat("stemmer := '{}', ", ftsBindData->createFTSConfig.stemmer);
259259
params += stringFormat("stopWords := '{}', ",
260260
ftsBindData->createFTSConfig.stopWordsTableInfo.stopWords);
261-
params += stringFormat("advanced_pattern_match := {}",
262-
ftsBindData->createFTSConfig.advancedPatternMatch ? "true" : "false");
261+
params += stringFormat("exact_term_match := {}",
262+
ftsBindData->createFTSConfig.exactTermMatch ? "true" : "false");
263263
query += stringFormat("CALL _CREATE_FTS_INDEX('{}', '{}', {}, {});", tableName, indexName,
264264
properties, params);
265265
query += stringFormat("RETURN 'Index {} has been created.' as result;", ftsBindData->indexName);

extension/fts/src/function/fts_config.cpp

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -157,9 +157,9 @@ CreateFTSConfig::CreateFTSConfig(main::ClientContext& context, common::table_id_
157157
value.validateType(common::LogicalTypeID::STRING);
158158
tokenizerInfo.jiebaDictDir =
159159
common::StringUtils::getLower(value.getValue<std::string>());
160-
} else if (AdvancedPatternMatch::NAME == lowerCaseName) {
161-
value.validateType(AdvancedPatternMatch::TYPE);
162-
advancedPatternMatch = value.getValue<bool>();
160+
} else if (ExactTermMatch::NAME == lowerCaseName) {
161+
value.validateType(ExactTermMatch::TYPE);
162+
exactTermMatch = value.getValue<bool>();
163163
} else {
164164
throw common::BinderException{"Unrecognized optional parameter: " + name};
165165
}
@@ -169,7 +169,7 @@ CreateFTSConfig::CreateFTSConfig(main::ClientContext& context, common::table_id_
169169
FTSConfig CreateFTSConfig::getFTSConfig() const {
170170
return FTSConfig{stemmer, stopWordsTableInfo.tableName, stopWordsTableInfo.stopWords,
171171
ignorePattern, ignorePatternQuery, tokenizerInfo.tokenizer, tokenizerInfo.jiebaDictDir,
172-
advancedPatternMatch};
172+
exactTermMatch};
173173
}
174174

175175
void FTSConfig::serialize(common::Serializer& serializer) const {
@@ -180,7 +180,7 @@ void FTSConfig::serialize(common::Serializer& serializer) const {
180180
serializer.serializeValue(ignorePatternQuery);
181181
serializer.serializeValue(tokenizer);
182182
serializer.serializeValue(jiebaDictDir);
183-
serializer.serializeValue(advancedPatternMatch);
183+
serializer.serializeValue(exactTermMatch);
184184
}
185185

186186
FTSConfig FTSConfig::deserialize(common::Deserializer& deserializer) {
@@ -192,7 +192,7 @@ FTSConfig FTSConfig::deserialize(common::Deserializer& deserializer) {
192192
deserializer.deserializeValue(config.ignorePatternQuery);
193193
deserializer.deserializeValue(config.tokenizer);
194194
deserializer.deserializeValue(config.jiebaDictDir);
195-
deserializer.deserializeValue(config.advancedPatternMatch);
195+
deserializer.deserializeValue(config.exactTermMatch);
196196
return config;
197197
}
198198

extension/fts/src/function/query_fts/query_fts_bind_data.cpp

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,8 @@ QueryFTSBindData::QueryFTSBindData(binder::expression_vector columns,
5151
outputTableID{output[0]->constCast<binder::NodeExpression>().getTableIDs()[0]},
5252
numDocs{numDocs}, avgDocLen{avgDocLen},
5353
patternMatchAlgo{PatternMatchFactory::getPatternMatchAlgo(
54-
entry.getAuxInfo().cast<FTSIndexAuxInfo>().config.advancedPatternMatch)} {
54+
entry.getAuxInfo().cast<FTSIndexAuxInfo>().config.exactTermMatch ? TermMatchType::EXACT :
55+
TermMatchType::STEM)} {
5556
auto& nodeExpr = output[0]->constCast<binder::NodeExpression>();
5657
KU_ASSERT(nodeExpr.getNumEntries() == 1);
5758
outputTableID = nodeExpr.getEntry(0)->getTableID();
@@ -77,13 +78,12 @@ std::vector<std::string> QueryFTSBindData::getQueryTerms(main::ClientContext& co
7778
auto config = entry.getAuxInfo().cast<FTSIndexAuxInfo>().config;
7879
FTSUtils::normalizeQuery(queryInStr, config.ignorePatternQuery);
7980
auto terms = FTSUtils::tokenizeString(queryInStr, config);
80-
auto stopWordsTable =
81-
StorageManager::Get(context)
82-
->getTable(catalog::Catalog::Get(context)
83-
->getTableCatalogEntry(transaction::Transaction::Get(context),
84-
config.stopWordsTableName)
85-
->getTableID())
86-
->ptrCast<NodeTable>();
81+
auto stopWordsTable = StorageManager::Get(context)
82+
->getTable(catalog::Catalog::Get(context)
83+
->getTableCatalogEntry(transaction::Transaction::Get(context),
84+
config.stopWordsTableName)
85+
->getTableID())
86+
->ptrCast<NodeTable>();
8787
return FTSUtils::stemTerms(terms, entry.getAuxInfo().cast<FTSIndexAuxInfo>().config,
8888
MemoryManager::Get(context), stopWordsTable, transaction::Transaction::Get(context),
8989
optionalParams->constCast<QueryFTSOptionalParams>().conjunctive.getParamVal(),

extension/fts/src/function/query_fts/query_fts_index.cpp

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -382,7 +382,6 @@ static std::unique_ptr<TableFuncBindData> bindFunc(main::ClientContext* context,
382382
auto inputTableName = getParamVal(*input, 0);
383383
auto indexName = getParamVal(*input, 1);
384384
auto query = input->getParam(2);
385-
386385
auto tableEntry = FTSIndexUtils::bindNodeTable(*context, inputTableName, indexName,
387386
FTSIndexUtils::IndexOperation::QUERY);
388387
auto catalog = catalog::Catalog::Get(*context);
@@ -398,7 +397,7 @@ static std::unique_ptr<TableFuncBindData> bindFunc(main::ClientContext* context,
398397
auto appearsInEntry = catalog->getTableCatalogEntry(transaction,
399398
FTSUtils::getAppearsInTableName(tableEntry->getTableID(), indexName));
400399
std::vector<catalog::TableCatalogEntry*> nodeEntries{termsEntry, docsEntry};
401-
if (ftsIndexEntry->getAuxInfo().cast<FTSIndexAuxInfo>().config.advancedPatternMatch) {
400+
if (ftsIndexEntry->getAuxInfo().cast<FTSIndexAuxInfo>().config.exactTermMatch) {
402401
nodeEntries.push_back(catalog->getTableCatalogEntry(transaction,
403402
FTSUtils::getOrigTermsTableName(tableEntry->getTableID(), indexName)));
404403
}

extension/fts/src/function/query_fts/query_fts_pattern_match.cpp

Lines changed: 19 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -52,9 +52,9 @@ class MatchTermVertexCompute : public function::VertexCompute {
5252
std::unordered_map<common::offset_t, uint64_t>& resDfs;
5353
};
5454

55-
class BasicMatchVertexCompute final : public MatchTermVertexCompute {
55+
class StemTermMatchVertexCompute final : public MatchTermVertexCompute {
5656
public:
57-
explicit BasicMatchVertexCompute(std::unordered_map<common::offset_t, uint64_t>& resDfs,
57+
explicit StemTermMatchVertexCompute(std::unordered_map<common::offset_t, uint64_t>& resDfs,
5858
std::vector<VCQueryTerm>& queryTerms)
5959
: MatchTermVertexCompute{queryTerms, resDfs} {}
6060

@@ -65,13 +65,13 @@ class BasicMatchVertexCompute final : public MatchTermVertexCompute {
6565
}
6666

6767
std::unique_ptr<VertexCompute> copy() override {
68-
return std::make_unique<BasicMatchVertexCompute>(resDfs, queryTerms);
68+
return std::make_unique<StemTermMatchVertexCompute>(resDfs, queryTerms);
6969
}
7070
};
7171

72-
class AdvancedMatchVertexCompute final : public MatchTermVertexCompute {
72+
class ExactTermMatchVertexCompute final : public MatchTermVertexCompute {
7373
public:
74-
AdvancedMatchVertexCompute(std::unordered_map<common::offset_t, uint64_t>& resDfs,
74+
ExactTermMatchVertexCompute(std::unordered_map<common::offset_t, uint64_t>& resDfs,
7575
std::vector<VCQueryTerm>& queryTerms, const QueryFTSBindData& bindData,
7676
main::ClientContext& context)
7777
: MatchTermVertexCompute{queryTerms, resDfs},
@@ -82,7 +82,7 @@ class AdvancedMatchVertexCompute final : public MatchTermVertexCompute {
8282
bindData{bindData}, context{context},
8383
termsDFLookup{bindData.getTermsEntry(context), context} {}
8484

85-
~AdvancedMatchVertexCompute() override { sb_stemmer_delete(sbStemmer); }
85+
~ExactTermMatchVertexCompute() override { sb_stemmer_delete(sbStemmer); }
8686

8787
void handleMatchedTerm(uint64_t itr, const graph::VertexScanState::Chunk& chunk) override {
8888
auto term = chunk.getProperties<common::ku_string_t>(0)[itr];
@@ -94,7 +94,7 @@ class AdvancedMatchVertexCompute final : public MatchTermVertexCompute {
9494
}
9595

9696
std::unique_ptr<VertexCompute> copy() override {
97-
return std::make_unique<AdvancedMatchVertexCompute>(resDfs, queryTerms, bindData, context);
97+
return std::make_unique<ExactTermMatchVertexCompute>(resDfs, queryTerms, bindData, context);
9898
}
9999

100100
private:
@@ -104,30 +104,33 @@ class AdvancedMatchVertexCompute final : public MatchTermVertexCompute {
104104
TermsDFLookup termsDFLookup;
105105
};
106106

107-
static void basicMatchAlgo(std::unordered_map<common::offset_t, uint64_t>& dfs,
107+
static void stemTermMatch(std::unordered_map<common::offset_t, uint64_t>& dfs,
108108
std::vector<VCQueryTerm>& vcQueryTerms, ExecutionContext* executionContext, graph::Graph* graph,
109109
const QueryFTSBindData& bindData) {
110-
auto matchVc = BasicMatchVertexCompute{dfs, vcQueryTerms};
110+
auto matchVc = StemTermMatchVertexCompute{dfs, vcQueryTerms};
111111
GDSUtils::runVertexCompute(executionContext, GDSDensityState::DENSE, graph, matchVc,
112112
bindData.getTermsEntry(*executionContext->clientContext),
113113
std::vector<std::string>{"term", TermsDFLookup::DOC_FREQUENCY_PROP_NAME});
114114
}
115115

116-
static void advancedMatchAlgo(std::unordered_map<common::offset_t, uint64_t>& dfs,
116+
static void exactTermMatch(std::unordered_map<common::offset_t, uint64_t>& dfs,
117117
std::vector<VCQueryTerm>& vcQueryTerms, ExecutionContext* executionContext, graph::Graph* graph,
118118
const QueryFTSBindData& bindData) {
119119
auto matchOrigTermVc =
120-
AdvancedMatchVertexCompute{dfs, vcQueryTerms, bindData, *executionContext->clientContext};
120+
ExactTermMatchVertexCompute{dfs, vcQueryTerms, bindData, *executionContext->clientContext};
121121
GDSUtils::runVertexCompute(executionContext, GDSDensityState::DENSE, graph, matchOrigTermVc,
122122
bindData.getOrigTermsEntry(*executionContext->clientContext),
123123
std::vector<std::string>{"term"});
124124
}
125125

126-
pattern_match_algo PatternMatchFactory::getPatternMatchAlgo(bool isAdvanced) {
127-
if (isAdvanced) {
128-
return advancedMatchAlgo;
129-
} else {
130-
return basicMatchAlgo;
126+
pattern_match_algo PatternMatchFactory::getPatternMatchAlgo(TermMatchType termMatchType) {
127+
switch (termMatchType) {
128+
case TermMatchType::EXACT:
129+
return exactTermMatch;
130+
case TermMatchType::STEM:
131+
return stemTermMatch;
132+
default:
133+
KU_UNREACHABLE;
131134
}
132135
}
133136

extension/fts/src/include/function/fts_config.h

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -17,8 +17,8 @@ struct Stemmer {
1717
static void validate(const std::string& stemmer);
1818
};
1919

20-
struct AdvancedPatternMatch {
21-
static constexpr const char* NAME = "advanced_pattern_match";
20+
struct ExactTermMatch {
21+
static constexpr const char* NAME = "exact_term_match";
2222
static constexpr common::LogicalTypeID TYPE = common::LogicalTypeID::BOOL;
2323
static constexpr bool DEFAULT_VALUE = false;
2424
};
@@ -85,7 +85,7 @@ struct CreateFTSConfig {
8585
std::string ignorePattern = IgnorePattern::DEFAULT_VALUE;
8686
std::string ignorePatternQuery = IgnorePattern::DEFAULT_VALUE_QUERY;
8787
TokenizerInfo tokenizerInfo;
88-
bool advancedPatternMatch = AdvancedPatternMatch::DEFAULT_VALUE;
88+
bool exactTermMatch = ExactTermMatch::DEFAULT_VALUE;
8989

9090
CreateFTSConfig() = default;
9191
CreateFTSConfig(main::ClientContext& context, common::table_id_t tableID,
@@ -104,16 +104,16 @@ struct FTSConfig {
104104
std::string ignorePatternQuery = "";
105105
std::string tokenizer = "";
106106
std::string jiebaDictDir = "";
107-
bool advancedPatternMatch = false;
107+
bool exactTermMatch = false;
108108

109109
FTSConfig() = default;
110110
FTSConfig(std::string stemmer, std::string stopWordsTableName, std::string stopWordsSource,
111111
std::string ignorePattern, std::string ignorePatternQuery, std::string tokenizer,
112-
std::string jiebaDictDir, bool advancedPatternMatch)
112+
std::string jiebaDictDir, bool exactTermMatch)
113113
: stemmer{std::move(stemmer)}, stopWordsTableName{std::move(stopWordsTableName)},
114114
stopWordsSource{std::move(stopWordsSource)}, ignorePattern{std::move(ignorePattern)},
115115
ignorePatternQuery{std::move(ignorePatternQuery)}, tokenizer{std::move(tokenizer)},
116-
jiebaDictDir{std::move(jiebaDictDir)}, advancedPatternMatch{advancedPatternMatch} {}
116+
jiebaDictDir{std::move(jiebaDictDir)}, exactTermMatch{exactTermMatch} {}
117117

118118
void serialize(common::Serializer& serializer) const;
119119

extension/fts/src/include/function/query_fts/query_fts_pattern_match.h

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,9 +19,14 @@ using pattern_match_algo = std::function<void(std::unordered_map<common::offset_
1919
std::vector<VCQueryTerm>& vcQueryTerms, processor::ExecutionContext* executionContext,
2020
graph::Graph* graph, const QueryFTSBindData& bindData)>;
2121

22+
enum class TermMatchType : uint8_t {
23+
STEM = 0,
24+
EXACT = 1,
25+
};
26+
2227
class PatternMatchFactory {
2328
public:
24-
static pattern_match_algo getPatternMatchAlgo(bool isAdvanced);
29+
static pattern_match_algo getPatternMatchAlgo(TermMatchType termMatchType);
2530
};
2631

2732
} // namespace fts_extension

extension/fts/test/test_files/advanced_pattern_match.test

Lines changed: 0 additions & 24 deletions
This file was deleted.

extension/fts/test/test_files/wildcard.test

Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -51,3 +51,34 @@ Abcdefg|This book is a test?ax*alphabetical?
5151
---- 2
5252
Echoes of the Past|A deep dive into the history of ancient civilizations.
5353
Computers|The hiory*?story*a?b?c of computing
54+
55+
-CASE exact_term_match
56+
-LOAD_DYNAMIC_EXTENSION fts
57+
-STATEMENT CREATE NODE TABLE news (content string, primary key(content));
58+
---- ok
59+
-STATEMENT create (n:news {content: "alice is a canadian runner"})
60+
---- ok
61+
-STATEMENT create (n:news {content: "carol is running in the playground"})
62+
---- ok
63+
-STATEMENT CALL CREATE_FTS_INDEX('news', 'news_index_0', ['content'], exact_term_match := FALSE);
64+
---- ok
65+
-STATEMENT CALL QUERY_FTS_INDEX('news', 'news_index_0', 'runn*') RETURN node.content, score order by score
66+
---- 1
67+
alice is a canadian runner|0.301030
68+
-STATEMENT CALL QUERY_FTS_INDEX('news', 'news_index_0', 'runn?ng') RETURN node.content, score order by score
69+
---- 0
70+
-STATEMENT CALL QUERY_FTS_INDEX('news', 'news_index_0', 'runne?') RETURN node.content, score order by score
71+
---- 1
72+
alice is a canadian runner|0.301030
73+
-STATEMENT CALL CREATE_FTS_INDEX('news', 'news_index_1', ['content'], exact_term_match := TRUE);
74+
---- ok
75+
-STATEMENT CALL QUERY_FTS_INDEX('news', 'news_index_1', 'runn*') RETURN node.content, score order by score
76+
---- 2
77+
alice is a canadian runner|0.301030
78+
carol is running in the playground|0.301030
79+
-STATEMENT CALL QUERY_FTS_INDEX('news', 'news_index_1', 'runn?ng') RETURN node.content, score order by score
80+
---- 1
81+
carol is running in the playground|0.301030
82+
-STATEMENT CALL QUERY_FTS_INDEX('news', 'news_index_1', 'runne?') RETURN node.content, score order by score
83+
---- 1
84+
alice is a canadian runner|0.301030

0 commit comments

Comments
 (0)