Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions src/daemon/api_search.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ void SearchRequestBuilder_c::SendQuery ( const char * sIndexes, ISphOutputBuffer
tOut.SendInt ( q.m_iCutoff );
tOut.SendInt ( q.m_iRetryCount<0 ? 0 : q.m_iRetryCount ); // runaround for old clients.
tOut.SendInt ( q.m_iRetryDelay<0 ? 0 : q.m_iRetryDelay );
tOut.SendString ( q.m_sGroupDistinct.cstr() );
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you can not just replace the logic as new master could send request into the old agent and agent will fail with the wrong error message.
It could be better to increase VER_COMMAND_SEARCH and handle the old behavior for single distinct and allow agent to faile the incoming request from the newer master.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it also worth for all tests with the new result set to ask LLM to calculate these results set from the raw table data and compare that output with the result sets tests got to make sure the new result sets are valid. Otherwise somebody need to check these results sets by hand.

tOut.SendString ( q.m_dGroupDistinct.IsEmpty() ? "" : q.m_dGroupDistinct[0].cstr() );
tOut.SendInt ( q.m_bGeoAnchor );
if ( q.m_bGeoAnchor )
{
Expand Down Expand Up @@ -844,8 +844,10 @@ bool ParseSearchQuery ( InputBuffer_c & tReq, ISphOutputBuffer & tOut, CSphQuery
tQuery.m_iCutoff = tReq.GetInt();
tQuery.m_iRetryCount = tReq.GetInt ();
tQuery.m_iRetryDelay = tReq.GetInt ();
tQuery.m_sGroupDistinct = tReq.GetString ();
sphColumnToLowercase ( const_cast<char *>( tQuery.m_sGroupDistinct.cstr() ) );
CSphString sGroupDistinct = tReq.GetString ();
sphColumnToLowercase ( const_cast<char *>( sGroupDistinct.cstr() ) );
if ( !sGroupDistinct.IsEmpty() )
tQuery.m_dGroupDistinct.Add ( sGroupDistinct );

tQuery.m_bGeoAnchor = ( tReq.GetInt()!=0 );
if ( tQuery.m_bGeoAnchor )
Expand Down
4 changes: 2 additions & 2 deletions src/daemon/search_handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -769,7 +769,7 @@ class GlobalSorters_c final
{
auto iValidIndexes = (int)dIndexes.count_of ( [&] ( const auto& pIndex ) { return pIndex; } );

m_bNeedGlobalSorters = iValidIndexes>1 && !dQueries.First().m_sGroupDistinct.IsEmpty();
m_bNeedGlobalSorters = iValidIndexes>1 && !dQueries.First().m_dGroupDistinct.IsEmpty();
if ( m_bNeedGlobalSorters )
{
// check if schemas are same
Expand Down Expand Up @@ -2192,7 +2192,7 @@ void SearchHandler_c::RunSubset ( int iStart, int iEnd )
{
for ( auto& dItem : dItems )
{
if ( dItem.m_sExpr=="count(*)" || ( dItem.m_sExpr=="@distinct" ) )
if ( dItem.m_sExpr=="count(*)" || dItem.m_sExpr.Begins("@distinct_") )
tRes.m_dZeroCount.Add ( dItem.m_sAlias );
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/frontendschema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ struct AggregateColumnSort_fn
return c.m_eAggrFunc!=SPH_AGGR_NONE
|| c.m_sName=="@groupby"
|| c.m_sName=="@count"
|| c.m_sName=="@distinct"
|| c.m_sName.Begins("@distinct_")
|| IsSortJsonInternal ( c.m_sName );
}

Expand Down
4 changes: 2 additions & 2 deletions src/joinsorter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1939,8 +1939,8 @@ void JoinSorter_c::AddGroupbyItemsToJoinSelectList()
}
}

if ( !tQuery.m_sGroupDistinct.IsEmpty() )
AddToJoinSelectList ( tQuery.m_sGroupDistinct, tQuery.m_sGroupDistinct );
for ( const auto & sDistinct : tQuery.m_dGroupDistinct )
AddToJoinSelectList ( sDistinct, sDistinct );
}
}

Expand Down
106 changes: 69 additions & 37 deletions src/queuecreator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ bool HasImplicitGrouping ( const CSphQuery & tQuery )
{
auto fnIsImplicit = [] ( const CSphQueryItem & t )
{
return ( t.m_eAggrFunc!=SPH_AGGR_NONE ) || t.m_sExpr=="count(*)" || t.m_sExpr=="@distinct";
return ( t.m_eAggrFunc!=SPH_AGGR_NONE ) || t.m_sExpr=="count(*)" || t.m_sExpr.Begins("@distinct_");
};

return tQuery.m_sGroupBy.IsEmpty() ? tQuery.m_dItems.any_of(fnIsImplicit) : false;
Expand Down Expand Up @@ -86,7 +86,7 @@ static bool IsCount ( const CSphString & s )
static bool IsGroupby ( const CSphString & s )
{
return s=="@groupby"
|| s=="@distinct"
|| s.Begins("@distinct_")
|| s=="groupby()"
|| IsSortJsonInternal(s);
}
Expand Down Expand Up @@ -340,7 +340,13 @@ class QueueCreator_c
void ReplaceJsonGroupbyWithStrings ( CSphString & sJsonGroupBy );
void UpdateAggregateDependencies ( CSphColumnInfo & tExprCol );
int GetGroupbyAttrIndex() const { return GetAliasedAttrIndex ( m_tQuery.m_sGroupBy, m_tQuery, *m_pSorterSchema ); }
int GetGroupDistinctAttrIndex() const { return GetAliasedAttrIndex ( m_tQuery.m_sGroupDistinct, m_tQuery, *m_pSorterSchema ); }
int GetGroupDistinctAttrIndex() const
{
// Return index of first distinct attribute for compatibility
if ( m_tQuery.m_dGroupDistinct.IsEmpty() )
return -1;
return GetAliasedAttrIndex ( m_tQuery.m_dGroupDistinct[0], m_tQuery, *m_pSorterSchema );
}

bool CanCalcFastCountDistinct() const;
bool CanCalcFastCountFilter() const;
Expand Down Expand Up @@ -463,40 +469,46 @@ void QueueCreator_c::CreateGrouperByAttr ( ESphAttr eType, const CSphColumnInfo

bool QueueCreator_c::SetupDistinctAttr()
{
const CSphString & sDistinct = m_tQuery.m_sGroupDistinct;
if ( sDistinct.IsEmpty() )
if ( m_tQuery.m_dGroupDistinct.IsEmpty() )
return true;

assert ( m_pSorterSchema );
auto & tSchema = *m_pSorterSchema;

int iDistinct = tSchema.GetAttrIndex ( sDistinct.cstr() );
if ( iDistinct<0 )
// Handle multiple distinct expressions
for ( const auto & sDistinct : m_tQuery.m_dGroupDistinct )
{
CSphString sJsonCol;
if ( !sphJsonNameSplit ( sDistinct.cstr(), m_tQuery.m_sJoinIdx.cstr(), &sJsonCol ) )
int iDistinct = tSchema.GetAttrIndex ( sDistinct.cstr() );
if ( iDistinct<0 )
{
return Err ( "group-count-distinct attribute '%s' not found", sDistinct.cstr() );
return false;
}
CSphString sJsonCol;
if ( !sphJsonNameSplit ( sDistinct.cstr(), m_tQuery.m_sJoinIdx.cstr(), &sJsonCol ) )
{
return Err ( "group-count-distinct attribute '%s' not found", sDistinct.cstr() );
}

CSphColumnInfo tExprCol ( sDistinct.cstr(), SPH_ATTR_JSON_FIELD_PTR );
tExprCol.m_eStage = SPH_EVAL_SORTER;
tExprCol.m_uAttrFlags = CSphColumnInfo::ATTR_JOINED;
m_pSorterSchema->AddAttr ( tExprCol, true );
iDistinct = m_pSorterSchema->GetAttrIndex ( tExprCol.m_sName.cstr() );
}
CSphColumnInfo tExprCol ( sDistinct.cstr(), SPH_ATTR_JSON_FIELD_PTR );
tExprCol.m_eStage = SPH_EVAL_SORTER;
tExprCol.m_uAttrFlags = CSphColumnInfo::ATTR_JOINED;
m_pSorterSchema->AddAttr ( tExprCol, true );
iDistinct = m_pSorterSchema->GetAttrIndex ( tExprCol.m_sName.cstr() );
}

const auto & tDistinctAttr = tSchema.GetAttr(iDistinct);
if ( IsNotRealAttribute(tDistinctAttr) )
return Err ( "group-count-distinct attribute '%s' not found", sDistinct.cstr() );
const auto & tDistinctAttr = tSchema.GetAttr(iDistinct);
if ( IsNotRealAttribute(tDistinctAttr) )
return Err ( "group-count-distinct attribute '%s' not found", sDistinct.cstr() );

if ( tDistinctAttr.IsColumnar() )
m_tGroupSorterSettings.m_pDistinctFetcher = CreateColumnarDistinctFetcher ( tDistinctAttr.m_sName, tDistinctAttr.m_eAttrType, m_tQuery.m_eCollation );
else
m_tGroupSorterSettings.m_pDistinctFetcher = CreateDistinctFetcher ( tDistinctAttr.m_sName, tDistinctAttr.m_tLocator, tDistinctAttr.m_eAttrType );
// Use first distinct expression for fetcher (sufficient for grouping/sorting)
if ( !m_tGroupSorterSettings.m_pDistinctFetcher )
{
if ( tDistinctAttr.IsColumnar() )
m_tGroupSorterSettings.m_pDistinctFetcher = CreateColumnarDistinctFetcher ( tDistinctAttr.m_sName, tDistinctAttr.m_eAttrType, m_tQuery.m_eCollation );
else
m_tGroupSorterSettings.m_pDistinctFetcher = CreateDistinctFetcher ( tDistinctAttr.m_sName, tDistinctAttr.m_tLocator, tDistinctAttr.m_eAttrType );

m_bJoinedGroupSort |= IsJoinAttr(tDistinctAttr.m_sName);
m_bJoinedGroupSort |= IsJoinAttr(tDistinctAttr.m_sName);
}
}

return true;
}
Expand Down Expand Up @@ -1456,9 +1468,16 @@ bool QueueCreator_c::MaybeAddGroupbyMagic ( bool bGotDistinct )

if ( bGotDistinct )
{
CSphColumnInfo tDistinct ( "@distinct", SPH_ATTR_INTEGER );
tDistinct.m_eStage = SPH_EVAL_SORTER;
AddColumn ( tDistinct );
// Handle multiple distinct expressions
for ( const auto & tItem : m_tQuery.m_dItems )
{
if ( tItem.m_sExpr.Begins("@distinct_") )
{
CSphColumnInfo tDistinct ( tItem.m_sExpr.cstr(), SPH_ATTR_INTEGER );
tDistinct.m_eStage = SPH_EVAL_SORTER;
AddColumn ( tDistinct );
}
}
}

// add @groupbystr last in case we need to skip it on sending (like @int_attr_*)
Expand All @@ -1480,15 +1499,20 @@ bool QueueCreator_c::MaybeAddGroupbyMagic ( bool bGotDistinct )
m_tGroupSorterSettings.m_tLocCount = m_pSorterSchema->GetAttr ( iCount ).m_tLocator;
LOC_CHECK ( m_tGroupSorterSettings.m_tLocCount.m_bDynamic, "@count must be dynamic" );

int iDistinct = m_pSorterSchema->GetAttrIndex ( "@distinct" );
int iDistinct = -1;
if ( bGotDistinct )
{
LOC_CHECK ( iDistinct>=0, "missing @distinct" );
// Look for @distinct_0 (first distinct column) for backward compatibility
iDistinct = m_pSorterSchema->GetAttrIndex ( "@distinct_0" );
}
if ( bGotDistinct )
{
LOC_CHECK ( iDistinct>=0, "missing @distinct_0" );
m_tGroupSorterSettings.m_tLocDistinct = m_pSorterSchema->GetAttr ( iDistinct ).m_tLocator;
LOC_CHECK ( m_tGroupSorterSettings.m_tLocDistinct.m_bDynamic, "@distinct must be dynamic" );
LOC_CHECK ( m_tGroupSorterSettings.m_tLocDistinct.m_bDynamic, "@distinct_0 must be dynamic" );
}
else
LOC_CHECK ( iDistinct<=0, "unexpected @distinct" );
LOC_CHECK ( iDistinct<=0, "unexpected @distinct_0" );

int iGroupbyStr = m_pSorterSchema->GetAttrIndex ( sJsonGroupBy.cstr() );
if ( iGroupbyStr>=0 )
Expand Down Expand Up @@ -2174,9 +2198,17 @@ bool QueueCreator_c::SetupGroupSortingFunc ( bool bGotDistinct )

if ( bGotDistinct )
{
m_dGroupColumns.Add ( { m_pSorterSchema->GetAttrIndex ( m_tQuery.m_sGroupDistinct.cstr() ), true } );
assert ( m_dGroupColumns.Last().first>=0 );
m_hExtra.Add ( m_pSorterSchema->GetAttr ( m_dGroupColumns.Last().first ).m_sName );
// Add all distinct expressions to group columns
for ( const auto & sDistinct : m_tQuery.m_dGroupDistinct )
{
int iAttrIndex = m_pSorterSchema->GetAttrIndex ( sDistinct.cstr() );
if ( iAttrIndex >= 0 )
{
m_dGroupColumns.Add ( { iAttrIndex, true } );
assert ( m_dGroupColumns.Last().first>=0 );
m_hExtra.Add ( m_pSorterSchema->GetAttr ( m_dGroupColumns.Last().first ).m_sName );
}
}
}

// implicit case
Expand Down Expand Up @@ -2230,7 +2262,7 @@ bool QueueCreator_c::AddGroupbyStuff ()
m_bHeadWOGroup = ( m_tQuery.m_sGroupBy.IsEmpty () && m_tQuery.m_bFacetHead );
auto fnIsImplicit = [] ( const CSphQueryItem & t )
{
return ( t.m_eAggrFunc!=SPH_AGGR_NONE ) || t.m_sExpr=="count(*)" || t.m_sExpr=="@distinct";
return ( t.m_eAggrFunc!=SPH_AGGR_NONE ) || t.m_sExpr=="count(*)" || t.m_sExpr.Begins("@distinct_");
};

bool bHasImplicitGrouping = HasImplicitGrouping(m_tQuery);
Expand Down
Loading
Loading