diff --git a/api/ruby/lib/sphinx/client.rb b/api/ruby/lib/sphinx/client.rb index 7dd6e2095f..ea9624f9a5 100644 --- a/api/ruby/lib/sphinx/client.rb +++ b/api/ruby/lib/sphinx/client.rb @@ -1,5 +1,5 @@ # = client.rb - Sphinx Client API -# +# # Author:: Dmytro Shteflyuk . # Copyright:: Copyright (c) 2006 - 2008 Dmytro Shteflyuk # License:: Distributes under the same terms as Ruby @@ -10,17 +10,17 @@ # You can freely distribute/modify this library. # ==Sphinx Client API -# +# # The Sphinx Client API is used to communicate with searchd # daemon and get search results from Sphinx. -# +# # ===Usage -# +# # sphinx = Sphinx::Client.new # result = sphinx.Query('test') # ids = result['matches'].map { |match| match['id'] }.join(',') # posts = Post.find :all, :conditions => "id IN (#{ids})" -# +# # docs = posts.map(&:body) # excerpts = sphinx.BuildExcerpts(docs, 'index', 'test') @@ -46,22 +46,22 @@ class SphinxUnknownError < SphinxError; end # :startdoc: class Client - + # :stopdoc: - + # Known searchd commands - + # search command SEARCHD_COMMAND_SEARCH = 0 # excerpt command SEARCHD_COMMAND_EXCERPT = 1 # update command - SEARCHD_COMMAND_UPDATE = 2 + SEARCHD_COMMAND_UPDATE = 2 # keywords command - SEARCHD_COMMAND_KEYWORDS = 3 - + SEARCHD_COMMAND_KEYWORDS = 3 + # Current client-side command implementation versions - + # search command version VER_COMMAND_SEARCH = 0x119 # excerpt command version @@ -70,39 +70,39 @@ class Client VER_COMMAND_UPDATE = 0x103 # keywords command version VER_COMMAND_KEYWORDS = 0x100 - + # Known searchd status codes - + # general success, command-specific reply follows SEARCHD_OK = 0 # general failure, command-specific reply may follow SEARCHD_ERROR = 1 # temporaty failure, client should retry later SEARCHD_RETRY = 2 - # general success, warning message and command-specific reply follow - SEARCHD_WARNING = 3 - + # general success, warning message and command-specific reply follow + SEARCHD_WARNING = 3 + # :startdoc: - + # Known match modes - + # match all query words - SPH_MATCH_ALL = 0 + SPH_MATCH_ALL = 0 # match any query word - SPH_MATCH_ANY = 1 + SPH_MATCH_ANY = 1 # match this exact phrase - SPH_MATCH_PHRASE = 2 + SPH_MATCH_PHRASE = 2 # match this boolean query - SPH_MATCH_BOOLEAN = 3 + SPH_MATCH_BOOLEAN = 3 # match this extended query - SPH_MATCH_EXTENDED = 4 + SPH_MATCH_EXTENDED = 4 # match all document IDs w/o fulltext query, apply filters SPH_MATCH_FULLSCAN = 5 # extended engine V2 (TEMPORARY, WILL BE REMOVED IN 0.9.8-RELEASE) SPH_MATCH_EXTENDED2 = 6 - + # Known ranking modes (ext2 only) - + # default mode, phrase proximity major factor and BM25 minor one SPH_RANK_PROXIMITY_BM25 = 0 # statistical mode, BM25 ranking only (faster but worse quality) @@ -117,9 +117,9 @@ class Client SPH_RANK_FIELDMASK = 6 SPH_RANK_SPH04 = 7 SPH_RANK_EXPR = 8 - + # Known sort modes - + # sort by document relevance desc, then by date SPH_SORT_RELEVANCE = 0 # sort by document date desc, then by relevance desc @@ -132,23 +132,23 @@ class Client SPH_SORT_EXTENDED = 4 # sort by arithmetic expression in descending order (eg. 
"@id + max(@weight,1000)*boost + log(price)") SPH_SORT_EXPR = 5 - + # Known filter types - + # filter by integer values set SPH_FILTER_VALUES = 0 # filter by integer range SPH_FILTER_RANGE = 1 # filter by float range SPH_FILTER_FLOATRANGE = 2 - + # Known attribute types - + # this attr is just an integer SPH_ATTR_INTEGER = 1 # this attr is a timestamp SPH_ATTR_TIMESTAMP = 2 - # this attr is an ordinal string number (integer at search time, + # this attr is an ordinal string number (integer at search time, # specially handled at indexing time) SPH_ATTR_ORDINAL = 3 # this attr is a boolean bit field @@ -162,28 +162,28 @@ class Client # this attr has multiple values (0 or more) SPH_ATTR_MULTI = 0x40000001 SPH_ATTR_MULTI64 = 0x40000002 - + # Known grouping functions - + # group by day SPH_GROUPBY_DAY = 0 # group by week - SPH_GROUPBY_WEEK = 1 + SPH_GROUPBY_WEEK = 1 # group by month - SPH_GROUPBY_MONTH = 2 + SPH_GROUPBY_MONTH = 2 # group by year SPH_GROUPBY_YEAR = 3 # group by attribute value SPH_GROUPBY_ATTR = 4 # group by sequential attrs pair SPH_GROUPBY_ATTRPAIR = 5 - - # Constructs the Sphinx::Client object and sets options to their default values. + + # Constructs the Sphinx::Client object and sets options to their default values. def initialize # per-client-object settings @host = 'localhost' # searchd host (default is "localhost") @port = 9312 # searchd port (default is 9312) - + # per-query settings @offset = 0 # how many records to seek from result-set start (default is 0) @limit = 20 # how many records to return from result-set starting at offset (default is 20) @@ -206,29 +206,29 @@ def initialize @indexweights = [] # per-index weights @ranker = SPH_RANK_PROXIMITY_BM25 # ranking mode (default is SPH_RANK_PROXIMITY_BM25) @rankexpr = '' # ranker expression for SPH_RANK_EXPR - @maxquerytime = 0 # max query time, milliseconds (default is 0, do not limit) + @maxquerytime = 0 # max query time, milliseconds (default is 0, do not limit) @fieldweights = {} # per-field-name weights @overrides = [] # per-query attribute values overrides @select = '*' # select-list (attributes or expressions, with optional aliases) - + # per-reply fields (for single-query case) @error = '' # last error message @warning = '' # last warning message - + @reqs = [] # requests storage (for multi-query case) @mbenc = '' # stored mbstring encoding end - + # Get last error message. def GetLastError @error end - + # Get last warning message. def GetLastWarning @warning end - + # Set searchd host name (string) and port (integer). def SetServer(host, port) assert { host.instance_of? String } @@ -237,7 +237,7 @@ def SetServer(host, port) @host = host @port = port end - + # Set offset and count into result set, # and optionally set max-matches and cutoff limits. def SetLimits(offset, limit, max = 0, cutoff = 0) @@ -253,7 +253,7 @@ def SetLimits(offset, limit, max = 0, cutoff = 0) @maxmatches = max if max > 0 @cutoff = cutoff if cutoff > 0 end - + # Set maximum query time, in milliseconds, per-index, # integer, 0 means "do not limit" def SetMaxQueryTime(max) @@ -261,7 +261,7 @@ def SetMaxQueryTime(max) assert { max >= 0 } @maxquerytime = max end - + # Set matching mode. DEPRECATED def SetMatchMode(mode) # $stderr.puts "DEPRECATED: Do not call this method or, even better, use SphinxQL instead of an API\n" @@ -275,7 +275,7 @@ def SetMatchMode(mode) @mode = mode end - + # Set ranking mode. 
def SetRankingMode(ranker, rankexpr = '') assert { ranker == SPH_RANK_PROXIMITY_BM25 \ @@ -291,7 +291,7 @@ def SetRankingMode(ranker, rankexpr = '') @ranker = ranker @rankexpr = rankexpr end - + # Set matches sorting mode. def SetSortMode(mode, sortby = '') assert { mode == SPH_SORT_RELEVANCE \ @@ -306,7 +306,7 @@ def SetSortMode(mode, sortby = '') @sort = mode @sortby = sortby end - + # Bind per-field weights by order. # # DEPRECATED; use SetFieldWeights() instead. @@ -334,7 +334,7 @@ def SetFieldWeights(weights) @fieldweights = weights end - + # Bind per-index weights by name. def SetIndexWeights(weights) assert { weights.instance_of? Hash } @@ -342,13 +342,13 @@ def SetIndexWeights(weights) assert { index.instance_of? String } assert { weight.instance_of? Fixnum } end - + @indexweights = weights end - + # Set IDs range to match. - # - # Only match records if document ID is beetwen min_id and max_id (inclusive). + # + # Only match records if document ID is beetwen min_id and max_id (inclusive). def SetIDRange(min, max) assert { min.instance_of?(Fixnum) or min.instance_of?(Bignum) } assert { max.instance_of?(Fixnum) or max.instance_of?(Bignum) } @@ -357,9 +357,9 @@ def SetIDRange(min, max) @min_id = min @max_id = max end - + # Set values filter. - # + # # Only match those records where attribute column values # are in specified set. def SetFilter(attribute, values, exclude = false) @@ -371,13 +371,13 @@ def SetFilter(attribute, values, exclude = false) values.each do |value| assert { value.instance_of? Fixnum } end - + @filters << { 'type' => SPH_FILTER_VALUES, 'attr' => attribute, 'exclude' => exclude, 'values' => values } end end - + # Set range filter. - # + # # Only match those records where attribute column value # is beetwen min and max (including min and max). def SetFilterRange(attribute, min, max, exclude = false) @@ -385,10 +385,10 @@ def SetFilterRange(attribute, min, max, exclude = false) assert { min.instance_of? Fixnum or min.instance_of? Bignum } assert { max.instance_of? Fixnum or max.instance_of? Bignum } assert { min <= max } - + @filters << { 'type' => SPH_FILTER_RANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max } end - + # Set float range filter. # # Only match those records where attribute column value @@ -398,14 +398,14 @@ def SetFilterFloatRange(attribute, min, max, exclude = false) assert { min.instance_of? Float } assert { max.instance_of? Float } assert { min <= max } - + @filters << { 'type' => SPH_FILTER_FLOATRANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max } end - + # Setup anchor point for geosphere distance calculations. # # Required to use @geodist in filters and sorting - # distance will be computed to this point. Latitude and longitude + # distance will be computed to this point. Latitude and longitude # must be in radians. # # * attrlat -- is the name of latitude attribute @@ -420,7 +420,7 @@ def SetGeoAnchor(attrlat, attrlong, lat, long) @anchor = { 'attrlat' => attrlat, 'attrlong' => attrlong, 'lat' => lat, 'long' => long } end - + # Set grouping attribute and function. # # In grouping mode, all matches are assigned to different groups @@ -450,7 +450,7 @@ def SetGeoAnchor(attrlat, attrlong, lat, long) # WARNING: grouping is done in fixed memory and thus its results # are only approximate; so there might be more groups reported # in total_found than actually present. @count might also - # be underestimated. + # be underestimated. 
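+  #
+  # Illustrative call:
+  #   sphinx.SetGroupBy('published', Sphinx::Client::SPH_GROUPBY_DAY, '@group desc')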
# # For example, if sorting by relevance and grouping by "published" # attribute with SPH_GROUPBY_DAY function, then the result set will @@ -471,22 +471,22 @@ def SetGroupBy(attribute, func, groupsort = '@group desc') @groupfunc = func @groupsort = groupsort end - + # Set count-distinct attribute for group-by queries. def SetGroupDistinct(attribute) assert { attribute.instance_of? String } @groupdistinct = attribute end - + # Set distributed retries count and delay. def SetRetries(count, delay = 0) assert { count.instance_of? Fixnum } assert { delay.instance_of? Fixnum } - + @retrycount = count @retrydelay = delay end - + # DEPRECATED: Set attribute values override # # There can be only one override per attribute. @@ -505,13 +505,13 @@ def SetSelect(select) assert { select.instance_of? String } @select = select end - + # Clear all filters (for multi-queries). def ResetFilters @filters = [] @anchor = [] end - + # Clear groupby settings (for multi-queries). def ResetGroupBy @groupby = '' @@ -519,12 +519,12 @@ def ResetGroupBy @groupsort = '@group desc' @groupdistinct = '' end - + # Clear all attribute value overrides (for multi-queries). def ResetOverrides @overrides = [] end - + # Connect to searchd server and run given search query. # # query is query string @@ -545,7 +545,7 @@ def ResetOverrides # # Returns false on failure. # Returns hash which has the following keys on success: - # + # # * 'matches' -- array of hashes {'weight', 'group', 'id'}, where 'id' is document_id. # * 'total' -- total amount of matches retrieved (upto SPH_MAX_MATCHES, see sphinx.h) # * 'total_found' -- total amount of matching documents in index @@ -554,20 +554,20 @@ def ResetOverrides def Query(query, index = '*', comment = '') assert { @reqs.empty? } @reqs = [] - + self.AddQuery(query, index, comment) results = self.RunQueries - + # probably network error; error message should be already filled return false unless results.instance_of?(Array) - + @error = results[0]['error'] @warning = results[0]['warning'] - + return false if results[0]['status'] == SEARCHD_ERROR return results[0] end - + # Add query to batch. # # Batch queries enable searchd to perform internal optimizations, @@ -582,7 +582,7 @@ def Query(query, index = '*', comment = '') # Returns index to results array returned by RunQueries call. def AddQuery(query, index = '*', comment = '') # build request - + # mode and limits request = Request.new request.put_int @offset, @limit, @mode, @ranker @@ -603,8 +603,8 @@ def AddQuery(query, index = '*', comment = '') # id64 range marker request.put_int 1 # id64 range - request.put_int64 @min_id.to_i, @max_id.to_i - + request.put_int64 @min_id.to_i, @max_id.to_i + # filters request.put_int @filters.length @filters.each do |filter| @@ -623,7 +623,7 @@ def AddQuery(query, index = '*', comment = '') end request.put_int filter['exclude'] ? 1 : 0 end - + # group-by clause, max-matches count, group-sort clause, cutoff count request.put_int @groupfunc request.put_string @groupby @@ -631,7 +631,7 @@ def AddQuery(query, index = '*', comment = '') request.put_string @groupsort request.put_int @cutoff, @retrycount, @retrydelay request.put_string @groupdistinct - + # anchor point if @anchor.empty? 
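+      # wire format: int flag (0 when no anchor; otherwise 1, the two
+      # attribute names, then the lat/long floats, all in radians)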
request.put_int 0 @@ -640,27 +640,27 @@ def AddQuery(query, index = '*', comment = '') request.put_string @anchor['attrlat'], @anchor['attrlong'] request.put_float @anchor['lat'], @anchor['long'] end - + # per-index weights request.put_int @indexweights.length @indexweights.each do |idx, weight| request.put_string idx request.put_int weight end - + # max query time request.put_int @maxquerytime - + # per-field weights request.put_int @fieldweights.length @fieldweights.each do |field, weight| request.put_string field request.put_int weight end - + # comment request.put_string comment - + # attribute overrides request.put_int @overrides.length for entry in @overrides do @@ -669,7 +669,7 @@ def AddQuery(query, index = '*', comment = '') entry['values'].each do |id, val| assert { id.instance_of?(Fixnum) || id.instance_of?(Bignum) } assert { val.instance_of?(Fixnum) || val.instance_of?(Bignum) || val.instance_of?(Float) } - + request.put_int64 id case entry['type'] when SPH_ATTR_FLOAT @@ -681,15 +681,15 @@ def AddQuery(query, index = '*', comment = '') end end end - + # select-list request.put_string @select - + # store request to requests array @reqs << request.to_s; return @reqs.length - 1 end - + # Run queries batch. # # Returns an array of result sets on success. @@ -710,7 +710,7 @@ def RunQueries nreqs = @reqs.length @reqs = [] response = PerformRequest(:search, req, nreqs) - + # parse response begin results = [] @@ -718,10 +718,10 @@ def RunQueries while ires < nreqs ires += 1 result = {} - + result['error'] = '' result['warning'] = '' - + # extract status status = result['status'] = response.get_int if status != SEARCHD_OK @@ -734,19 +734,19 @@ def RunQueries next end end - + # read schema fields = [] attrs = {} attrs_names_in_order = [] - + nfields = response.get_int while nfields > 0 nfields -= 1 fields << response.get_string end result['fields'] = fields - + nattrs = response.get_int while nattrs > 0 nattrs -= 1 @@ -756,29 +756,29 @@ def RunQueries attrs_names_in_order << attr end result['attrs'] = attrs - + # read match count count = response.get_int id64 = response.get_int - + # read matches result['matches'] = [] while count > 0 count -= 1 - + if id64 != 0 doc = response.get_int64 weight = response.get_int else doc, weight = response.get_ints(2) end - + r = {} # This is a single result put in the result['matches'] array r['id'] = doc r['weight'] = weight attrs_names_in_order.each do |a| r['attrs'] ||= {} - + case attrs[a] when SPH_ATTR_BIGINT # handle 64-bit ints @@ -812,7 +812,7 @@ def RunQueries end result['total'], result['total_found'], msecs, words = response.get_ints(4) result['time'] = '%.3f' % (msecs / 1000.0) - + result['words'] = {} while words > 0 words -= 1 @@ -820,17 +820,17 @@ def RunQueries docs, hits = response.get_ints(2) result['words'][word] = { 'docs' => docs, 'hits' => hits } end - + results << result end #rescue EOFError # @error = 'incomplete reply' # raise SphinxResponseError, @error end - + return results end - + # Connect to searchd server and generate exceprts from given documents. # # * docs -- an array of strings which represent the documents' contents @@ -838,7 +838,7 @@ def RunQueries # for stemming, lexing and case folding # * words -- a string which contains the words to highlight # * opts is a hash which contains additional optional highlighting parameters. 
- # + # # You can use following parameters: # * 'before_match' -- a string to insert before a set of matching words, default is "" # * 'after_match' -- a string to insert after a set of matching words, default is "" @@ -874,9 +874,9 @@ def BuildExcerpts(docs, index, words, opts = {}) opts['weight_order'] ||= false opts['load_files'] ||= false opts['allow_empty'] ||= false - + # build request - + # v.1.0 req flags = 1 flags |= 2 if opts['exact_phrase'] @@ -887,26 +887,26 @@ def BuildExcerpts(docs, index, words, opts = {}) flags |= 64 if opts['force_all_words'] flags |= 128 if opts['load_files'] flags |= 256 if opts['allow_empty'] - + request = Request.new request.put_int 0, flags # mode=0, flags=1 (remove spaces) # req index request.put_string index # req words request.put_string words - + # options request.put_string opts['before_match'] request.put_string opts['after_match'] request.put_string opts['chunk_separator'] request.put_int opts['limit'].to_i, opts['around'].to_i - + # options v1.2 request.put_int opts['limit_passages'].to_i request.put_int opts['limit_words'].to_i request.put_int opts['start_passage_id'].to_i request.put_string opts['html_strip_mode'] - + # documents request.put_int docs.size docs.each do |doc| @@ -914,9 +914,9 @@ def BuildExcerpts(docs, index, words, opts = {}) request.put_string doc end - + response = PerformRequest(:excerpt, request) - + # parse response begin res = [] @@ -929,7 +929,7 @@ def BuildExcerpts(docs, index, words, opts = {}) end return res end - + # Connect to searchd server, and generate keyword list for a given query. # # Returns an array of words on success. @@ -937,7 +937,7 @@ def BuildKeywords(query, index, hits) assert { query.instance_of? String } assert { index.instance_of? String } assert { hits.instance_of?(TrueClass) || hits.instance_of?(FalseClass) } - + # build request request = Request.new # v.1.0 req @@ -946,7 +946,7 @@ def BuildKeywords(query, index, hits) request.put_int hits ? 1 : 0 response = PerformRequest(:keywords, request) - + # parse response begin res = [] @@ -954,17 +954,17 @@ def BuildKeywords(query, index, hits) 0.upto(nwords - 1) do |i| tokenized = response.get_string normalized = response.get_string - + entry = { 'tokenized' => tokenized, 'normalized' => normalized } entry['docs'], entry['hits'] = response.get_ints(2) if hits - + res << entry end rescue EOFError @error = 'incomplete reply' raise SphinxResponseError, @error end - + return res end @@ -987,12 +987,12 @@ def UpdateAttributes(index, attrs, values, mva = false, ignoreexistent = false ) assert { index.instance_of? String } assert { mva.instance_of?(TrueClass) || mva.instance_of?(FalseClass) } assert { ignoreexistent.instance_of?(TrueClass) || ignoreexistent.instance_of?(FalseClass) } - + assert { attrs.instance_of? Array } attrs.each do |attr| assert { attr.instance_of? String } end - + assert { values.instance_of? Hash } values.each do |id, entry| assert { id.instance_of? Fixnum } @@ -1007,18 +1007,18 @@ def UpdateAttributes(index, attrs, values, mva = false, ignoreexistent = false ) end end end - + # build request request = Request.new request.put_string index - + request.put_int attrs.length request.put_int ignoreexistent ? 1 : 0 for attr in attrs request.put_string attr request.put_int mva ? 
1 : 0 end - + request.put_int values.length values.each do |id, entry| request.put_int64 id @@ -1028,9 +1028,9 @@ def UpdateAttributes(index, attrs, values, mva = false, ignoreexistent = false ) request.put_int(*entry) end end - + response = PerformRequest(:update, request) - + # parse response begin return response.get_int @@ -1039,9 +1039,9 @@ def UpdateAttributes(index, attrs, values, mva = false, ignoreexistent = false ) raise SphinxResponseError, @error end end - + protected - + # Connect to searchd server. def Connect begin @@ -1054,23 +1054,23 @@ def Connect @error = "connection to #{@host}:#{@port} failed (error=#{err})" raise SphinxConnectError, @error end - + v = sock.recv(4).unpack('N*').first if v < 1 sock.close @error = "expected searchd protocol version 1+, got version '#{v}'" raise SphinxConnectError, @error end - + sock.send([1].pack('N'), 0) sock end - + # Get and check response packet from searchd server. def GetResponse(sock, client_version) response = '' len = 0 - + header = sock.recv(8) if header.length == 8 status, ver, len = header.unpack('n2N') @@ -1088,7 +1088,7 @@ def GetResponse(sock, client_version) end end sock.close - + # check response read = response.length if response.empty? or read != len.to_i @@ -1097,7 +1097,7 @@ def GetResponse(sock, client_version) : "failed to read searchd response (status=#{status}, ver=#{ver}, len=#{len}, read=#{read})" raise SphinxResponseError, @error end - + # check status if (status == SEARCHD_WARNING) wlen = response[0, 4].unpack('N*').first @@ -1109,32 +1109,32 @@ def GetResponse(sock, client_version) @error = 'searchd error: ' + response[4, response.length - 4] raise SphinxInternalError, @error end - + if status == SEARCHD_RETRY @error = 'temporary searchd error: ' + response[4, response.length - 4] raise SphinxTemporaryError, @error end - + unless status == SEARCHD_OK @error = "unknown status code: '#{status}'" raise SphinxUnknownError, @error end - + # check version if ver < client_version @warning = "searchd command v.#{ver >> 8}.#{ver & 0xff} older than client's " + "v.#{client_version >> 8}.#{client_version & 0xff}, some options might not work" end - + return response end - + # Connect, send query, get response. def PerformRequest(command, request, additional = nil) cmd = command.to_s.upcase command_id = Sphinx::Client.const_get('SEARCHD_COMMAND_' + cmd) command_ver = Sphinx::Client.const_get('VER_COMMAND_' + cmd) - + sock = self.Connect len = request.to_s.length + (additional != nil ? 8 : 0) header = [command_id, command_ver, len].pack('nnN') @@ -1143,7 +1143,7 @@ def PerformRequest(command, request, additional = nil) response = self.GetResponse(sock, command_ver) return Response.new(response) end - + # :stopdoc: def assert raise 'Assertion failed!' 
unless yield if $DEBUG
diff --git a/api/sphinxapi.php b/api/sphinxapi.php
index aefd9d0f4f..79580ff6f8 100644
--- a/api/sphinxapi.php
+++ b/api/sphinxapi.php
@@ -43,7 +43,7 @@ define ( "SEARCHD_COMMAND_FLUSHATTRS", 7 );
 /// current client-side command implementation versions
-define ( "VER_COMMAND_SEARCH", 0x120 );
+define ( "VER_COMMAND_SEARCH", 0x121 );
 define ( "VER_COMMAND_EXCERPT", 0x104 );
 define ( "VER_COMMAND_UPDATE", 0x104 );
 define ( "VER_COMMAND_KEYWORDS", 0x100 );
diff --git a/doc/versions.md b/doc/versions.md
new file mode 100644
index 0000000000..92d93356bd
--- /dev/null
+++ b/doc/versions.md
@@ -0,0 +1,493 @@
+# Manticore Protocol Versioning System
+
+This document provides a comprehensive overview of how protocol versioning works in Manticore Search, covering daemon/client version compatibility, command-specific versioning, and backward/forward compatibility mechanisms.
+
+## Overview
+
+Manticore Search uses a multi-layered versioning system to ensure compatibility between different client and daemon versions. The versioning system operates at several levels:
+
+1. **Connection-level handshake** (global protocol version)
+2. **Command-level versioning** (per-command protocol versions)
+3. **Replication protocol versioning** (for cluster operations)
+4. **Master-agent extensions** (for distributed search)
+
+## 1. Connection-Level Handshake
+
+### Handshake Protocol Version
+
+The initial connection handshake uses a simple global protocol version:
+
+```cpp
+// From src/sphinx.h
+#define SPHINX_SEARCHD_PROTO 1
+#define SPHINX_CLIENT_VERSION 1
+```
+
+**Code Implementation:**
+
+```cpp
+// From src/netreceive_api.cpp - Daemon sends handshake
+tOut.SendDword ( SPHINX_SEARCHD_PROTO ); // that is handshake
+
+// Daemon receives and validates client handshake
+auto uHandshake = tIn.GetDword();
+if ( uHandshake!=SPHINX_CLIENT_VERSION && uHandshake!=0x01000000UL )
+{
+	sphLogDebugv ( "conn %s(%d): got handshake, major v.%d", sClientIP, iCID, uHandshake );
+	return; // Drop connection on version mismatch
+}
+```
+
+```cpp
+// From src/searchdha.cpp - Agent sends handshake to master
+m_tOutput.SendDword ( SPHINX_CLIENT_VERSION );
+```
+
+**Key characteristics:**
+- Both daemon and client exchange a 4-byte DWORD containing version `0x00000001`
+- This is the **only** exchange that supports both big-endian and little-endian byte order
+- If versions don't match exactly (`1` or `0x01000000` for endianness), connection is dropped
+- This version has remained constant at `1` throughout Manticore's history
+
+**Handshake Flow:**
+```
+1. TCP connection established
+2. Daemon sends: 0x00000001 (or 0x01000000 in host byte order)
+3. Client sends: 0x00000001 (or 0x01000000 in host byte order)
+4. Both sides validate received version
+5. Connection proceeds to command phase (if successful)
+```
+
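+To make the flow concrete, here is a minimal client-side sketch of the
+handshake. This is illustrative only (not from the Manticore sources): it
+assumes an already-connected TCP socket `iSock` and plain POSIX socket calls,
+and it sends the version in network byte order.
+
+```cpp
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#include <cstdint>
+
+// Returns true if the daemon speaks protocol v.1+ and our version was sent.
+bool ApiHandshake ( int iSock )
+{
+	uint32_t uDaemonVer = 0;
+	if ( recv ( iSock, &uDaemonVer, sizeof(uDaemonVer), MSG_WAITALL )!=(ssize_t)sizeof(uDaemonVer) )
+		return false;
+
+	if ( ntohl(uDaemonVer)<1 )
+		return false; // unknown/ancient protocol, give up
+
+	uint32_t uMyVer = htonl(1); // SPHINX_CLIENT_VERSION
+	return send ( iSock, &uMyVer, sizeof(uMyVer), 0 )==(ssize_t)sizeof(uMyVer);
+}
+```
+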
+## 2. Command-Level Versioning
+
+### Version Format
+
+Each command uses a 16-bit version number (`WORD`) encoded as **MAJOR.MINOR**:
+- **High byte (bits 15-8)**: Major version
+- **Low byte (bits 7-0)**: Minor version
+- Example: Version 1.38 = `0x126` (major 0x01 = 1, minor 0x26 = 38)
+
+### Current Command Versions
+
+| Command | Code | Current Version | Hex | Description |
+|---------|------|----------------|-----|-------------|
+| SEARCH | 0 | 1.38 | 0x126 | Main search command |
+| EXCERPT | 1 | 1.4 | 0x104 | Snippet generation |
+| UPDATE | 2 | 1.4 | 0x104 | Document updates |
+| KEYWORDS | 3 | 1.2 | 0x102 | Keyword extraction |
+| PERSIST | 4 | N/A | N/A | Connection persistence (unversioned) |
+| STATUS | 5 | 1.1 | 0x101 | Status information |
+| FLUSHATTRS | 7 | 1.0 | 0x100 | Flush attributes |
+| SPHINXQL | 8 | 1.0 | 0x100 | SQL proxy command |
+| PING | 9 | 1.0 | 0x100 | Connectivity test |
+| UVAR | 11 | 1.0 | 0x100 | User variables |
+| JSON | 16 | 1.2 | 0x102 | JSON/REST proxy |
+| CALLPQ | 17 | 1.0 | 0x100 | Percolate queries |
+| CLUSTER | 18 | 1.10 | 0x10A | Cluster operations |
+| GETFIELD | - | 1.0 | 0x100 | Field operations |
+| SUGGEST | 15 | 1.1 | 0x101 | Query suggestions |
+
+**Code Implementation:**
+
+```cpp
+// From src/searchdaemon.h - Command version definitions
+enum SearchdCommandV_e : WORD
+{
+	VER_COMMAND_SEARCH = 0x126, // 1.38
+	VER_COMMAND_EXCERPT = 0x104,
+	VER_COMMAND_UPDATE = 0x104,
+	VER_COMMAND_KEYWORDS = 0x102,
+	VER_COMMAND_STATUS = 0x101,
+	VER_COMMAND_FLUSHATTRS = 0x100,
+	VER_COMMAND_SPHINXQL = 0x100,
+	VER_COMMAND_JSON = 0x102,
+	VER_COMMAND_PING = 0x100,
+	VER_COMMAND_UVAR = 0x100,
+	VER_COMMAND_CALLPQ = 0x100,
+	VER_COMMAND_CLUSTER = 0x10A,
+	VER_COMMAND_GETFIELD = 0x100,
+	VER_COMMAND_SUGGEST = 0x101,
+
+	VER_COMMAND_WRONG = 0,
+};
+```
+
+```php
+// From api/sphinxapi.php - Client-side version constants
+define ( "VER_COMMAND_SEARCH", 0x120 ); // Note: Client uses older version
+define ( "VER_COMMAND_EXCERPT", 0x104 );
+define ( "VER_COMMAND_UPDATE", 0x104 );
+define ( "VER_COMMAND_KEYWORDS", 0x100 );
+define ( "VER_COMMAND_STATUS", 0x101 );
+define ( "VER_COMMAND_FLUSHATTRS", 0x100 );
+```
+
+### Version Compatibility Rules
+
+The daemon implements strict version checking for each command:
+
+```cpp
+// From src/searchd.cpp - Actual implementation
+bool CheckCommandVersion ( WORD uVer, WORD uDaemonVersion, ISphOutputBuffer & tOut )
+{
+	// Rule 1: Major versions MUST match exactly
+	if ( ( uVer>>8)!=( uDaemonVersion>>8) )
+	{
+		SendErrorReply ( tOut, "major command version mismatch (expected v.%d.x, got v.%d.%d)",
+			uDaemonVersion>>8, uVer>>8, uVer&0xff );
+		return false;
+	}
+
+	// Rule 2: Client version MUST be <= daemon version
+	if ( uVer>uDaemonVersion )
+	{
+		SendErrorReply ( tOut, "client version is higher than daemon version (client is v.%d.%d, daemon is v.%d.%d)",
+			uVer>>8, uVer&0xff, uDaemonVersion>>8, uDaemonVersion&0xff );
+		return false;
+	}
+
+	return true; // Compatible
+}
+```
+
+**Usage Example:**
+```cpp
+// From src/daemon/api_search.cpp - SEARCH command handler
+void HandleCommandSearch ( ISphOutputBuffer & tOut, WORD uVer, InputBuffer_c & tReq )
+{
+	// Check command version compatibility
+	if ( !CheckCommandVersion ( uVer, VER_COMMAND_SEARCH, tOut ) )
+		return;
+
+	// Additional minimum version check for SEARCH
+	const WORD MIN_VERSION = 0x119; // 1.25
+	if ( uVer<MIN_VERSION )
+	{
+		SendErrorReply ( tOut, "client version is too old; upgrade your client (client is v.%d.%d, min is v.%d.%d)",
+			uVer>>8, uVer&0xff, MIN_VERSION>>8, MIN_VERSION&0xff );
+		return;
+	}
+
+	// Command processing continues...
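+
+	// Illustrative note (not in the sources): the WORD decodes as
+	// major = uVer>>8, minor = uVer&0xff, so a v.1.25 (0x119) client
+	// talking to this v.1.38 (0x126) daemon passes all three checks above.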
+} +``` + +**Compatibility Matrix:** +- ✅ **Compatible**: Client major = Daemon major AND Client version ≤ Daemon version +- ❌ **Incompatible**: Client major ≠ Daemon major OR Client version > Daemon version + +### Special Cases + +#### SEARCH Command Minimum Version +The SEARCH command has an additional minimum version requirement: + +```cpp +const WORD MIN_VERSION = 0x119; // 1.25 +if (uVer < MIN_VERSION) { + // Error: client version too old + return false; +} +``` + +#### Version Validation Examples + +| Client | Daemon | Compatible? | Reason | +|--------|--------|-------------|---------| +| 1.25 | 1.38 | ✅ | Same major, client ≤ daemon | +| 1.38 | 1.25 | ❌ | Client > daemon | +| 1.25 | 2.0 | ❌ | Different major versions | +| 1.20 | 1.38 | ❌ | Below minimum version for SEARCH | + +## 3. Master-Agent Extensions (Distributed Search) + +### Master Version Protocol + +For distributed searches, the SEARCH command includes a special `master_version` field that enables protocol extensions: + +```cpp +// From src/searchdaemon.h +enum +{ + VER_COMMAND_SEARCH_MASTER = 25 // Current maximum master version +}; +``` + +**Code Implementation:** + +```cpp +// From src/daemon/api_search.cpp - Agent sends master version to daemon +void SearchRequestBuilder_c::BuildRequest ( const AgentConn_t & tAgent, ISphOutputBuffer & tOut ) const +{ + auto tHdr = APIHeader ( tOut, SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH ); + + tOut.SendInt ( VER_COMMAND_SEARCH_MASTER ); // Send maximum supported version + tOut.SendInt ( m_dQueries.GetLength() ); + // ... rest of request +} +``` + +```cpp +// From src/daemon/api_search.cpp - Daemon validates master version +void HandleCommandSearch ( ISphOutputBuffer & tOut, WORD uVer, InputBuffer_c & tReq ) +{ + // ... version checks ... + + int iMasterVer = tReq.GetInt(); + if ( iMasterVer<0 || iMasterVer>VER_COMMAND_SEARCH_MASTER ) + { + SendErrorReply ( tOut, "master-agent version mismatch; update me first, then update master!" ); + return; + } + WORD uMasterVer { WORD (iMasterVer) }; + bool bAgentMode = ( uMasterVer>0 ); // Agent mode detection + + // ... rest of processing +} +``` + +### Master Version Capabilities + +Each master version enables specific protocol features: + +| Version | Feature | +|---------|---------| +| 0 | Regular client mode (no extensions) | +| 1+ | Agent mode with extended schema | +| 1 | Collation support | +| 2 | Outer ORDER BY and LIMIT | +| 5 | Equal bounds filters | +| 6 | GROUP BY limits | +| 13 | MVA functions (ANY/ALL/NONE) | +| 14 | UDF ranker support | +| 15 | FACET support, enhanced filters | +| 16 | Keyword expansion options | +| up to 25 | Additional distributed features | + +### Client vs Agent Mode + +- **Client Mode** (`master_version = 0`): Returns exactly requested columns +- **Agent Mode** (`master_version > 0`): Returns additional service columns needed for distributed aggregation + +**Version-Dependent Features:** + +```cpp +// From src/daemon/api_search.cpp - Version-dependent attribute handling +static ESphAttr FixupAttrForNetwork ( const CSphColumnInfo & tCol, const CSphSchema & tSchema, + int iVer, WORD uMasterVer, bool bAgentMode ) +{ + bool bSendJson = ( bAgentMode && uMasterVer>=3 ); // JSON support from v3 + bool bSendJsonField = ( bAgentMode && uMasterVer>=4 ); // JSON fields from v4 + + switch ( tCol.m_eAttrType ) + { + // Attribute type handling based on version... 
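+
+		// Illustrative sketch (assumed, not verbatim source): e.g. a JSON
+		// attribute can only be sent as JSON when the master understands it:
+		//   case SPH_ATTR_JSON:
+		//       return bSendJson ? SPH_ATTR_JSON : SPH_ATTR_STRING;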
+	}
+}
+```
+
+```cpp
+// From src/daemon/api_search.cpp - Conditional feature inclusion
+void SendResult ( int iVer, ISphOutputBuffer & tOut, const AggrResult_t& tRes,
+	bool bAgentMode, const CSphQuery & tQuery, WORD uMasterVer )
+{
+	// Various features enabled based on master version
+	if ( bAgentMode && uMasterVer>=7 )
+	{
+		// Send additional statistics (introduced in v7)
+		tOut.SendDword ( tRes.m_tStats.m_iFetchedDocs );
+		tOut.SendDword ( tRes.m_tStats.m_iFetchedHits );
+	}
+
+	if ( bAgentMode && uMasterVer>=19 )
+	{
+		// Even more advanced features (v19+)
+		// ...
+	}
+}
+```
+
+## 4. Replication Protocol Versioning
+
+### Replication Version
+
+The replication subsystem has its own versioning:
+
+```cpp
+// From src/replication/serialize.cpp
+static constexpr WORD VER_COMMAND_REPLICATE = 0x109; // 1.9
+```
+
+**Code Implementation:**
+
+```cpp
+// From src/replication/serialize.cpp - Version validation
+bool LoadCmdHeader( MemoryReader_c& tReader, ReplicationCommand_t* pCmd )
+{
+	TlsMsg::ResetErr();
+	auto eCommand = (ReplCmd_e) tReader.GetVal<WORD>();
+	if ( eCommand<ReplCmd_e::PQUERY_ADD || eCommand>ReplCmd_e::TOTAL )
+		return TlsMsg::Err ( "bad replication command %d", (int) eCommand );
+
+	pCmd->m_uVersion = tReader.GetVal<WORD>();
+	if ( pCmd->m_uVersion>VER_COMMAND_REPLICATE )
+		return TlsMsg::Err ( "replication command %d, version mismatch %d, got %d",
+			(int) eCommand, VER_COMMAND_REPLICATE, (int)pCmd->m_uVersion );
+
+	pCmd->m_eCommand = eCommand;
+	pCmd->m_sIndex = tReader.GetString ();
+	return true;
+}
+```
+
+```cpp
+// From src/replication/serialize.cpp - Version setting
+void SaveCmdHeader ( const ReplicationCommand_t & tCmd, MemoryWriter_c & tWriter )
+{
+	tWriter.PutWord ((WORD) tCmd.m_eCommand );
+	tWriter.PutWord ( VER_COMMAND_REPLICATE ); // Always use current version
+	tWriter.PutString ( tCmd.m_sIndex );
+}
+```
+
+### Version History
+
+```cpp
+// ver 0x105 - Fixed CSphWordHit serialization
+// ver 0x106 - Add total indexed bytes to accum
+// ver 0x107 - Add blobs vector to replicate update statement
+// ver 0x108 - GTID sent as blob (was string)
+// ver 0x109 - Index support for ALTER ADD/DROP table
+```
+
+### Replication Compatibility
+
+Replication uses a simpler compatibility rule:
+- Client replication version MUST be ≤ daemon replication version
+- No major/minor version distinction
+
+## 5. Backward and Forward Compatibility
+
+### Backward Compatibility Strategy
+
+1. **Command Support**: Daemon supports older client command versions within the same major version
+2. **Graceful Degradation**: Newer daemon features are disabled when serving older clients
+3. **Protocol Extensions**: New features added as optional extensions that older clients can ignore
+
+### Forward Compatibility Limitations
+
+1. **No Future Support**: Daemon cannot serve clients with higher command versions
+2. **Major Version Boundaries**: Breaking changes require major version increments
+3. **Strict Validation**: Version mismatches result in immediate connection termination
+
+### Version Evolution Strategy
+
+1. **Minor Increments**: Add new optional fields or extend existing behavior
+2. **Major Increments**: Breaking changes, protocol restructuring
+3. **Extension Mechanisms**: Master-agent extensions allow feature additions without breaking compatibility
+
+## 6. Error Handling
+
+### Version Mismatch Errors
+
+1. **Handshake Failure**: Connection dropped silently or with retry message
+2. **Command Version Mismatch**: Error response with specific version information
+3. 
**Minimum Version**: Clear error message indicating required upgrade + +### Example Error Messages + +**From actual code - these are the real error messages:** + +```cpp +// From src/searchd.cpp - Major version mismatch +SendErrorReply ( tOut, "major command version mismatch (expected v.%d.x, got v.%d.%d)", + uDaemonVersion>>8, uVer>>8, uVer&0xff ); + +// From src/searchd.cpp - Client version too new +SendErrorReply ( tOut, "client version is higher than daemon version (client is v.%d.%d, daemon is v.%d.%d)", + uVer>>8, uVer&0xff, uDaemonVersion>>8, uDaemonVersion&0xff ); + +// From src/daemon/api_search.cpp - Client version too old +SendErrorReply ( tOut, "client version is too old; upgrade your client (client is v.%d.%d, min is v.%d.%d)", + uVer>>8, uVer&0xff, MIN_VERSION>>8, MIN_VERSION&0xff ); + +// From src/daemon/api_search.cpp - Master-agent version mismatch +SendErrorReply ( tOut, "master-agent version mismatch; update me first, then update master!" ); + +// From src/replication/serialize.cpp - Replication version mismatch +TlsMsg::Err ( "replication command %d, version mismatch %d, got %d", + (int) eCommand, VER_COMMAND_REPLICATE, (int)pCmd->m_uVersion ); +``` + +**Example outputs:** +``` +"major command version mismatch (expected v.1.x, got v.2.0)" +"client version is higher than daemon version (client is v.1.40, daemon is v.1.38)" +"client version is too old; upgrade your client (client is v.1.20, min is v.1.25)" +"master-agent version mismatch; update me first, then update master!" +"replication command 5, version mismatch 265, got 266" +``` + +## 7. Implementation Guidelines + +### For Client Developers + +1. **Version Detection**: Always check daemon capabilities before using advanced features +2. **Graceful Fallback**: Implement fallback behavior for unsupported versions +3. **Error Handling**: Properly handle version mismatch errors +4. **Testing**: Test against multiple daemon versions + +**Example Client Implementation:** + +```php +// From api/sphinxapi.php - How the PHP client uses versions +public function Query ( $query, $index="*", $comment="" ) +{ + // Build request with command version + $req = pack ( "nnNNN", SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, $len, 0, $nreqs ) . $req; + + // Send and validate response + if ( !( $this->_Send ( $fp, $req, $len+8 ) ) || + !( $response = $this->_GetResponse ( $fp, VER_COMMAND_SEARCH ) ) ) + { + // Handle version mismatch or communication errors + return false; + } + + // Process response... +} +``` + +### For Daemon Developers + +1. **Backward Compatibility**: Maintain support for reasonable range of older client versions +2. **Version Documentation**: Document all protocol changes with version numbers +3. **Deprecation Policy**: Provide clear migration paths for breaking changes +4. **Extension Design**: Use optional extensions rather than breaking changes when possible + +## 8. Best Practices + +### Version Management + +1. **Conservative Approach**: Don't bump versions unnecessarily +2. **Documentation**: Maintain detailed changelog of protocol changes +3. **Testing Matrix**: Test all supported version combinations +4. **Migration Planning**: Plan deprecation timeline for old versions + +### Development Workflow + +1. **Feature Design**: Design new features as backward-compatible extensions +2. **Version Planning**: Plan version increments as part of feature design +3. **Compatibility Testing**: Test new features with older clients +4. 
**Documentation**: Update protocol documentation with every change + +## Conclusion + +Manticore's multi-layered versioning system provides a robust framework for managing compatibility between different client and daemon versions. The system balances the need for innovation with the requirement for backward compatibility, ensuring that existing clients continue to work while new features can be added progressively. + +Understanding this versioning system is crucial for: +- Client library developers +- System administrators managing mixed-version environments +- Contributors working on protocol extensions +- Users planning upgrades and migrations + +The key principle is that **major versions must match exactly**, while **minor versions support backward compatibility** within reasonable bounds. \ No newline at end of file diff --git a/mysqlse/ha_sphinx.cc b/mysqlse/ha_sphinx.cc index 49b534a6fc..d957b340f5 100644 --- a/mysqlse/ha_sphinx.cc +++ b/mysqlse/ha_sphinx.cc @@ -160,6 +160,7 @@ enum { SPHINX_SEARCHD_PROTO = 1, SEARCHD_COMMAND_SEARCH = 0, + VER_COMMAND_SEARCH = 0x119, }; @@ -302,9 +303,9 @@ struct CSphSEShare uint m_iUseCount; #if MYSQL_VERSION_ID<50610 CHARSET_INFO * m_pTableQueryCharset; -#else +#else const CHARSET_INFO * m_pTableQueryCharset; -#endif +#endif int m_iTableFields; char ** m_sTableField; @@ -437,7 +438,7 @@ struct CSphSEThreadTable CHARSET_INFO * m_pQueryCharset; #else const CHARSET_INFO * m_pQueryCharset; -#endif +#endif bool m_bReplace; ///< are we doing an INSERT or REPLACE @@ -2108,7 +2109,7 @@ int ha_sphinx::open ( const char * name, int, uint ) CSphTLS * pTls = (CSphTLS *)( *tmp ); SafeDelete ( pTls ); *tmp = NULL; - } + } #else if ( table->in_use->ha_data [ sphinx_hton.slot ] ) { @@ -2360,16 +2361,16 @@ int ha_sphinx::write_row ( byte * ) } else { (*ppField)->val_str ( &sValue ); - + int iLen = sValue.length(); bool bMva = ( iLen>1 && sValue.ptr()[0]=='(' && sValue.ptr()[iLen-1]==')' ); - + if ( !bMva ) sQuery.append ( "'" ); sValue.print ( &sQuery ); if ( !bMva ) sQuery.append ( "'" ); - + sValue.length(0); } } diff --git a/src/daemon/api_search.cpp b/src/daemon/api_search.cpp index c65ff37480..867669ba7a 100644 --- a/src/daemon/api_search.cpp +++ b/src/daemon/api_search.cpp @@ -172,7 +172,10 @@ void SearchRequestBuilder_c::SendQuery ( const char * sIndexes, ISphOutputBuffer tOut.SendInt ( q.m_iCutoff ); tOut.SendInt ( q.m_iRetryCount<0 ? 0 : q.m_iRetryCount ); // runaround for old clients. tOut.SendInt ( q.m_iRetryDelay<0 ? 
0 : q.m_iRetryDelay );
-	tOut.SendString ( q.m_sGroupDistinct.cstr() );
+	// Version 0x121+: Send multiple distinct fields
+	tOut.SendInt ( q.m_dGroupDistinct.GetLength() );
+	for ( const auto & sDistinct : q.m_dGroupDistinct )
+		tOut.SendString ( sDistinct.cstr() );
 	tOut.SendInt ( q.m_bGeoAnchor );
 	if ( q.m_bGeoAnchor )
 	{
@@ -844,8 +847,15 @@ bool ParseSearchQuery ( InputBuffer_c & tReq, ISphOutputBuffer & tOut, CSphQuery
 	tQuery.m_iCutoff = tReq.GetInt();
 	tQuery.m_iRetryCount = tReq.GetInt ();
 	tQuery.m_iRetryDelay = tReq.GetInt ();
-	tQuery.m_sGroupDistinct = tReq.GetString ();
-	sphColumnToLowercase ( const_cast<char *>( tQuery.m_sGroupDistinct.cstr() ) );
+	// Version 0x121+: Read multiple distinct fields
+	int iDistinctCount = tReq.GetInt();
+	for ( int i = 0; i < iDistinctCount; i++ )
+	{
+		CSphString sGroupDistinct = tReq.GetString();
+		sphColumnToLowercase ( const_cast<char *>( sGroupDistinct.cstr() ) );
+		if ( !sGroupDistinct.IsEmpty() )
+			tQuery.m_dGroupDistinct.Add ( sGroupDistinct );
+	}
 	tQuery.m_bGeoAnchor = ( tReq.GetInt()!=0 );
 	if ( tQuery.m_bGeoAnchor )
diff --git a/src/daemon/search_handler.cpp b/src/daemon/search_handler.cpp
index 44d26d74a1..6e30a74b56 100644
--- a/src/daemon/search_handler.cpp
+++ b/src/daemon/search_handler.cpp
@@ -769,7 +769,7 @@ class GlobalSorters_c final
 	{
 		auto iValidIndexes = (int)dIndexes.count_of ( [&] ( const auto& pIndex ) { return pIndex; } );
-		m_bNeedGlobalSorters = iValidIndexes>1 && !dQueries.First().m_sGroupDistinct.IsEmpty();
+		m_bNeedGlobalSorters = iValidIndexes>1 && !dQueries.First().m_dGroupDistinct.IsEmpty();
 		if ( m_bNeedGlobalSorters )
 		{
 			// check if schemas are same
@@ -2192,7 +2192,7 @@ void SearchHandler_c::RunSubset ( int iStart, int iEnd )
 	{
 		for ( auto& dItem : dItems )
 		{
-			if ( dItem.m_sExpr=="count(*)" || ( dItem.m_sExpr=="@distinct" ) )
+			if ( dItem.m_sExpr=="count(*)" || dItem.m_sExpr.Begins("@distinct_") )
 				tRes.m_dZeroCount.Add ( dItem.m_sAlias );
 		}
 	}
diff --git a/src/frontendschema.cpp b/src/frontendschema.cpp
index 1856ac03d3..2779fff0be 100644
--- a/src/frontendschema.cpp
+++ b/src/frontendschema.cpp
@@ -35,7 +35,7 @@ struct AggregateColumnSort_fn
 		return c.m_eAggrFunc!=SPH_AGGR_NONE
 			|| c.m_sName=="@groupby"
 			|| c.m_sName=="@count"
-			|| c.m_sName=="@distinct"
+			|| c.m_sName.Begins("@distinct_")
 			|| IsSortJsonInternal ( c.m_sName );
 	}
diff --git a/src/joinsorter.cpp b/src/joinsorter.cpp
index 3d22f485f2..24a19a1767 100644
--- a/src/joinsorter.cpp
+++ b/src/joinsorter.cpp
@@ -1939,8 +1939,8 @@ void JoinSorter_c::AddGroupbyItemsToJoinSelectList()
 		}
 	}
-	if ( !tQuery.m_sGroupDistinct.IsEmpty() )
-		AddToJoinSelectList ( tQuery.m_sGroupDistinct, tQuery.m_sGroupDistinct );
+	for ( const auto & sDistinct : tQuery.m_dGroupDistinct )
+		AddToJoinSelectList ( sDistinct, sDistinct );
 }
 }
diff --git a/src/queuecreator.cpp b/src/queuecreator.cpp
index 7b578157be..129f46f4f7 100644
--- a/src/queuecreator.cpp
+++ b/src/queuecreator.cpp
@@ -35,7 +35,7 @@ bool HasImplicitGrouping ( const CSphQuery & tQuery )
 {
 	auto fnIsImplicit = [] ( const CSphQueryItem & t )
 	{
-		return ( t.m_eAggrFunc!=SPH_AGGR_NONE ) || t.m_sExpr=="count(*)" || t.m_sExpr=="@distinct";
+		return ( t.m_eAggrFunc!=SPH_AGGR_NONE ) || t.m_sExpr=="count(*)" || t.m_sExpr.Begins("@distinct_");
 	};
 	return tQuery.m_sGroupBy.IsEmpty() ? 
tQuery.m_dItems.any_of(fnIsImplicit) : false; @@ -86,7 +86,7 @@ static bool IsCount ( const CSphString & s ) static bool IsGroupby ( const CSphString & s ) { return s=="@groupby" - || s=="@distinct" + || s.Begins("@distinct_") || s=="groupby()" || IsSortJsonInternal(s); } @@ -340,7 +340,13 @@ class QueueCreator_c void ReplaceJsonGroupbyWithStrings ( CSphString & sJsonGroupBy ); void UpdateAggregateDependencies ( CSphColumnInfo & tExprCol ); int GetGroupbyAttrIndex() const { return GetAliasedAttrIndex ( m_tQuery.m_sGroupBy, m_tQuery, *m_pSorterSchema ); } - int GetGroupDistinctAttrIndex() const { return GetAliasedAttrIndex ( m_tQuery.m_sGroupDistinct, m_tQuery, *m_pSorterSchema ); } + int GetGroupDistinctAttrIndex() const + { + // Return index of first distinct attribute for compatibility + if ( m_tQuery.m_dGroupDistinct.IsEmpty() ) + return -1; + return GetAliasedAttrIndex ( m_tQuery.m_dGroupDistinct[0], m_tQuery, *m_pSorterSchema ); + } bool CanCalcFastCountDistinct() const; bool CanCalcFastCountFilter() const; @@ -463,40 +469,46 @@ void QueueCreator_c::CreateGrouperByAttr ( ESphAttr eType, const CSphColumnInfo bool QueueCreator_c::SetupDistinctAttr() { - const CSphString & sDistinct = m_tQuery.m_sGroupDistinct; - if ( sDistinct.IsEmpty() ) + if ( m_tQuery.m_dGroupDistinct.IsEmpty() ) return true; assert ( m_pSorterSchema ); auto & tSchema = *m_pSorterSchema; - int iDistinct = tSchema.GetAttrIndex ( sDistinct.cstr() ); - if ( iDistinct<0 ) + // Handle multiple distinct expressions + for ( const auto & sDistinct : m_tQuery.m_dGroupDistinct ) { - CSphString sJsonCol; - if ( !sphJsonNameSplit ( sDistinct.cstr(), m_tQuery.m_sJoinIdx.cstr(), &sJsonCol ) ) + int iDistinct = tSchema.GetAttrIndex ( sDistinct.cstr() ); + if ( iDistinct<0 ) { - return Err ( "group-count-distinct attribute '%s' not found", sDistinct.cstr() ); - return false; - } + CSphString sJsonCol; + if ( !sphJsonNameSplit ( sDistinct.cstr(), m_tQuery.m_sJoinIdx.cstr(), &sJsonCol ) ) + { + return Err ( "group-count-distinct attribute '%s' not found", sDistinct.cstr() ); + } - CSphColumnInfo tExprCol ( sDistinct.cstr(), SPH_ATTR_JSON_FIELD_PTR ); - tExprCol.m_eStage = SPH_EVAL_SORTER; - tExprCol.m_uAttrFlags = CSphColumnInfo::ATTR_JOINED; - m_pSorterSchema->AddAttr ( tExprCol, true ); - iDistinct = m_pSorterSchema->GetAttrIndex ( tExprCol.m_sName.cstr() ); - } + CSphColumnInfo tExprCol ( sDistinct.cstr(), SPH_ATTR_JSON_FIELD_PTR ); + tExprCol.m_eStage = SPH_EVAL_SORTER; + tExprCol.m_uAttrFlags = CSphColumnInfo::ATTR_JOINED; + m_pSorterSchema->AddAttr ( tExprCol, true ); + iDistinct = m_pSorterSchema->GetAttrIndex ( tExprCol.m_sName.cstr() ); + } - const auto & tDistinctAttr = tSchema.GetAttr(iDistinct); - if ( IsNotRealAttribute(tDistinctAttr) ) - return Err ( "group-count-distinct attribute '%s' not found", sDistinct.cstr() ); + const auto & tDistinctAttr = tSchema.GetAttr(iDistinct); + if ( IsNotRealAttribute(tDistinctAttr) ) + return Err ( "group-count-distinct attribute '%s' not found", sDistinct.cstr() ); - if ( tDistinctAttr.IsColumnar() ) - m_tGroupSorterSettings.m_pDistinctFetcher = CreateColumnarDistinctFetcher ( tDistinctAttr.m_sName, tDistinctAttr.m_eAttrType, m_tQuery.m_eCollation ); - else - m_tGroupSorterSettings.m_pDistinctFetcher = CreateDistinctFetcher ( tDistinctAttr.m_sName, tDistinctAttr.m_tLocator, tDistinctAttr.m_eAttrType ); + // Use first distinct expression for fetcher (sufficient for grouping/sorting) + if ( !m_tGroupSorterSettings.m_pDistinctFetcher ) + { + if ( tDistinctAttr.IsColumnar() ) + 
m_tGroupSorterSettings.m_pDistinctFetcher = CreateColumnarDistinctFetcher ( tDistinctAttr.m_sName, tDistinctAttr.m_eAttrType, m_tQuery.m_eCollation ); + else + m_tGroupSorterSettings.m_pDistinctFetcher = CreateDistinctFetcher ( tDistinctAttr.m_sName, tDistinctAttr.m_tLocator, tDistinctAttr.m_eAttrType ); - m_bJoinedGroupSort |= IsJoinAttr(tDistinctAttr.m_sName); + m_bJoinedGroupSort |= IsJoinAttr(tDistinctAttr.m_sName); + } + } return true; } @@ -1456,9 +1468,16 @@ bool QueueCreator_c::MaybeAddGroupbyMagic ( bool bGotDistinct ) if ( bGotDistinct ) { - CSphColumnInfo tDistinct ( "@distinct", SPH_ATTR_INTEGER ); - tDistinct.m_eStage = SPH_EVAL_SORTER; - AddColumn ( tDistinct ); + // Handle multiple distinct expressions + for ( const auto & tItem : m_tQuery.m_dItems ) + { + if ( tItem.m_sExpr.Begins("@distinct_") ) + { + CSphColumnInfo tDistinct ( tItem.m_sExpr.cstr(), SPH_ATTR_INTEGER ); + tDistinct.m_eStage = SPH_EVAL_SORTER; + AddColumn ( tDistinct ); + } + } } // add @groupbystr last in case we need to skip it on sending (like @int_attr_*) @@ -1480,15 +1499,20 @@ bool QueueCreator_c::MaybeAddGroupbyMagic ( bool bGotDistinct ) m_tGroupSorterSettings.m_tLocCount = m_pSorterSchema->GetAttr ( iCount ).m_tLocator; LOC_CHECK ( m_tGroupSorterSettings.m_tLocCount.m_bDynamic, "@count must be dynamic" ); - int iDistinct = m_pSorterSchema->GetAttrIndex ( "@distinct" ); + int iDistinct = -1; if ( bGotDistinct ) { - LOC_CHECK ( iDistinct>=0, "missing @distinct" ); + // Look for @distinct_0 (first distinct column) for backward compatibility + iDistinct = m_pSorterSchema->GetAttrIndex ( "@distinct_0" ); + } + if ( bGotDistinct ) + { + LOC_CHECK ( iDistinct>=0, "missing @distinct_0" ); m_tGroupSorterSettings.m_tLocDistinct = m_pSorterSchema->GetAttr ( iDistinct ).m_tLocator; - LOC_CHECK ( m_tGroupSorterSettings.m_tLocDistinct.m_bDynamic, "@distinct must be dynamic" ); + LOC_CHECK ( m_tGroupSorterSettings.m_tLocDistinct.m_bDynamic, "@distinct_0 must be dynamic" ); } else - LOC_CHECK ( iDistinct<=0, "unexpected @distinct" ); + LOC_CHECK ( iDistinct<=0, "unexpected @distinct_0" ); int iGroupbyStr = m_pSorterSchema->GetAttrIndex ( sJsonGroupBy.cstr() ); if ( iGroupbyStr>=0 ) @@ -2174,9 +2198,17 @@ bool QueueCreator_c::SetupGroupSortingFunc ( bool bGotDistinct ) if ( bGotDistinct ) { - m_dGroupColumns.Add ( { m_pSorterSchema->GetAttrIndex ( m_tQuery.m_sGroupDistinct.cstr() ), true } ); - assert ( m_dGroupColumns.Last().first>=0 ); - m_hExtra.Add ( m_pSorterSchema->GetAttr ( m_dGroupColumns.Last().first ).m_sName ); + // Add all distinct expressions to group columns + for ( const auto & sDistinct : m_tQuery.m_dGroupDistinct ) + { + int iAttrIndex = m_pSorterSchema->GetAttrIndex ( sDistinct.cstr() ); + if ( iAttrIndex >= 0 ) + { + m_dGroupColumns.Add ( { iAttrIndex, true } ); + assert ( m_dGroupColumns.Last().first>=0 ); + m_hExtra.Add ( m_pSorterSchema->GetAttr ( m_dGroupColumns.Last().first ).m_sName ); + } + } } // implicit case @@ -2230,7 +2262,7 @@ bool QueueCreator_c::AddGroupbyStuff () m_bHeadWOGroup = ( m_tQuery.m_sGroupBy.IsEmpty () && m_tQuery.m_bFacetHead ); auto fnIsImplicit = [] ( const CSphQueryItem & t ) { - return ( t.m_eAggrFunc!=SPH_AGGR_NONE ) || t.m_sExpr=="count(*)" || t.m_sExpr=="@distinct"; + return ( t.m_eAggrFunc!=SPH_AGGR_NONE ) || t.m_sExpr=="count(*)" || t.m_sExpr.Begins("@distinct_"); }; bool bHasImplicitGrouping = HasImplicitGrouping(m_tQuery); diff --git a/src/searchdaemon.h b/src/searchdaemon.h index f94bfbdb55..65328c8e28 100644 --- a/src/searchdaemon.h +++ b/src/searchdaemon.h 
@@ -143,7 +143,7 @@ enum
 /// (shared here because of REPLICATE)
 enum SearchdCommandV_e : WORD
 {
-	VER_COMMAND_SEARCH = 0x126, // 1.38
+	VER_COMMAND_SEARCH = 0x127, // 1.39
 	VER_COMMAND_EXCERPT = 0x104,
 	VER_COMMAND_UPDATE = 0x104,
 	VER_COMMAND_KEYWORDS = 0x102,
diff --git a/src/searchdsql.cpp b/src/searchdsql.cpp
index 1f85378c03..148477e0a5 100644
--- a/src/searchdsql.cpp
+++ b/src/searchdsql.cpp
@@ -1239,27 +1239,42 @@ bool SqlParser_c::AddDistinct ( SqlNode_t * pNewExpr, SqlNode_t * pStart, SqlNod
 {
 	CSphString sDistinct;
 	ToString ( sDistinct, *pNewExpr );
-	if ( !m_pQuery->m_sGroupDistinct.IsEmpty() && m_pQuery->m_sGroupDistinct!=sDistinct )
+
+	// Check if this distinct expression already exists
+	if ( m_pQuery->m_dGroupDistinct.Contains ( sDistinct ) )
 	{
-		yyerror ( this, "too many COUNT(DISTINCT) clauses" );
-		return false;
+		CSphString sItemName;
+		sItemName.SetSprintf ( "@distinct_%d", m_pQuery->m_dGroupDistinct.GetFirst ( [&sDistinct] ( const CSphString & s ) { return s==sDistinct; } ) );
+		return AddItem ( sItemName.cstr(), pStart, pEnd );
 	}
-
-	m_pQuery->m_sGroupDistinct = sDistinct;
-	return AddItem ( "@distinct", pStart, pEnd );
+
+	// Add new distinct expression
+	int iDistinctIndex = m_pQuery->m_dGroupDistinct.GetLength();
+	m_pQuery->m_dGroupDistinct.Add ( sDistinct );
+
+	// Create unique @distinct_N item name
+	CSphString sItemName;
+	sItemName.SetSprintf ( "@distinct_%d", iDistinctIndex );
+
+	return AddItem ( sItemName.cstr(), pStart, pEnd );
 }
 
 void SqlParser_c::AddDistinct ( SqlNode_t * pNewExpr )
 {
+	CSphString sDistinct;
 	if ( !pNewExpr )
 	{
-		m_pQuery->m_sGroupDistinct = "id";
+		sDistinct = "id";
 	} else
 	{
-		ToString ( m_pQuery->m_sGroupDistinct, *pNewExpr );
-		sphColumnToLowercase ( const_cast<char *>( m_pQuery->m_sGroupDistinct.cstr() ) );
+		ToString ( sDistinct, *pNewExpr );
+		sphColumnToLowercase ( const_cast<char *>( sDistinct.cstr() ) );
 	}
+
+	// Add to vector if not already present
+	if ( !m_pQuery->m_dGroupDistinct.Contains ( sDistinct ) )
+		m_pQuery->m_dGroupDistinct.Add ( sDistinct );
 }
 
 bool SqlParser_c::AddDistinctSort ( SqlNode_t * pNewExpr, SqlNode_t * pStart, SqlNode_t * pEnd, bool bSortAsc )
@@ -1267,32 +1282,41 @@
 	if ( !AddDistinct ( pNewExpr, pStart, pEnd ) )
 		return false;
 
-	m_pQuery->m_sOrderBy.SetSprintf ( "@distinct %s", ( bSortAsc ? "asc" : "desc" ) );
+	// Find the index of this distinct expression for the new naming scheme
+	CSphString sDistinct;
+	ToString ( sDistinct, *pNewExpr );
+	int iIndex = m_pQuery->m_dGroupDistinct.GetFirst ( [&sDistinct] ( const CSphString & s ) { return s==sDistinct; } );
+
+	m_pQuery->m_sOrderBy.SetSprintf ( "@distinct_%d %s", iIndex, ( bSortAsc ? 
"asc" : "desc" ) ); return true; } bool SqlParser_c::MaybeAddFacetDistinct() { - if ( m_pQuery->m_sGroupDistinct.IsEmpty() ) - return true; - - // distinct could be already added by order by - if ( m_pQuery->m_dItems.Contains ( bind ( &CSphQueryItem::m_sExpr ), "@distinct" ) ) - return true; - - CSphQueryItem tItem; - tItem.m_sExpr = "@distinct"; - tItem.m_eAggrFunc = SPH_AGGR_NONE; - tItem.m_sAlias.SetSprintf ( "count(distinct %s)", m_pQuery->m_sGroupDistinct.cstr() ); - - int iCountPos = m_pQuery->m_dItems.GetFirst ( [] ( const CSphQueryItem & tElem ) { return ( tElem.m_sExpr=="count(*)" ); }); - if ( iCountPos==-1 ) + // Handle multiple distinct fields + for ( int i = 0; i < m_pQuery->m_dGroupDistinct.GetLength(); i++ ) { - yyerror ( this, "can not find COUNT clause" ); - return false; - } + CSphString sItemName; + sItemName.SetSprintf ( "@distinct_%d", i ); + + // Skip if already added + if ( m_pQuery->m_dItems.Contains ( bind ( &CSphQueryItem::m_sExpr ), sItemName ) ) + continue; + + CSphQueryItem tItem; + tItem.m_sExpr = sItemName; + tItem.m_eAggrFunc = SPH_AGGR_NONE; + tItem.m_sAlias.SetSprintf ( "count(distinct %s)", m_pQuery->m_dGroupDistinct[i].cstr() ); + + int iCountPos = m_pQuery->m_dItems.GetFirst ( [] ( const CSphQueryItem & tElem ) { return ( tElem.m_sExpr=="count(*)" ); }); + if ( iCountPos==-1 ) + { + yyerror ( this, "can not find COUNT clause" ); + return false; + } - m_pQuery->m_dItems.Insert ( iCountPos, tItem ); + m_pQuery->m_dItems.Insert ( iCountPos, tItem ); + } return SetNewSyntax(); } @@ -2080,7 +2104,7 @@ static bool SetupFacets ( CSphVector & dStmt ) static bool SetupFacetDistinct ( CSphVector & dStmt, CSphString & sError ) { - CSphString sDistinct; + CSphVector dReferenceDistinct; // need to keep order of query items same as at select list however do not duplicate items // that is why raw Vector.Uniq does not work here @@ -2096,15 +2120,31 @@ static bool SetupFacetDistinct ( CSphVector & dStmt, CSphString & sEr tItem.QueryItemHash(); } - if ( !tQuery.m_sGroupDistinct.IsEmpty() ) + // Check distinct consistency across FACET queries + if ( !tQuery.m_dGroupDistinct.IsEmpty() ) { - if ( !sDistinct.IsEmpty() && sDistinct!=tQuery.m_sGroupDistinct ) + if ( dReferenceDistinct.IsEmpty() ) { - sError.SetSprintf ( "distinct field for all FACET queries should be the same '%s', query %d got '%s'", sDistinct.cstr(), i, tQuery.m_sGroupDistinct.cstr() ); - return false; + // First query with distinct - use as reference + dReferenceDistinct = tQuery.m_dGroupDistinct; + } + else + { + // Validate that all distinct expressions match the reference + if ( dReferenceDistinct.GetLength() != tQuery.m_dGroupDistinct.GetLength() ) + { + sError.SetSprintf ( "distinct expressions count mismatch in FACET query %d", i ); + return false; + } + for ( int j = 0; j < dReferenceDistinct.GetLength(); j++ ) + { + if ( dReferenceDistinct[j] != tQuery.m_dGroupDistinct[j] ) + { + sError.SetSprintf ( "distinct field for all FACET queries should be the same '%s', query %d got '%s'", dReferenceDistinct[j].cstr(), i, tQuery.m_dGroupDistinct[j].cstr() ); + return false; + } + } } - if ( sDistinct.IsEmpty() ) - sDistinct = tQuery.m_sGroupDistinct; } } // got rid of duplicates @@ -2138,7 +2178,11 @@ static bool SetupFacetDistinct ( CSphVector & dStmt, CSphString & sEr } } - tStmt.m_tQuery.m_sGroupDistinct = sDistinct; + // Set distinct for all statements + if ( !dReferenceDistinct.IsEmpty() ) + { + tStmt.m_tQuery.m_dGroupDistinct = dReferenceDistinct; + } } return true; @@ -2283,9 +2327,10 @@ bool 
 	// need to keep same wide result set schema
 	if ( dStmt.GetLength()>1 )
 	{
-		const CSphString & sDistinct = dStmt[0].m_tQuery.m_sGroupDistinct;
+		// Copy distinct expressions from first statement to all others
+		const auto & dDistinct = dStmt[0].m_tQuery.m_dGroupDistinct;
 		for ( int i=1; i<dStmt.GetLength(); i++ )
-			dStmt[i].m_tQuery.m_sGroupDistinct = sDistinct;
+			dStmt[i].m_tQuery.m_dGroupDistinct = dDistinct;
-	const CSphColumnInfo * pDistinct = m_pSchema->GetAttr("@distinct");
+	const CSphColumnInfo * pDistinct = m_pSchema->GetAttr("@distinct_0");
 	assert(pDistinct);
 	for ( const auto & tLocator : m_tGroupSorter.m_tLocator )
diff --git a/src/sphinx.cpp b/src/sphinx.cpp
index 77f21f8a2c..c78d04a97f 100644
--- a/src/sphinx.cpp
+++ b/src/sphinx.cpp
@@ -1607,6 +1607,7 @@ class SelectParser_t
 	void AddItem ( const char * pToken, YYSTYPE * pStart=NULL, YYSTYPE * pEnd=NULL );
 	void AliasLastItem ( YYSTYPE * pAlias );
 	void AddOption ( YYSTYPE * pOpt, YYSTYPE * pVal );
+	bool AddDistinct ( YYSTYPE * pNewExpr, YYSTYPE * pStart, YYSTYPE * pEnd );
 
 private:
 	void AutoAlias ( CSphQueryItem & tItem, YYSTYPE * pStart, YYSTYPE * pEnd );
@@ -1810,6 +1811,34 @@ void SelectParser_t::AddOption ( YYSTYPE * pOpt, YYSTYPE * pVal )
 	}
 }
 
+bool SelectParser_t::AddDistinct ( YYSTYPE * pNewExpr, YYSTYPE * pStart, YYSTYPE * pEnd )
+{
+	CSphString sDistinct;
+	sDistinct.SetBinary ( m_pStart + pNewExpr->m_iStart, pNewExpr->m_iEnd - pNewExpr->m_iStart );
+	sphColumnToLowercase ( const_cast<char *>( sDistinct.cstr() ) );
+
+	// Check if this distinct expression already exists
+	if ( m_pQuery->m_dGroupDistinct.Contains ( sDistinct ) )
+	{
+		int iIndex = m_pQuery->m_dGroupDistinct.GetFirst ( [&sDistinct] ( const CSphString & s ) { return s==sDistinct; } );
+		CSphString sItemName;
+		sItemName.SetSprintf ( "@distinct_%d", iIndex );
+		AddItem ( sItemName.cstr(), pStart, pEnd );
+		return true;
+	}
+
+	// Add new distinct expression
+	int iDistinctIndex = m_pQuery->m_dGroupDistinct.GetLength();
+	m_pQuery->m_dGroupDistinct.Add ( sDistinct );
+
+	// Create unique @distinct_N item name
+	CSphString sItemName;
+	sItemName.SetSprintf ( "@distinct_%d", iDistinctIndex );
+
+	AddItem ( sItemName.cstr(), pStart, pEnd );
+	return true;
+}
+
 bool ParseSelectList ( CSphString & sError, CSphQuery & tQuery )
 {
 	tQuery.m_dItems.Reset();
@@ -2083,7 +2112,7 @@ void CSphIndex::SetMutableSettings ( const MutableIndexSettings_c & tSettings )
 
 static bool DetectNonClonableSorters ( const CSphQuery & tQuery )
 {
-	if ( !tQuery.m_sGroupDistinct.IsEmpty() )
+	if ( !tQuery.m_dGroupDistinct.IsEmpty() )
 		return true;
 
 	// FIXME: also need to handle
@@ -2108,7 +2137,7 @@ static bool DetectPrecalcSorters ( const CSphQuery & tQuery, const ISphSchema &
 	if ( !tQuery.m_tKnnSettings.m_sAttr.IsEmpty() )
 		return false;
 
-	bool bDistinct = !tQuery.m_sGroupDistinct.IsEmpty();
+	bool bDistinct = !tQuery.m_dGroupDistinct.IsEmpty();
 	if ( bHasSI )
 	{
 		// check for count distinct precalc
diff --git a/src/sphinx.h b/src/sphinx.h
index 129b1e2438..ee31c17468 100644
--- a/src/sphinx.h
+++ b/src/sphinx.h
@@ -583,7 +583,7 @@ struct CSphQuery
 	CSphString m_sFacetBy; ///< facet-by attribute name(s)
 	ESphGroupBy m_eGroupFunc = SPH_GROUPBY_ATTR; ///< function to pre-process group-by attribute value with
 	CSphString m_sGroupSortBy { "@groupby desc" }; ///< sorting clause for groups in group-by mode
-	CSphString m_sGroupDistinct; ///< count distinct values for this attribute
+	CSphVector<CSphString> m_dGroupDistinct; ///< count distinct values for multiple attributes
 
 	int m_iCutoff = -1; ///< matches count threshold to stop searching at (<=0 means to search until all matches are found)
diff --git a/src/sphinxfilter.cpp 
b/src/sphinxfilter.cpp index f0a9c5ab6c..22decdb47a 100644 --- a/src/sphinxfilter.cpp +++ b/src/sphinxfilter.cpp @@ -1466,7 +1466,7 @@ static bool TryToCreateSpecialFilter ( std::unique_ptr & pFilter, co assert ( !pFilter ); // try to create a filter on a special attribute - if ( sAttrName.Begins("@") && !bHaving && ( sAttrName=="@groupby" || sAttrName=="@count" || sAttrName=="@distinct" ) ) + if ( sAttrName.Begins("@") && !bHaving && ( sAttrName=="@groupby" || sAttrName=="@count" || sAttrName.Begins("@distinct_") ) ) { sError.SetSprintf ( "unsupported filter column '%s'", sAttrName.cstr() ); return false; diff --git a/src/sphinxjsonquery.cpp b/src/sphinxjsonquery.cpp index 8f3cb678c4..bae79a75cb 100644 --- a/src/sphinxjsonquery.cpp +++ b/src/sphinxjsonquery.cpp @@ -2801,7 +2801,7 @@ CSphString sphEncodeResultJson ( const VecTraits_T& dRes, const Js } CSphString sDistinctName; tQuery.m_dItems.any_of ( [&]( const CSphQueryItem & tItem ) { - if ( tItem.m_sExpr=="@distinct" ) + if ( tItem.m_sExpr.Begins("@distinct_") ) { sDistinctName = tItem.m_sAlias; return true; diff --git a/src/sphinxselect.y b/src/sphinxselect.y index efc83bf139..2bd11feca4 100644 --- a/src/sphinxselect.y +++ b/src/sphinxselect.y @@ -91,8 +91,7 @@ select_expr: | SEL_GROUPBY '(' ')' { pParser->AddItem ( "groupby()", &$1, &$3 ); } | SEL_COUNT '(' '*' ')' { pParser->AddItem ( "count(*)", &$1, &$4 ); } | SEL_COUNT '(' SEL_DISTINCT SEL_TOKEN ')' - // FIXME: may be check if $4 == this->m_sGroupDistinct and warn/error, if not? - { pParser->AddItem ( "@distinct", &$1, &$5 ); } + { if ( !pParser->AddDistinct ( &$4, &$1, &$5 ) ) YYERROR; } ; expr: