Skip to content

Commit d9f77fa

Browse files
authored
Allow doc-values only search on ip fields (#82929)
Allows searching on ip fields that are not indexed (index: false) but have doc values enabled. This enables searches on archive data, which has access to doc values but not to index structures. When combined with searchable snapshots, it allows downloading only the data for a given (doc value) field to quickly filter down to a selected set of documents. Relates to #81210 and #52728
1 parent 3f723a4 commit d9f77fa

File tree

7 files changed

+263
-46
lines changed

7 files changed

+263
-46
lines changed

docs/reference/mapping/params/doc-values.asciidoc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ makes this data access pattern possible. They store the same values as the
1717
sorting and aggregations. Doc values are supported on almost all field types,
1818
with the __notable exception of `text` and `annotated_text` fields__.
1919

20-
<<number,Numeric types>>, <<date,date types>>, the <<boolean,boolean type>>
21-
and the <<keyword,keyword type>>
20+
<<number,Numeric types>>, <<date,date types>>, the <<boolean,boolean type>>,
21+
the <<ip,ip type>> and the <<keyword,keyword type>>
2222
can also be queried using term or range-based queries
2323
when they are not <<mapping-index,indexed>> but only have doc values enabled.
2424
Query performance on doc values is much slower than on index structures, but

docs/reference/mapping/types/ip.asciidoc

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,10 @@ The following parameters are accepted by `ip` fields:
5757

5858
<<mapping-index,`index`>>::
5959

60-
Should the field be searchable? Accepts `true` (default) and `false`.
60+
Should the field be quickly searchable? Accepts `true` (default) and
61+
`false`. Fields that only have <<doc-values,`doc_values`>>
62+
enabled can still be queried using term or range-based queries,
63+
albeit more slowly.
6164

6265
<<null-value,`null_value`>>::
6366

docs/reference/query-dsl.asciidoc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@ the stability of the cluster. Those queries can be categorised as follows:
3333

3434
* Queries that need to do linear scans to identify matches:
3535
** <<query-dsl-script-query,`script` queries>>
36-
** queries on <<number,numeric>>, <<date,date>>, <<boolean,boolean>>, or <<keyword,keyword>> fields that are not indexed
37-
but have <<doc-values,doc values>> enabled
36+
** queries on <<number,numeric>>, <<date,date>>, <<boolean,boolean>>, <<ip,ip>> or <<keyword,keyword>> fields
37+
that are not indexed but have <<doc-values,doc values>> enabled
3838

3939
* Queries that have a high up-front cost:
4040
** <<query-dsl-fuzzy-query,`fuzzy` queries>> (except on

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/field_caps/10_basic.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,9 @@ setup:
9292
non_indexed_boolean:
9393
type: boolean
9494
index: false
95+
non_indexed_ip:
96+
type: ip
97+
index: false
9598
geo:
9699
type: keyword
97100
object:
@@ -255,6 +258,18 @@ setup:
255258

256259
- match: {fields.non_indexed_boolean.boolean.searchable: true}
257260

261+
---
262+
"Field caps for ip field with only doc values":
263+
- skip:
264+
version: " - 8.0.99"
265+
reason: "doc values search was added in 8.1.0"
266+
- do:
267+
field_caps:
268+
index: 'test1,test2,test3'
269+
fields: non_indexed_ip
270+
271+
- match: {fields.non_indexed_ip.ip.searchable: true}
272+
258273
---
259274
"Get object and nested field caps":
260275

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/390_doc_values_search.yml

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ setup:
4242
boolean:
4343
type: boolean
4444
index: false
45+
ip:
46+
type: ip
47+
index: false
4548

4649
- do:
4750
index:
@@ -58,6 +61,7 @@ setup:
5861
date: "2017/01/01"
5962
keyword: "key1"
6063
boolean: "false"
64+
ip: "192.168.0.1"
6165

6266
- do:
6367
index:
@@ -74,6 +78,7 @@ setup:
7478
date: "2017/01/02"
7579
keyword: "key2"
7680
boolean: "true"
81+
ip: "192.168.0.2"
7782

7883
- do:
7984
indices.refresh: {}
@@ -284,3 +289,30 @@ setup:
284289
index: test
285290
body: { query: { range: { boolean: { gte: "false" } } } }
286291
- length: { hits.hits: 2 }
292+
293+
---
294+
"Test match query on ip field where only doc values are enabled":
295+
296+
- do:
297+
search:
298+
index: test
299+
body: { query: { match: { ip: { query: "192.168.0.1" } } } }
300+
- length: { hits.hits: 1 }
301+
302+
---
303+
"Test terms query on ip field where only doc values are enabled":
304+
305+
- do:
306+
search:
307+
index: test
308+
body: { query: { terms: { ip: [ "192.168.0.1", "192.168.0.2" ] } } }
309+
- length: { hits.hits: 2 }
310+
311+
---
312+
"Test range query on ip field where only doc values are enabled":
313+
314+
- do:
315+
search:
316+
index: test
317+
body: { query: { range: { ip: { gte: "192.168.0.1" } } } }
318+
- length: { hits.hits: 2 }

server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java

Lines changed: 52 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import org.apache.lucene.document.StoredField;
1515
import org.apache.lucene.index.LeafReaderContext;
1616
import org.apache.lucene.search.MatchNoDocsQuery;
17+
import org.apache.lucene.search.PointRangeQuery;
1718
import org.apache.lucene.search.Query;
1819
import org.apache.lucene.util.BytesRef;
1920
import org.elasticsearch.Version;
@@ -204,14 +205,27 @@ public IpFieldType(
204205
}
205206

206207
public IpFieldType(String name) {
207-
this(name, true, false, true, null, null, Collections.emptyMap(), false);
208+
this(name, true, true);
209+
}
210+
211+
public IpFieldType(String name, boolean isIndexed) {
212+
this(name, isIndexed, true);
213+
}
214+
215+
public IpFieldType(String name, boolean isIndexed, boolean hasDocValues) {
216+
this(name, isIndexed, false, hasDocValues, null, null, Collections.emptyMap(), false);
208217
}
209218

210219
@Override
211220
public String typeName() {
212221
return CONTENT_TYPE;
213222
}
214223

224+
@Override
225+
public boolean isSearchable() {
226+
return isIndexed() || hasDocValues();
227+
}
228+
215229
@Override
216230
public boolean mayExistInIndex(SearchExecutionContext context) {
217231
return context.fieldExistsInIndex(name());
@@ -252,25 +266,48 @@ protected Object parseSourceValue(Object value) {
252266

253267
@Override
254268
public Query termQuery(Object value, @Nullable SearchExecutionContext context) {
255-
failIfNotIndexed();
269+
failIfNotIndexedNorDocValuesFallback(context);
270+
Query query;
256271
if (value instanceof InetAddress) {
257-
return InetAddressPoint.newExactQuery(name(), (InetAddress) value);
272+
query = InetAddressPoint.newExactQuery(name(), (InetAddress) value);
258273
} else {
259274
if (value instanceof BytesRef) {
260275
value = ((BytesRef) value).utf8ToString();
261276
}
262277
String term = value.toString();
263278
if (term.contains("/")) {
264279
final Tuple<InetAddress, Integer> cidr = InetAddresses.parseCidr(term);
265-
return InetAddressPoint.newPrefixQuery(name(), cidr.v1(), cidr.v2());
280+
query = InetAddressPoint.newPrefixQuery(name(), cidr.v1(), cidr.v2());
281+
} else {
282+
InetAddress address = InetAddresses.forString(term);
283+
query = InetAddressPoint.newExactQuery(name(), address);
266284
}
267-
InetAddress address = InetAddresses.forString(term);
268-
return InetAddressPoint.newExactQuery(name(), address);
285+
}
286+
if (isIndexed()) {
287+
return query;
288+
} else {
289+
return convertToDocValuesQuery(query);
269290
}
270291
}
271292

293+
static Query convertToDocValuesQuery(Query query) {
294+
assert query instanceof PointRangeQuery;
295+
PointRangeQuery pointRangeQuery = (PointRangeQuery) query;
296+
return SortedSetDocValuesField.newSlowRangeQuery(
297+
pointRangeQuery.getField(),
298+
new BytesRef(pointRangeQuery.getLowerPoint()),
299+
new BytesRef(pointRangeQuery.getUpperPoint()),
300+
true,
301+
true
302+
);
303+
}
304+
272305
@Override
273306
public Query termsQuery(Collection<?> values, SearchExecutionContext context) {
307+
failIfNotIndexedNorDocValuesFallback(context);
308+
if (isIndexed() == false) {
309+
return super.termsQuery(values, context);
310+
}
274311
InetAddress[] addresses = new InetAddress[values.size()];
275312
int i = 0;
276313
for (Object value : values) {
@@ -301,14 +338,15 @@ public Query rangeQuery(
301338
boolean includeUpper,
302339
SearchExecutionContext context
303340
) {
304-
failIfNotIndexed();
305-
return rangeQuery(
306-
lowerTerm,
307-
upperTerm,
308-
includeLower,
309-
includeUpper,
310-
(lower, upper) -> InetAddressPoint.newRangeQuery(name(), lower, upper)
311-
);
341+
failIfNotIndexedNorDocValuesFallback(context);
342+
return rangeQuery(lowerTerm, upperTerm, includeLower, includeUpper, (lower, upper) -> {
343+
Query query = InetAddressPoint.newRangeQuery(name(), lower, upper);
344+
if (isIndexed()) {
345+
return query;
346+
} else {
347+
return convertToDocValuesQuery(query);
348+
}
349+
});
312350
}
313351

314352
/**

0 commit comments

Comments
 (0)