Skip to content

Commit 928c09a

Browse files
authored
Allow doc-values only search on date types (#82602)
Similar to #82409, but for date fields. Allows searching on date field types (date, date_nanos) when those fields are not indexed (index: false) but have doc values enabled. This enables searches on archive data, which has access to doc values but not index structures. When combined with searchable snapshots, it allows downloading only the data for a given (doc value) field to quickly filter down to a select set of documents. Relates #81210 and #52728.
1 parent de9e347 commit 928c09a

File tree

10 files changed

+185
-27
lines changed

10 files changed

+185
-27
lines changed

docs/reference/mapping/params/doc-values.asciidoc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@ makes this data access pattern possible. They store the same values as the
1717
sorting and aggregations. Doc values are supported on almost all field types,
1818
with the __notable exception of `text` and `annotated_text` fields__.
1919

20-
<<number,Numeric types>>, such as `long` and `double`, can also be queried
20+
<<number,Numeric types>>, such as `long` and `double`, and <<date,Date types>>
21+
can also be queried
2122
when they are not <<mapping-index,indexed>> but only have doc values enabled.
2223
Query performance on doc values is much slower than on index structures, but
2324
offers an interesting tradeoff between disk usage and query performance for

docs/reference/mapping/types/date.asciidoc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,9 @@ The following parameters are accepted by `date` fields:
137137

138138
<<mapping-index,`index`>>::
139139

140-
Should the field be searchable? Accepts `true` (default) and `false`.
140+
Should the field be quickly searchable? Accepts `true` (default) and
141+
`false`. Date fields that only have <<doc-values,`doc_values`>>
142+
enabled can also be queried, albeit more slowly.
141143

142144
<<null-value,`null_value`>>::
143145

docs/reference/query-dsl.asciidoc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@ the stability of the cluster. Those queries can be categorised as follows:
3333

3434
* Queries that need to do linear scans to identify matches:
3535
** <<query-dsl-script-query,`script` queries>>
36-
** queries on <<number,numeric fields>> that are not indexed but have <<doc-values,doc values>> enabled
36+
** queries on <<number,numeric>> and <<date,date>> fields that are not indexed
37+
but have <<doc-values,doc values>> enabled
3738

3839
* Queries that have a high up-front cost:
3940
** <<query-dsl-fuzzy-query,`fuzzy` queries>> (except on

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/field_caps/10_basic.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,9 @@ setup:
8383
type: long
8484
date:
8585
type: date
86+
non_indexed_date:
87+
type: date
88+
index: false
8689
geo:
8790
type: keyword
8891
object:
@@ -210,6 +213,18 @@ setup:
210213

211214
- match: {fields.object\.nested1.long.searchable: true}
212215

216+
---
217+
"Field caps for date field with only doc values":
218+
- skip:
219+
version: " - 8.0.99"
220+
reason: "doc values search was added in 8.1.0"
221+
- do:
222+
field_caps:
223+
index: 'test1,test2,test3'
224+
fields: non_indexed_date
225+
226+
- match: {fields.non_indexed_date.date.searchable: true}
227+
213228
---
214229
"Get object and nested field caps":
215230

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/140_pre_filter_search_shards.yml

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@ setup:
1010
created_at:
1111
type: date
1212
format: "yyyy-MM-dd"
13+
created_at_not_indexed:
14+
type: date
15+
index: false
16+
format: "yyyy-MM-dd"
1317
- do:
1418
indices.create:
1519
index: index_2
@@ -21,6 +25,10 @@ setup:
2125
created_at:
2226
type: date_nanos
2327
format: "yyyy-MM-dd"
28+
created_at_not_indexed:
29+
type: date
30+
index: false
31+
format: "yyyy-MM-dd"
2432
- do:
2533
indices.create:
2634
index: index_3
@@ -32,6 +40,10 @@ setup:
3240
created_at:
3341
type: date
3442
format: "yyyy-MM-dd"
43+
created_at_not_indexed:
44+
type: date
45+
index: false
46+
format: "yyyy-MM-dd"
3547

3648

3749
---
@@ -222,3 +234,53 @@ setup:
222234
- length: { hits.hits: 1 }
223235
- match: {hits.hits.0._id: "3" }
224236
- length: { aggregations.idx_terms.buckets: 3 }
237+
238+
---
239+
"prefilter on non-indexed date fields":
240+
- skip:
241+
version: "- 8.0.99"
242+
reason: "doc values search was added in 8.1.0"
243+
244+
- do:
245+
index:
246+
index: index_1
247+
id: 1
248+
body: { "created_at_not_indexed": "2016-01-01"}
249+
- do:
250+
index:
251+
index: index_2
252+
id: 2
253+
body: { "created_at_not_indexed": "2017-01-01" }
254+
255+
- do:
256+
index:
257+
index: index_3
258+
id: 3
259+
body: { "created_at_not_indexed": "2018-01-01" }
260+
- do:
261+
indices.refresh: {}
262+
263+
264+
- do:
265+
search:
266+
rest_total_hits_as_int: true
267+
body: { "size" : 0, "query" : { "range" : { "created_at_not_indexed" : { "gte" : "2016-02-01", "lt": "2018-02-01"} } } }
268+
269+
- match: { _shards.total: 3 }
270+
- match: { _shards.successful: 3 }
271+
- match: { _shards.skipped: 0 }
272+
- match: { _shards.failed: 0 }
273+
- match: { hits.total: 2 }
274+
275+
# this is a case where we would normally skip due to rewrite but we can't because we only have doc values
276+
- do:
277+
search:
278+
rest_total_hits_as_int: true
279+
pre_filter_shard_size: 1
280+
body: { "size" : 0, "query" : { "range" : { "created_at_not_indexed" : { "gte" : "2016-02-01", "lt": "2018-02-01"} } } }
281+
282+
- match: { _shards.total: 3 }
283+
- match: { _shards.successful: 3 }
284+
- match: { _shards.skipped : 0 }
285+
- match: { _shards.failed: 0 }
286+
- match: { hits.total: 2 }

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/390_doc_values_search.yml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@ setup:
3232
short:
3333
type: short
3434
index: false
35+
date:
36+
type: date
37+
format: yyyy/MM/dd
38+
index: false
3539

3640
- do:
3741
index:
@@ -45,6 +49,7 @@ setup:
4549
integer: 1
4650
long: 1
4751
short: 1
52+
date: "2017/01/01"
4853

4954
- do:
5055
index:
@@ -58,6 +63,7 @@ setup:
5863
integer: 2
5964
long: 2
6065
short: 2
66+
date: "2017/01/02"
6167

6268
- do:
6369
indices.refresh: {}
@@ -196,3 +202,21 @@ setup:
196202
index: test
197203
body: { query: { range: { short: { gte: 0 } } } }
198204
- length: { hits.hits: 2 }
205+
206+
---
207+
"Test match query on date field where only doc values are enabled":
208+
209+
- do:
210+
search:
211+
index: test
212+
body: { query: { match: { date: { query: "2017/01/01" } } } }
213+
- length: { hits.hits: 1 }
214+
215+
---
216+
"Test range query on date field where only doc values are enabled":
217+
218+
- do:
219+
search:
220+
index: test
221+
body: { query: { range: { date: { gte: "2017/01/01" } } } }
222+
- length: { hits.hits: 2 }

server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,7 @@ public static final class DateFieldType extends MappedFieldType {
367367

368368
public DateFieldType(
369369
String name,
370-
boolean isSearchable,
370+
boolean isIndexed,
371371
boolean isStored,
372372
boolean hasDocValues,
373373
DateFormatter dateTimeFormatter,
@@ -376,7 +376,7 @@ public DateFieldType(
376376
FieldValues<Long> scriptValues,
377377
Map<String, String> meta
378378
) {
379-
super(name, isSearchable, isStored, hasDocValues, TextSearchInfo.SIMPLE_MATCH_WITHOUT_TERMS, meta);
379+
super(name, isIndexed, isStored, hasDocValues, TextSearchInfo.SIMPLE_MATCH_WITHOUT_TERMS, meta);
380380
this.dateTimeFormatter = dateTimeFormatter;
381381
this.dateMathParser = dateTimeFormatter.toDateMathParser();
382382
this.resolution = resolution;
@@ -388,6 +388,10 @@ public DateFieldType(String name) {
388388
this(name, true, false, true, DEFAULT_DATE_TIME_FORMATTER, Resolution.MILLISECONDS, null, null, Collections.emptyMap());
389389
}
390390

391+
public DateFieldType(String name, boolean isIndexed) {
392+
this(name, isIndexed, false, true, DEFAULT_DATE_TIME_FORMATTER, Resolution.MILLISECONDS, null, null, Collections.emptyMap());
393+
}
394+
391395
public DateFieldType(String name, DateFormatter dateFormatter) {
392396
this(name, true, false, true, dateFormatter, Resolution.MILLISECONDS, null, null, Collections.emptyMap());
393397
}
@@ -464,6 +468,11 @@ private String format(long timestamp, DateFormatter formatter) {
464468
return formatter.format(dateTime);
465469
}
466470

471+
@Override
472+
public boolean isSearchable() {
473+
return isIndexed() || hasDocValues();
474+
}
475+
467476
@Override
468477
public Query termQuery(Object value, @Nullable SearchExecutionContext context) {
469478
return rangeQuery(value, value, true, true, ShapeRelation.INTERSECTS, null, null, context);
@@ -480,7 +489,7 @@ public Query rangeQuery(
480489
@Nullable DateMathParser forcedDateParser,
481490
SearchExecutionContext context
482491
) {
483-
failIfNotIndexed();
492+
failIfNotIndexedNorDocValuesFallback(context);
484493
if (relation == ShapeRelation.DISJOINT) {
485494
throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] does not support DISJOINT ranges");
486495
}
@@ -496,14 +505,18 @@ public Query rangeQuery(
496505
parser = forcedDateParser;
497506
}
498507
return dateRangeQuery(lowerTerm, upperTerm, includeLower, includeUpper, timeZone, parser, context, resolution, (l, u) -> {
499-
Query query = LongPoint.newRangeQuery(name(), l, u);
500-
if (hasDocValues()) {
501-
Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery(name(), l, u);
502-
query = new IndexOrDocValuesQuery(query, dvQuery);
503-
504-
if (context.indexSortedOnField(name())) {
505-
query = new IndexSortSortedNumericDocValuesRangeQuery(name(), l, u, query);
508+
Query query;
509+
if (isIndexed()) {
510+
query = LongPoint.newRangeQuery(name(), l, u);
511+
if (hasDocValues()) {
512+
Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery(name(), l, u);
513+
query = new IndexOrDocValuesQuery(query, dvQuery);
506514
}
515+
} else {
516+
query = SortedNumericDocValuesField.newSlowRangeQuery(name(), l, u);
517+
}
518+
if (hasDocValues() && context.indexSortedOnField(name())) {
519+
query = new IndexSortSortedNumericDocValuesRangeQuery(name(), l, u, query);
507520
}
508521
return query;
509522
});
@@ -593,6 +606,10 @@ public Relation isFieldWithinQuery(
593606
DateMathParser dateParser,
594607
QueryRewriteContext context
595608
) throws IOException {
609+
if (isIndexed() == false && hasDocValues()) {
610+
// we don't have a quick way to run this check on doc values, so fall back to the default of INTERSECTS since we can't establish disjointness
611+
return Relation.INTERSECTS;
612+
}
596613
byte[] minPackedValue = PointValues.getMinPackedValue(reader, name());
597614
if (minPackedValue == null) {
598615
// no points, so nothing matches

server/src/main/java/org/elasticsearch/index/shard/IndexShard.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2025,6 +2025,9 @@ public ShardLongFieldRange getTimestampRange() {
20252025
if (mappedFieldType instanceof DateFieldMapper.DateFieldType == false) {
20262026
return ShardLongFieldRange.UNKNOWN; // field missing or not a date
20272027
}
2028+
if (mappedFieldType.isIndexed() == false) {
2029+
return ShardLongFieldRange.UNKNOWN; // range information missing
2030+
}
20282031

20292032
final ShardLongFieldRange rawTimestampFieldRange;
20302033
try {

server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,19 @@ public void testIsFieldWithinRangeEmptyReader() throws IOException {
6363
);
6464
}
6565

66+
public void testIsFieldWithinRangeOnlyDocValues() throws IOException {
67+
QueryRewriteContext context = new QueryRewriteContext(parserConfig(), writableRegistry(), null, () -> nowInMillis);
68+
IndexReader reader = new MultiReader();
69+
DateFieldType ft = new DateFieldType("my_date", false);
70+
// in case of only doc-values, we can't establish disjointness
71+
assertEquals(
72+
Relation.INTERSECTS,
73+
ft.isFieldWithinQuery(reader, "2015-10-12", "2016-04-03", randomBoolean(), randomBoolean(), null, null, context)
74+
);
75+
}
76+
6677
public void testIsFieldWithinQueryDateMillis() throws IOException {
67-
DateFieldType ft = new DateFieldType("my_date", Resolution.MILLISECONDS);
78+
DateFieldType ft = new DateFieldType("my_date");
6879
isFieldWithinRangeTestCase(ft);
6980
}
7081

@@ -192,19 +203,23 @@ public void testTermQuery() {
192203
);
193204
assertEquals(expected, ft.termQuery(date, context));
194205

206+
ft = new DateFieldType("field", false);
207+
expected = SortedNumericDocValuesField.newSlowRangeQuery("field", instant, instant + 999);
208+
assertEquals(expected, ft.termQuery(date, context));
209+
195210
MappedFieldType unsearchable = new DateFieldType(
196211
"field",
197212
false,
198213
false,
199-
true,
214+
false,
200215
DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER,
201216
Resolution.MILLISECONDS,
202217
null,
203218
null,
204219
Collections.emptyMap()
205220
);
206221
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> unsearchable.termQuery(date, context));
207-
assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
222+
assertEquals("Cannot search on field [field] since it is not indexed nor has doc values.", e.getMessage());
208223
}
209224

210225
public void testRangeQuery() throws IOException {
@@ -245,6 +260,10 @@ public void testRangeQuery() throws IOException {
245260
);
246261
assertEquals(expected, ft.rangeQuery(date1, date2, true, true, null, null, null, context).rewrite(new MultiReader()));
247262

263+
MappedFieldType ft2 = new DateFieldType("field", false);
264+
Query expected2 = SortedNumericDocValuesField.newSlowRangeQuery("field", instant1, instant2);
265+
assertEquals(expected2, ft2.rangeQuery(date1, date2, true, true, null, null, null, context).rewrite(new MultiReader()));
266+
248267
instant1 = nowInMillis;
249268
instant2 = instant1 + 100;
250269
expected = new DateRangeIncludingNowQuery(
@@ -255,11 +274,14 @@ public void testRangeQuery() throws IOException {
255274
);
256275
assertEquals(expected, ft.rangeQuery("now", instant2, true, true, null, null, null, context));
257276

277+
expected2 = new DateRangeIncludingNowQuery(SortedNumericDocValuesField.newSlowRangeQuery("field", instant1, instant2));
278+
assertEquals(expected2, ft2.rangeQuery("now", instant2, true, true, null, null, null, context));
279+
258280
MappedFieldType unsearchable = new DateFieldType(
259281
"field",
260282
false,
261283
false,
262-
true,
284+
false,
263285
DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER,
264286
Resolution.MILLISECONDS,
265287
null,
@@ -270,7 +292,7 @@ public void testRangeQuery() throws IOException {
270292
IllegalArgumentException.class,
271293
() -> unsearchable.rangeQuery(date1, date2, true, true, null, null, null, context)
272294
);
273-
assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
295+
assertEquals("Cannot search on field [field] since it is not indexed nor has doc values.", e.getMessage());
274296
}
275297

276298
public void testRangeQueryWithIndexSort() {
@@ -321,6 +343,10 @@ public void testRangeQueryWithIndexSort() {
321343
new IndexOrDocValuesQuery(pointQuery, dvQuery)
322344
);
323345
assertEquals(expected, ft.rangeQuery(date1, date2, true, true, null, null, null, context));
346+
347+
ft = new DateFieldType("field", false);
348+
expected = new IndexSortSortedNumericDocValuesRangeQuery("field", instant1, instant2, dvQuery);
349+
assertEquals(expected, ft.rangeQuery(date1, date2, true, true, null, null, null, context));
324350
}
325351

326352
public void testDateNanoDocValues() throws IOException {

0 commit comments

Comments
 (0)