Skip to content

Commit 0873893

Browse files
authored
New GeoHexGrid aggregation (#82924)
This commit introduces a new geogrid aggregation called GeoHexGridAggregation that is based on Uber's H3 grid. It only supports geo_point fields.
1 parent 15de797 commit 0873893

File tree

22 files changed

+1308
-8
lines changed

22 files changed

+1308
-8
lines changed

docs/changelog/82924.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 82924
2+
summary: New `GeoHexGrid` aggregation
3+
area: Geo
4+
type: feature
5+
issues: []

docs/reference/aggregations/bucket.asciidoc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ include::bucket/geodistance-aggregation.asciidoc[]
4040

4141
include::bucket/geohashgrid-aggregation.asciidoc[]
4242

43+
include::bucket/geohexgrid-aggregation.asciidoc[]
44+
4345
include::bucket/geotilegrid-aggregation.asciidoc[]
4446

4547
include::bucket/global-aggregation.asciidoc[]
Lines changed: 249 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,249 @@
1+
[role="xpack"]
2+
[[search-aggregations-bucket-geohexgrid-aggregation]]
3+
=== Geohex grid aggregation
4+
++++
5+
<titleabbrev>Geohex grid</titleabbrev>
6+
++++
7+
8+
A multi-bucket aggregation that groups <<geo-point,`geo_point`>>
9+
values into buckets that represent a grid.
10+
The resulting grid can be sparse and only
11+
contains cells that have matching data. Each cell corresponds to a
12+
https://h3geo.org/docs/core-library/h3Indexing#h3-cell-index[H3 cell index] and is
13+
labeled using the https://h3geo.org/docs/core-library/h3Indexing#h3index-representation[H3Index representation].
14+
15+
See https://h3geo.org/docs/core-library/restable[the table of cell areas for H3
16+
resolutions] on how precision (zoom) correlates to size on the ground.
17+
Precision for this aggregation can be between 0 and 15, inclusive.
18+
19+
WARNING: High-precision requests can be very expensive in terms of RAM and
20+
result sizes. For example, the highest-precision geohex with a precision of 15
21+
produces cells that cover less than one square meter. We recommend you use a
22+
filter to limit high-precision requests to a smaller geographic area. For an example,
23+
refer to <<geohexgrid-high-precision>>.
24+
25+
[[geohexgrid-low-precision]]
26+
==== Simple low-precision request
27+
28+
[source,console,id=geohexgrid-aggregation-example]
29+
--------------------------------------------------
30+
PUT /museums
31+
{
32+
"mappings": {
33+
"properties": {
34+
"location": {
35+
"type": "geo_point"
36+
}
37+
}
38+
}
39+
}
40+
41+
POST /museums/_bulk?refresh
42+
{"index":{"_id":1}}
43+
{"location": "52.374081,4.912350", "name": "NEMO Science Museum"}
44+
{"index":{"_id":2}}
45+
{"location": "52.369219,4.901618", "name": "Museum Het Rembrandthuis"}
46+
{"index":{"_id":3}}
47+
{"location": "52.371667,4.914722", "name": "Nederlands Scheepvaartmuseum"}
48+
{"index":{"_id":4}}
49+
{"location": "51.222900,4.405200", "name": "Letterenhuis"}
50+
{"index":{"_id":5}}
51+
{"location": "48.861111,2.336389", "name": "Musée du Louvre"}
52+
{"index":{"_id":6}}
53+
{"location": "48.860000,2.327000", "name": "Musée d'Orsay"}
54+
55+
POST /museums/_search?size=0
56+
{
57+
"aggregations": {
58+
"large-grid": {
59+
"geohex_grid": {
60+
"field": "location",
61+
"precision": 4
62+
}
63+
}
64+
}
65+
}
66+
--------------------------------------------------
67+
68+
Response:
69+
70+
[source,console-result]
71+
--------------------------------------------------
72+
{
73+
...
74+
"aggregations": {
75+
"large-grid": {
76+
"buckets": [
77+
{
78+
"key": "841969dffffffff",
79+
"doc_count": 3
80+
},
81+
{
82+
"key": "841fb47ffffffff",
83+
"doc_count": 2
84+
},
85+
{
86+
"key": "841fa4dffffffff",
87+
"doc_count": 1
88+
}
89+
]
90+
}
91+
}
92+
}
93+
--------------------------------------------------
94+
// TESTRESPONSE[s/\.\.\./"took": $body.took,"_shards": $body._shards,"hits":$body.hits,"timed_out":false,/]
95+
96+
[[geohexgrid-high-precision]]
97+
==== High-precision requests
98+
99+
When requesting detailed buckets (typically for displaying a "zoomed in" map),
100+
a filter like <<query-dsl-geo-bounding-box-query,geo_bounding_box>> should be
101+
applied to narrow the subject area. Otherwise, potentially millions of buckets
102+
will be created and returned.
103+
104+
[source,console,id=geohexgrid-high-precision-ex]
105+
--------------------------------------------------
106+
POST /museums/_search?size=0
107+
{
108+
"aggregations": {
109+
"zoomed-in": {
110+
"filter": {
111+
"geo_bounding_box": {
112+
"location": {
113+
"top_left": "52.4, 4.9",
114+
"bottom_right": "52.3, 5.0"
115+
}
116+
}
117+
},
118+
"aggregations": {
119+
"zoom1": {
120+
"geohex_grid": {
121+
"field": "location",
122+
"precision": 12
123+
}
124+
}
125+
}
126+
}
127+
}
128+
}
129+
--------------------------------------------------
130+
// TEST[continued]
131+
132+
Response:
133+
134+
[source,console-result]
135+
--------------------------------------------------
136+
{
137+
...
138+
"aggregations": {
139+
"zoomed-in": {
140+
"doc_count": 3,
141+
"zoom1": {
142+
"buckets": [
143+
{
144+
"key": "8c1969c9b2617ff",
145+
"doc_count": 1
146+
},
147+
{
148+
"key": "8c1969526d753ff",
149+
"doc_count": 1
150+
},
151+
{
152+
"key": "8c1969526d26dff",
153+
"doc_count": 1
154+
}
155+
]
156+
}
157+
}
158+
}
159+
}
160+
--------------------------------------------------
161+
// TESTRESPONSE[s/\.\.\./"took": $body.took,"_shards": $body._shards,"hits":$body.hits,"timed_out":false,/]
162+
163+
[[geohexgrid-addtl-bounding-box-filtering]]
164+
==== Requests with additional bounding box filtering
165+
166+
The `geohex_grid` aggregation supports an optional `bounds` parameter
167+
that restricts the cells considered to those that intersect the
168+
provided bounds. The `bounds` parameter accepts the same
169+
<<query-dsl-geo-bounding-box-query-accepted-formats,bounding box formats>>
170+
as the geo-bounding box query. This bounding box can be used with or
171+
without an additional `geo_bounding_box` query for filtering the points prior to aggregating.
172+
It is an independent bounding box that can intersect with, be equal to, or be disjoint
173+
from any additional `geo_bounding_box` queries defined in the context of the aggregation.
174+
175+
[source,console,id=geohexgrid-aggregation-with-bounds]
176+
--------------------------------------------------
177+
POST /museums/_search?size=0
178+
{
179+
"aggregations": {
180+
"tiles-in-bounds": {
181+
"geohex_grid": {
182+
"field": "location",
183+
"precision": 12,
184+
"bounds": {
185+
"top_left": "52.4, 4.9",
186+
"bottom_right": "52.3, 5.0"
187+
}
188+
}
189+
}
190+
}
191+
}
192+
--------------------------------------------------
193+
// TEST[continued]
194+
195+
Response:
196+
197+
[source,console-result]
198+
--------------------------------------------------
199+
{
200+
...
201+
"aggregations": {
202+
"tiles-in-bounds": {
203+
"buckets": [
204+
{
205+
"key": "8c1969c9b2617ff",
206+
"doc_count": 1
207+
},
208+
{
209+
"key": "8c1969526d753ff",
210+
"doc_count": 1
211+
},
212+
{
213+
"key": "8c1969526d26dff",
214+
"doc_count": 1
215+
}
216+
]
217+
}
218+
}
219+
}
220+
--------------------------------------------------
221+
// TESTRESPONSE[s/\.\.\./"took": $body.took,"_shards": $body._shards,"hits":$body.hits,"timed_out":false,/]
222+
223+
[[geohexgrid-options]]
224+
==== Options
225+
226+
[horizontal]
227+
field::
228+
(Required, string) Field containing indexed geo-point values. Must be explicitly
229+
mapped as a <<geo-point,`geo_point`>> field. If the field contains an array,
230+
`geohex_grid` aggregates all array values.
231+
232+
precision::
233+
(Optional, integer) Integer zoom of the key used to define cells/buckets in
234+
the results. Defaults to `6`. Values outside of [`0`,`15`] will be rejected.
235+
236+
bounds::
237+
(Optional, object) Bounding box used to filter the geo-points in each bucket.
238+
Accepts the same bounding box formats as the
239+
<<query-dsl-geo-bounding-box-query-accepted-formats,geo-bounding box query>>.
240+
241+
size::
242+
(Optional, integer) Maximum number of buckets to return. Defaults to 10,000.
243+
When results are trimmed, buckets are prioritized based on the volume of
244+
documents they contain.
245+
246+
shard_size::
247+
(Optional, integer) Number of buckets returned from each shard. Defaults to
248+
`max(10,(size x number-of-shards))` to allow for a more accurate count of the
249+
top cells in the final result.

server/src/main/java/org/elasticsearch/search/aggregations/bucket/geogrid/InternalGeoGridBucket.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ public void writeTo(StreamOutput out) throws IOException {
5151
aggregations.writeTo(out);
5252
}
5353

54-
protected long hashAsLong() {
54+
public long hashAsLong() {
5555
return hashAsLong;
5656
}
5757

server/src/main/java/org/elasticsearch/search/aggregations/bucket/geogrid/ParsedGeoGrid.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ public static ObjectParser<ParsedGeoGrid, Void> createParser(
3434
return parser;
3535
}
3636

37-
protected void setName(String name) {
37+
public void setName(String name) {
3838
super.setName(name);
3939
}
4040
}

test/framework/src/main/java/org/elasticsearch/search/aggregations/bucket/geogrid/GeoGridTestCase.java

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,16 +55,22 @@ protected int maxNumberOfBuckets() {
5555
@Override
5656
protected T createTestInstance(String name, Map<String, Object> metadata, InternalAggregations aggregations) {
5757
final int precision = randomPrecision();
58-
int size = randomNumberOfBuckets();
59-
List<InternalGeoGridBucket> buckets = new ArrayList<>(size);
58+
final int size = randomNumberOfBuckets();
59+
final List<InternalGeoGridBucket> buckets = new ArrayList<>(size);
60+
final List<Long> seen = new ArrayList<>(size);
61+
int finalSize = 0;
6062
for (int i = 0; i < size; i++) {
6163
double latitude = randomDoubleBetween(-90.0, 90.0, false);
6264
double longitude = randomDoubleBetween(-180.0, 180.0, false);
6365

6466
long hashAsLong = longEncode(longitude, latitude, precision);
65-
buckets.add(createInternalGeoGridBucket(hashAsLong, randomInt(IndexWriter.MAX_DOCS), aggregations));
67+
if (seen.contains(hashAsLong) == false) { // make sure we don't add twice the same bucket
68+
buckets.add(createInternalGeoGridBucket(hashAsLong, randomInt(IndexWriter.MAX_DOCS), aggregations));
69+
seen.add(hashAsLong);
70+
finalSize++;
71+
}
6672
}
67-
return createInternalGeoGrid(name, size, buckets, metadata);
73+
return createInternalGeoGrid(name, finalSize, buckets, metadata);
6874
}
6975

7076
@Override

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/spatial/action/SpatialStatsAction.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ private SpatialStatsAction() {
3939
* Items to track. Serialized by ordinals. Append only, don't remove or change order of items in this list.
4040
*/
4141
public enum Item {
42-
GEOLINE
42+
GEOLINE,
43+
GEOHEX
4344
}
4445

4546
public static class Request extends BaseNodesRequest<Request> implements ToXContentObject {

x-pack/plugin/spatial/build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ dependencies {
1414
compileOnly project(path: ':modules:legacy-geo')
1515
compileOnly project(':modules:lang-painless:spi')
1616
compileOnly project(path: xpackModule('core'))
17+
api project(":libs:elasticsearch-h3")
1718
testImplementation(testArtifact(project(xpackModule('core'))))
1819
testImplementation project(path: xpackModule('vector-tile'))
1920
}

0 commit comments

Comments
 (0)