Skip to content

Commit 12cc4cb

Browse files
craig[bot]sumeerbholaRaduBerinde
committed
51241: geoindex,sql: use SRID bounds for a geometry inverted index r=sumeerbhola a=sumeerbhola Release note: None 51306: opt: move stub exec factory r=RaduBerinde a=RaduBerinde Move the stub factory to the exec package, so it can be used in more places. Release note: None Co-authored-by: sumeerbhola <[email protected]> Co-authored-by: Radu Berinde <[email protected]>
3 parents a215af7 + 83e3305 + 33fa36d commit 12cc4cb

File tree

10 files changed

+616
-427
lines changed

10 files changed

+616
-427
lines changed

pkg/geo/geoindex/s2_geometry_index.go

Lines changed: 43 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ import (
1515

1616
"github.com/cockroachdb/cockroach/pkg/geo"
1717
"github.com/cockroachdb/cockroach/pkg/geo/geomfn"
18+
"github.com/cockroachdb/cockroach/pkg/geo/geopb"
19+
"github.com/cockroachdb/cockroach/pkg/geo/geoprojbase"
1820
"github.com/cockroachdb/cockroach/pkg/geo/geos"
1921
"github.com/cockroachdb/errors"
2022
"github.com/golang/geo/r3"
@@ -32,15 +34,15 @@ type s2GeometryIndex struct {
3234

3335
var _ GeometryIndex = (*s2GeometryIndex)(nil)
3436

37+
// We adjust the clipping bounds to be smaller by this fraction, since using
38+
// the endpoints of face 0 in S2 causes coverings to spill out of that face.
39+
const clippingBoundsDelta = 0.01
40+
3541
// NewS2GeometryIndex returns an index with the given configuration. All reads and
3642
// writes on this index must use the same config. Writes must use the same
3743
// config to correctly process deletions. Reads must use the same config since
3844
// the bounds affect when a read needs to look at the exceedsBoundsCellID.
3945
func NewS2GeometryIndex(cfg S2GeometryConfig) GeometryIndex {
40-
// We adjust the clipping bounds to be smaller by this fraction, since using
41-
// the endpoints of face 0 in S2 causes coverings to spill out of that face.
42-
const boundsDelta = 0.01
43-
4446
// TODO(sumeer): Sanity check cfg.
4547
return &s2GeometryIndex{
4648
rc: &s2.RegionCoverer{
@@ -53,17 +55,19 @@ func NewS2GeometryIndex(cfg S2GeometryConfig) GeometryIndex {
5355
maxX: cfg.MaxX,
5456
minY: cfg.MinY,
5557
maxY: cfg.MaxY,
56-
deltaX: boundsDelta * (cfg.MaxX - cfg.MinX),
57-
deltaY: boundsDelta * (cfg.MaxY - cfg.MinY),
58+
deltaX: clippingBoundsDelta * (cfg.MaxX - cfg.MinX),
59+
deltaY: clippingBoundsDelta * (cfg.MaxY - cfg.MinY),
5860
}
5961
}
6062

63+
// TODO(sumeer): also support index config with parameters specified by CREATE
64+
// INDEX.
65+
6166
// DefaultGeometryIndexConfig returns a default config for a geometry index.
6267
func DefaultGeometryIndexConfig() *Config {
6368
return &Config{
6469
S2Geometry: &S2GeometryConfig{
6570
// Arbitrary bounding box.
66-
// TODO(sumeer): replace with parameters specified by CREATE INDEX.
6771
MinX: -10000,
6872
MaxX: 10000,
6973
MinY: -10000,
@@ -72,6 +76,38 @@ func DefaultGeometryIndexConfig() *Config {
7276
}
7377
}
7478

79+
// GeometryIndexConfigForSRID returns a geometry index config for srid.
80+
func GeometryIndexConfigForSRID(srid geopb.SRID) *Config {
81+
p, exists := geoprojbase.Projection(srid)
82+
if !exists || p.Bounds == nil {
83+
return DefaultGeometryIndexConfig()
84+
}
85+
b := p.Bounds
86+
minX, maxX, minY, maxY := b.MinX, b.MaxX, b.MinY, b.MaxY
87+
// There are projections where the min and max are equal e.g. 3571.
88+
// We need to have a valid rectangle as the geometry index bounds.
89+
if maxX-minX < 1 {
90+
maxX++
91+
}
92+
if maxY-minY < 1 {
93+
maxY++
94+
}
95+
// Expand the bounds by 2x the clippingBoundsDelta, to
96+
// ensure that shapes touching the bounds don't get
97+
// clipped.
98+
boundsExpansion := 2 * clippingBoundsDelta
99+
deltaX := (maxX - minX) * boundsExpansion
100+
deltaY := (maxY - minY) * boundsExpansion
101+
return &Config{
102+
S2Geometry: &S2GeometryConfig{
103+
MinX: minX - deltaX,
104+
MaxX: maxX + deltaX,
105+
MinY: minY - deltaY,
106+
MaxY: maxY + deltaY,
107+
S2Config: defaultS2Config()},
108+
}
109+
}
110+
75111
// A cell id unused by S2. We use it to index geometries that exceed the
76112
// configured bounds.
77113
const exceedsBoundsCellID = s2.CellID(^uint64(0))

pkg/geo/geoindex/s2_geometry_index_test.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,11 @@ import (
1616
"testing"
1717

1818
"github.com/cockroachdb/cockroach/pkg/geo"
19+
"github.com/cockroachdb/cockroach/pkg/geo/geoprojbase"
1920
"github.com/cockroachdb/cockroach/pkg/geo/geos"
2021
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
2122
"github.com/cockroachdb/datadriven"
23+
"github.com/stretchr/testify/require"
2224
)
2325

2426
func TestS2GeometryIndexBasic(t *testing.T) {
@@ -113,3 +115,31 @@ func TestClipEWKBByRect(t *testing.T) {
113115
}
114116
})
115117
}
118+
119+
func TestNoClippingAtSRIDBounds(t *testing.T) {
120+
defer leaktest.AfterTest(t)()
121+
122+
// Test that indexes that use the SRID bounds don't clip shapes that touch
123+
// those bounds. This test uses point shapes representing the four corners
124+
// of the bounds.
125+
for srid, projInfo := range geoprojbase.Projections {
126+
if projInfo.Bounds == nil {
127+
continue
128+
}
129+
b := projInfo.Bounds
130+
index := NewS2GeometryIndex(*GeometryIndexConfigForSRID(srid).S2Geometry)
131+
// Four corners of the bounds, proceeding clockwise from the lower-left.
132+
xCorners := []float64{b.MinX, b.MinX, b.MaxX, b.MaxX}
133+
yCorners := []float64{b.MinY, b.MaxY, b.MaxY, b.MinY}
134+
for i := range xCorners {
135+
g, err := geo.NewGeometryFromPointCoords(xCorners[i], yCorners[i])
136+
require.NoError(t, err)
137+
keys, err := index.InvertedIndexKeys(context.Background(), g)
138+
require.NoError(t, err)
139+
require.Equal(t, 1, len(keys))
140+
require.NotEqual(t, Key(exceedsBoundsCellID), keys[0],
141+
"SRID: %d, Point: %f, %f", srid, xCorners[i], yCorners[i])
142+
}
143+
}
144+
145+
}

pkg/geo/geoprojbase/geoprojbase.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ func (p *Proj4Text) Equal(o Proj4Text) bool {
4747
return bytes.Equal(p.cStr, o.cStr)
4848
}
4949

50-
// Bounds represents the lat/lng bounds.
50+
// Bounds represents the projected or lat/lng bounds.
5151
type Bounds struct {
5252
MinX float64
5353
MaxX float64
@@ -70,7 +70,7 @@ type ProjInfo struct {
7070

7171
// Denormalized fields.
7272

73-
// Bounds defines the bounds (in lat lng) of the given coordinate system.
73+
// Bounds defines the bounds (projected or lat/lng) of the given coordinate system.
7474
// If nil, no bounds were defined for the given projection.
7575
Bounds *Bounds
7676
// IsLatLng stores whether the projection is a LatLng based projection (denormalized from above)

pkg/sql/create_index.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ func MakeIndexDescriptor(
162162
return nil, err
163163
}
164164
if columnDesc.Type.InternalType.Family == types.GeometryFamily {
165-
indexDesc.GeoConfig = *geoindex.DefaultGeometryIndexConfig()
165+
indexDesc.GeoConfig = *geoindex.GeometryIndexConfigForSRID(columnDesc.Type.GeoSRIDOrZero())
166166
telemetry.Inc(sqltelemetry.GeometryInvertedIndexCounter)
167167
}
168168
if columnDesc.Type.InternalType.Family == types.GeographyFamily {

pkg/sql/create_table.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1409,7 +1409,7 @@ func MakeTableDesc(
14091409
return desc, err
14101410
}
14111411
if columnDesc.Type.InternalType.Family == types.GeometryFamily {
1412-
idx.GeoConfig = *geoindex.DefaultGeometryIndexConfig()
1412+
idx.GeoConfig = *geoindex.GeometryIndexConfigForSRID(columnDesc.Type.GeoSRIDOrZero())
14131413
}
14141414
if columnDesc.Type.InternalType.Family == types.GeographyFamily {
14151415
idx.GeoConfig = *geoindex.DefaultGeographyIndexConfig()
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
# LogicTest: local
2+
3+
# SRID of the geometry column is unspecified, so default index bounds are used.
4+
statement ok
5+
CREATE TABLE geo_table(
6+
k int primary key,
7+
geom geometry,
8+
INVERTED INDEX geom_index(geom)
9+
)
10+
11+
# Shapes with SRID 26918. We've taken small X, Y values and added 400,000 to the X coordinate
12+
# and 4,000,000 to the Y coordinate to place them inside the bounds of SRID 26918.
13+
statement ok
14+
INSERT INTO geo_table VALUES
15+
(1, 'SRID=26918;POINT(400001 4000001)'),
16+
(2, 'SRID=26918;LINESTRING(400001 4000001, 400002 4000002)'),
17+
(3, 'SRID=26918;POINT(400003 4000003)'),
18+
(4, 'SRID=26918;LINESTRING(400004 4000004, 400005 4000005)'),
19+
(5, 'SRID=26918;LINESTRING(400040 4000040, 400041 4000041)'),
20+
(6, 'SRID=26918;POLYGON((400001 4000001, 400005 4000001, 400005 4000005, 400001 4000005, 400001 4000001))')
21+
22+
query I
23+
SELECT k FROM geo_table WHERE ST_Intersects('SRID=26918;POINT(400003 4000003)'::geometry, geom) ORDER BY k
24+
----
25+
3
26+
6
27+
28+
# The InvertedFilterer stats show "rows read: 6" since all the above shapes overflow
29+
# the default index bounds.
30+
query T
31+
SELECT url FROM [EXPLAIN ANALYZE SELECT k FROM geo_table WHERE ST_Intersects('SRID=26918;POINT(400003 4000003)'::geometry, geom) ORDER BY k]
32+
----
33+
https://cockroachdb.github.io/distsqlplan/decode.html#eJykU1Fr20wQfP9-xbIvifku-E4StnOlYCdRWrWOncqGNo1MUKwliEg69-7UOgT_9yIpaWNDREz1InZ3Zm-GYR_R_MhQ4swf-6dzKHUG5-H0Aq79b5fjUTCB0WQ0vvruw-FZMJvPvow78AS9b4B3pG5sfJsRfP3ohz4Ye5MWlrShpTWHB7MwOHvv9I7F4N3lNJjMDz3OOXeh_nG3cyDlB3964c_DK1btyjswDc_8EE6u4H6BDAuV0CTOyaC8RoELhiutlmSM0lXrsQYEyRolZ5gWq9JW7QXDpdKE8hFtajNCifNKZEhxQrrLkWFCNk6zem317k1aJLQe_rGDDGeruDASukfiqGZo9cuApjiR0EOGxsZZBjbNSQI3yPD2wdIzQPQ9OMHFhqEq7V9RxsZ3hFJs2NuFB8VP0paS8zSzpEl3xbb657m_XmlQBQyFBFNJB2NjbWWEUeTwfhS5_X6EQEXStASPEJDhtLQShuINBvN4DTnlSj9AaSiR4HD4nL5u09nH5ieVFk_xONsGVzrNY_2wlc2TaDZ03qC7jhZ2UE1zB_uaE3cfJzOlLemuu-1iKP5Hhk2IcvdMuOCiOgiHO73eMX_5nfZGYuCJphjwgRh4nu-JA_nycoZO59-SFO1Jevv4D8msVGFoy_9rm_lmwZCSO2qu2ahSL-lSq2X9TFNOa17dSMjYZiqaIiiaUSXwJVm0kp12stNKdtvJbivZ2yEvNv_9DgAA__80dbuh
34+
35+
statement ok
36+
DROP TABLE geo_table
37+
38+
# SRID of the geometry column is specified, so SRID specific bounds are used.
39+
statement ok
40+
CREATE TABLE geo_table(
41+
k int primary key,
42+
geom geometry(geometry, 26918),
43+
INVERTED INDEX geom_index(geom)
44+
)
45+
46+
# Same shapes.
47+
statement ok
48+
INSERT INTO geo_table VALUES
49+
(1, 'SRID=26918;POINT(400001 4000001)'),
50+
(2, 'SRID=26918;LINESTRING(400001 4000001, 400002 4000002)'),
51+
(3, 'SRID=26918;POINT(400003 4000003)'),
52+
(4, 'SRID=26918;LINESTRING(400004 4000004, 400005 4000005)'),
53+
(5, 'SRID=26918;LINESTRING(400040 4000040, 400041 4000041)'),
54+
(6, 'SRID=26918;POLYGON((400001 4000001, 400005 4000001, 400005 4000005, 400001 4000005, 400001 4000001))')
55+
56+
57+
# Same result.
58+
query I
59+
SELECT k FROM geo_table WHERE ST_Intersects('SRID=26918;POINT(400003 4000003)'::geometry, geom) ORDER BY k
60+
----
61+
3
62+
6
63+
64+
# The InvertedFilterer stats show "rows read: 2" since all the above shapes are within the index
65+
# bounds.
66+
query T
67+
SELECT url FROM [EXPLAIN ANALYZE SELECT k FROM geo_table WHERE ST_Intersects('SRID=26918;POINT(400003 4000003)'::geometry, geom) ORDER BY k]
68+
----
69+
https://cockroachdb.github.io/distsqlplan/decode.html#eJykVFFP2zAQft-vON0LVMtU2-na4GlSC4QtrLQsrbQxXKHQnFhEGne2uxWh_vcpCWy0iA60vER3933n-853vkX7I0eJo7AfHoxhYXI4iocncB5-Pe33ogH0Br3-2bcQdg-j0Xj0ud-AO-h1DbwifeGSy5zgy8cwDsG6i6xwZCxNnd3dGcXR4XvR3uPBu9NhNBjvthhjzIfqx_zGjpQfwuFJOI7PvDLXrAHD-DCMYf8MrifoYaFTGiQzsijPkePEw7nRU7JWm9J1WwGidImSeZgV84Ur3RMPp9oQylt0mcsJJY7LImNKUjJNhh6m5JIsr9KW515kRUrL7h856OFonhRWQlPwNu-ITiA49zuBH7DgzWPfHiRFCj4D7b6Tseih0b8sGEpSCQI9tC7Jc3DZjCSwMn554-ge0BGwj5OVh3rh_kqwLrkilHzlPV9mVPwk4yg9ynJHhkyTr2u9j4fLuQFdQJdLsKVQsC4xTipUyu-8VYoxXynRZlOljFKCM6UE6xSRQqAi_SfuWCE8aslw4SR0-TOaM0uWMKOZNjewsFSiGHzKnu6ReEmPjnVW3E2CWO_O3GSzxNysjcFd0V5XPKPuaopgA1U7N7BPKfFfomSkjSPT9NdVdPlr9LCeALm5kYwzXu6eYKLd3mMPv4N2jwctXhsBC3jQaoUtviMfLmlXNP7vJvn2m2y9RH9Mdq4LS2v6n8rMVhMPKb2i-uGwemGmdGr0tDqmNocVr3KkZF0d5bURFXWoLPAhmW8li-1ksZXsbyf7W8mtDfJk9ep3AAAA__9EAdjp
70+
71+
# Also works when creating an index.
72+
statement ok
73+
DROP INDEX geo_table@geom_index
74+
75+
statement ok
76+
CREATE INVERTED INDEX geom_index ON geo_table(geom)
77+
78+
query T
79+
SELECT url FROM [EXPLAIN ANALYZE SELECT k FROM geo_table WHERE ST_Intersects('SRID=26918;POINT(400003 4000003)'::geometry, geom) ORDER BY k]
80+
----
81+
https://cockroachdb.github.io/distsqlplan/decode.html#eJykVFFP2zAQft-vON0LVMtU2-na4GlSC4QtrLQsrbQxXKHQnFhEGne2uxWh_vcpCWy0iA60vER3933n-853vkX7I0eJo7AfHoxhYXI4iocncB5-Pe33ogH0Br3-2bcQdg-j0Xj0ud-AO-h1DbwifeGSy5zgy8cwDsG6i6xwZCxNnd3dGcXR4XvR3uPBu9NhNBjvthhjzIfqx_zGjpQfwuFJOI7PvDLXrAHD-DCMYf8MrifoYaFTGiQzsijPkePEw7nRU7JWm9J1WwGidImSeZgV84Ur3RMPp9oQylt0mcsJJY7LImNKUjJNhh6m5JIsr9KW515kRUrL7h856OFonhRWQlPwNu-ITiA49zuBH7DgzWPfHiRFCj4D7b6Tseih0b8sGEpSCQI9tC7Jc3DZjCSwMn554-ge0BGwj5OVh3rh_kqwLrkilHzlPV9mVPwk4yg9ynJHhkyTr2u9j4fLuQFdQJdLsKVQsC4xTipUyu-8VYoxXynRZlOljFKCM6UE6xSRQqAi_SfuWCE8aslw4SR0-TOaM0uWMKOZNjewsFSiGHzKnu6ReEmPjnVW3E2CWO_O3GSzxNysjcFd0V5XPKPuaopgA1U7N7BPKfFfomSkjSPT9NdVdPlr9LCeALm5kYwzXu6eYKLd3mMPv4N2jwctXhsBC3jQaoUtviMfLmlXNP7vJvn2m2y9RH9Mdq4LS2v6n8rMVhMPKb2i-uGwemGmdGr0tDqmNocVr3KkZF0d5bURFXWoLPAhmW8li-1ksZXsbyf7W8mtDfJk9ep3AAAA__9EAdjp

pkg/sql/opt/bench/bench_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
"github.com/cockroachdb/cockroach/pkg/security/securitytest"
2727
"github.com/cockroachdb/cockroach/pkg/server"
2828
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
29+
"github.com/cockroachdb/cockroach/pkg/sql/opt/exec"
2930
"github.com/cockroachdb/cockroach/pkg/sql/opt/exec/execbuilder"
3031
"github.com/cockroachdb/cockroach/pkg/sql/opt/memo"
3132
"github.com/cockroachdb/cockroach/pkg/sql/opt/optbuilder"
@@ -571,8 +572,7 @@ func (h *harness) runUsingAPI(tb testing.TB, bmType BenchmarkType, usePrepared b
571572
}
572573

573574
root := execMemo.RootExpr()
574-
execFactory := stubFactory{}
575-
eb := execbuilder.New(&execFactory, execMemo, nil /* catalog */, root, &h.evalCtx)
575+
eb := execbuilder.New(exec.StubFactory{}, execMemo, nil /* catalog */, root, &h.evalCtx)
576576
if _, err = eb.Build(); err != nil {
577577
tb.Fatalf("%v", err)
578578
}

0 commit comments

Comments
 (0)