From 009ce67e30d8adc6432f91426525b1d4936ad456 Mon Sep 17 00:00:00 2001 From: Yahor Yuzefovich Date: Mon, 23 Jun 2025 19:49:55 -0700 Subject: [PATCH] pgwire: support decoding VECTOR and BOX2D from binary This commit fixes an oversight where we forgot to add decoding support for PGVector and Box2D from the binary format (of the PGWire extended protocol). The encoding was added in c78326660f287d09522b1b9b1aabb95585c3ea28 and 7fa9129c4616c08261bee9988ae612a369091639, respectively. Note that for Box2D, Postgres doesn't support the binary format (I get an error "no binary output function available for type box2d"), but since we already introduced encoding, it seems reasonable to add decoding too. Release note (bug fix): CockroachDB can now decode the VECTOR and BOX2D types from the binary format of the PGWire extended protocol. --- pkg/sql/pgwire/pgwirebase/BUILD.bazel | 1 + pkg/sql/pgwire/pgwirebase/encoding.go | 38 +++++++++++++++++++++++++ pkg/sql/pgwire/testdata/pgtest/box2d | 32 +++++++++++++++++++++ pkg/sql/pgwire/testdata/pgtest/pgvector | 16 +++++++++++ pkg/util/vector/vector.go | 4 ++- 5 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 pkg/sql/pgwire/testdata/pgtest/box2d diff --git a/pkg/sql/pgwire/pgwirebase/BUILD.bazel b/pkg/sql/pgwire/pgwirebase/BUILD.bazel index 28ae4dd29b17..1bd07d801134 100644 --- a/pkg/sql/pgwire/pgwirebase/BUILD.bazel +++ b/pkg/sql/pgwire/pgwirebase/BUILD.bazel @@ -20,6 +20,7 @@ go_library( visibility = ["//visibility:public"], deps = [ "//pkg/geo", + "//pkg/geo/geopb", "//pkg/settings", "//pkg/sql/catalog/colinfo", "//pkg/sql/lex", diff --git a/pkg/sql/pgwire/pgwirebase/encoding.go b/pkg/sql/pgwire/pgwirebase/encoding.go index b499503cd5a5..48be1c345bd1 100644 --- a/pkg/sql/pgwire/pgwirebase/encoding.go +++ b/pkg/sql/pgwire/pgwirebase/encoding.go @@ -18,6 +18,7 @@ import ( "unicode/utf8" "github.com/cockroachdb/cockroach/pkg/geo" + "github.com/cockroachdb/cockroach/pkg/geo/geopb" "github.com/cockroachdb/cockroach/pkg/settings" 
"github.com/cockroachdb/cockroach/pkg/sql/lex" "github.com/cockroachdb/cockroach/pkg/sql/oidext" @@ -817,6 +818,43 @@ func DecodeDatum( return nil, err } return tree.NewDTSVector(ret), nil + case oidext.T_pgvector: + // PG binary format is + // 2 bytes for dimensions + // 2 bytes for unused, and + // 4 bytes for each float4. + if len(b) < 4 { + return nil, pgerror.Newf(pgcode.Syntax, "vector requires at least 4 bytes for binary format") + } + dim := int(binary.BigEndian.Uint16(b)) + b = b[4:] + if dim > vector.MaxDim { + return nil, vector.MaxDimExceededErr + } + if len(b) < 4*dim { + return nil, pgerror.Newf(pgcode.Syntax, "vector with %d dimensions requires %d bytes for binary format", dim, 4*dim) + } + v := make(vector.T, dim) + for i := 0; i < dim; i++ { + v[i] = math.Float32frombits(binary.BigEndian.Uint32(b)) + b = b[4:] + } + return tree.NewDPGVector(v), nil + case oidext.T_box2d: + // Expect 8 bytes for each of LoX, HiX, LoY, HiY. + if len(b) < 32 { + return nil, pgerror.Newf(pgcode.Syntax, "box2d requires at least 32 bytes for binary format") + } + loX := math.Float64frombits(binary.BigEndian.Uint64(b[0:8])) + hiX := math.Float64frombits(binary.BigEndian.Uint64(b[8:16])) + loY := math.Float64frombits(binary.BigEndian.Uint64(b[16:24])) + hiY := math.Float64frombits(binary.BigEndian.Uint64(b[24:32])) + box := geo.CartesianBoundingBox{ + BoundingBox: geopb.BoundingBox{ + LoX: loX, HiX: hiX, LoY: loY, HiY: hiY, + }, + } + return da.NewDBox2D(tree.DBox2D{CartesianBoundingBox: box}), nil case oidext.T_geometry: v, err := geo.ParseGeometryFromEWKB(b) if err != nil { diff --git a/pkg/sql/pgwire/testdata/pgtest/box2d b/pkg/sql/pgwire/testdata/pgtest/box2d new file mode 100644 index 000000000000..0ee1728bfab1 --- /dev/null +++ b/pkg/sql/pgwire/testdata/pgtest/box2d @@ -0,0 +1,32 @@ +# "ResultFormatCodes": [1] = binary +send +Parse {"Name": "s", "Query": "SELECT 'BOX(1 0,1 0)'::BOX2D;"} +Bind {"DestinationPortal": "p", "PreparedStatement": "s", "ResultFormatCodes": 
[1]} +Execute {"Portal": "p"} +Sync +---- + +until +ReadyForQuery +---- +{"Type":"ParseComplete"} +{"Type":"BindComplete"} +{"Type":"DataRow","Values":[{"binary":"3ff00000000000003ff000000000000000000000000000000000000000000000"}]} +{"Type":"CommandComplete","CommandTag":"SELECT 1"} +{"Type":"ReadyForQuery","TxStatus":"I"} + +send +Parse {"Query": "SELECT $1::BOX2D"} +Bind {"ParameterFormatCodes": [1], "Parameters": [{"binary":"3ff00000000000003ff000000000000000000000000000000000000000000000"}]} +Execute +Sync +---- + +until +ReadyForQuery +---- +{"Type":"ParseComplete"} +{"Type":"BindComplete"} +{"Type":"DataRow","Values":[{"text":"BOX(1 0,1 0)"}]} +{"Type":"CommandComplete","CommandTag":"SELECT 1"} +{"Type":"ReadyForQuery","TxStatus":"I"} diff --git a/pkg/sql/pgwire/testdata/pgtest/pgvector b/pkg/sql/pgwire/testdata/pgtest/pgvector index 3dedab053de6..993924d12713 100644 --- a/pkg/sql/pgwire/testdata/pgtest/pgvector +++ b/pkg/sql/pgwire/testdata/pgtest/pgvector @@ -45,3 +45,19 @@ ReadyForQuery {"Type":"DataRow","Values":[{"binary":"000200004000000040400000"}]} {"Type":"CommandComplete","CommandTag":"SELECT 2"} {"Type":"ReadyForQuery","TxStatus":"I"} + +send +Parse {"Query": "SELECT $1::VECTOR"} +Bind {"ParameterFormatCodes": [1], "Parameters": [{"binary":"000100003f800000"}]} +Execute +Sync +---- + +until +ReadyForQuery +---- +{"Type":"ParseComplete"} +{"Type":"BindComplete"} +{"Type":"DataRow","Values":[{"text":"[1]"}]} +{"Type":"CommandComplete","CommandTag":"SELECT 1"} +{"Type":"ReadyForQuery","TxStatus":"I"} diff --git a/pkg/util/vector/vector.go b/pkg/util/vector/vector.go index 68d6c7a961ee..76134c23a053 100644 --- a/pkg/util/vector/vector.go +++ b/pkg/util/vector/vector.go @@ -21,6 +21,8 @@ import ( // MaxDim is the maximum number of dimensions a vector can have. const MaxDim = 16000 +var MaxDimExceededErr = pgerror.Newf(pgcode.ProgramLimitExceeded, "vector cannot have more than %d dimensions", MaxDim) + // T is the type of a PGVector-like vector. 
type T []float32 @@ -38,7 +40,7 @@ func ParseVector(input string) (T, error) { parts := strings.Split(input, ",") if len(parts) > MaxDim { - return T{}, pgerror.Newf(pgcode.ProgramLimitExceeded, "vector cannot have more than %d dimensions", MaxDim) + return T{}, MaxDimExceededErr } vector := make([]float32, len(parts))