-
Notifications
You must be signed in to change notification settings - Fork 126
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
SNOW-983911 Add support for querying the vector data type #997
Changes from all commits
9d85546
50d7b5e
47af7e8
cd3bac5
e5b9be0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ | |
"database/sql" | ||
"database/sql/driver" | ||
"encoding/hex" | ||
"encoding/json" | ||
"fmt" | ||
"math" | ||
"math/big" | ||
|
@@ -91,10 +92,10 @@ | |
} | ||
|
||
// snowflakeTypeToGo translates Snowflake data type to Go data type. | ||
func snowflakeTypeToGo(dbtype snowflakeType, scale int64) reflect.Type { | ||
switch dbtype { | ||
func snowflakeTypeToGo(rowType execResponseRowType) reflect.Type { | ||
switch getSnowflakeType(rowType.Type) { | ||
case fixedType: | ||
if scale == 0 { | ||
if rowType.Scale == 0 { | ||
return reflect.TypeOf(int64(0)) | ||
} | ||
return reflect.TypeOf(float64(0)) | ||
|
@@ -108,8 +109,22 @@ | |
return reflect.TypeOf([]byte{}) | ||
case booleanType: | ||
return reflect.TypeOf(true) | ||
case vectorType: | ||
if len(rowType.Fields) != 1 { | ||
logger.Errorf("invalid result metadata fields for vector: length=%d", len(rowType.Fields)) | ||
return reflect.TypeOf("") | ||
} | ||
switch getSnowflakeType(rowType.Fields[0].Type) { | ||
case fixedType: | ||
return reflect.TypeOf([]int32{}) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Based on the documentation, in V2 there may be a different number of bits per type. Does this PR cover only V1? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, we currently support only 32-bit values. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we mention V1 in the commit message (so, for now, in the PR name only)? |
||
case realType: | ||
return reflect.TypeOf([]float32{}) | ||
default: | ||
logger.Errorf("invalid element type for vector: %s", rowType.Fields[0].Type) | ||
return reflect.TypeOf("") | ||
} | ||
} | ||
logger.Errorf("unsupported dbtype is specified. %v", dbtype) | ||
logger.Errorf("unsupported dbtype is specified. %v", rowType.Type) | ||
return reflect.TypeOf("") | ||
} | ||
|
||
|
@@ -329,6 +344,27 @@ | |
} | ||
*dest = b | ||
return nil | ||
case "vector": | ||
if len(srcColumnMeta.Fields) != 1 { | ||
return fmt.Errorf("invalid result metadata fields for vector: length=%d", len(srcColumnMeta.Fields)) | ||
} | ||
switch getSnowflakeType(srcColumnMeta.Fields[0].Type) { | ||
case fixedType: | ||
values := make([]int32, 0, srcColumnMeta.VectorDimension) | ||
if err := json.Unmarshal([]byte(*srcValue), &values); err != nil { | ||
return err | ||
} | ||
*dest = values | ||
case realType: | ||
values := make([]float32, 0, srcColumnMeta.VectorDimension) | ||
if err := json.Unmarshal([]byte(*srcValue), &values); err != nil { | ||
return err | ||
} | ||
*dest = values | ||
default: | ||
return fmt.Errorf("invalid element type for vector: %s", srcColumnMeta.Fields[0].Type) | ||
} | ||
return nil | ||
} | ||
*dest = *srcValue | ||
return nil | ||
|
@@ -610,6 +646,33 @@ | |
} | ||
} | ||
return err | ||
case vectorType: | ||
vectorData := srcValue.(*array.FixedSizeList) | ||
datatype := vectorData.DataType().(*arrow.FixedSizeListType) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Does this work only for Arrow? What should happen if a client uses |
||
dim := int(datatype.Len()) | ||
switch datatype.Elem().ID() { | ||
case arrow.INT32: | ||
values := vectorData.ListValues().(*array.Int32).Int32Values() | ||
for i := 0; i < vectorData.Len(); i++ { | ||
if vectorData.IsNull(i) { | ||
destcol[i] = []int32(nil) | ||
} else { | ||
destcol[i] = values[i*dim : (i+1)*dim] | ||
} | ||
} | ||
case arrow.FLOAT32: | ||
values := vectorData.ListValues().(*array.Float32).Float32Values() | ||
for i := 0; i < vectorData.Len(); i++ { | ||
if vectorData.IsNull(i) { | ||
destcol[i] = []float32(nil) | ||
} else { | ||
destcol[i] = values[i*dim : (i+1)*dim] | ||
} | ||
} | ||
default: | ||
return fmt.Errorf("unsupported element type %q for a vector", datatype.Elem().String()) | ||
} | ||
return err | ||
} | ||
|
||
return fmt.Errorf("unsupported data type") | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,6 +26,7 @@ | |
binaryType | ||
timeType | ||
booleanType | ||
vectorType | ||
// the following are not snowflake types per se but internal types | ||
nullType | ||
sliceType | ||
|
@@ -47,6 +48,7 @@ | |
"BINARY": binaryType, | ||
"TIME": timeType, | ||
"BOOLEAN": booleanType, | ||
"VECTOR": vectorType, | ||
"NULL": nullType, | ||
"SLICE": sliceType, | ||
"CHANGE_TYPE": changeType, | ||
|
@@ -104,6 +106,8 @@ | |
DataTypeTime = []byte{timeType.Byte()} | ||
// DataTypeBoolean is a BOOLEAN datatype. | ||
DataTypeBoolean = []byte{booleanType.Byte()} | ||
// DataTypeVector is a VECTOR datatype. | ||
DataTypeVector = []byte{vectorType.Byte()} | ||
) | ||
|
||
// dataTypeMode returns the subsequent data type in a string representation. | ||
|
@@ -131,6 +135,40 @@ | |
return tsmode, nil | ||
} | ||
|
||
type vectorElements interface { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm wondering about one thing. In the future we are expected to implement structured types, which contain arrays. An array is a very similar structure to a vector, just more generic, I'd say. Maybe we can prepare the code here to use these arrays instead of using a native backend type for vectors? |
||
~int32 | ~float32 | ||
} | ||
|
||
// SQLVector is a wrapper type used to support deserializing SQL values into slices
// in database/sql scans. Cast slice pointers as *SQLVector[T] when passing them to
// a database/sql Scan method. The slice will be populated with the corresponding
// column value when the scan completes.
//
// The element type T is restricted by the vectorElements constraint.
//
// Here is an example:
//
//	var v []int32
//	err := rows.Scan((*SQLVector[int32])(&v))
type SQLVector[T vectorElements] []T | ||
|
||
// Vector is syntactic sugar for wrapping slices in SQLVector[t] so that they | ||
// can be deserialized in database/sql scans. | ||
// | ||
// Here is an example: | ||
// | ||
// var v []int32 | ||
// err := rows.Scan(Vector(&v)) | ||
func Vector[T vectorElements](value *[]T) *SQLVector[T] { | ||
return (*SQLVector[T])(value) | ||
} | ||
|
||
func (v *SQLVector[T]) Scan(src any) error { | ||
if vec, ok := src.([]T); ok { | ||
*v = vec | ||
return nil | ||
} | ||
return fmt.Errorf("cannot convert %T to a vector of type %T", src, *v) | ||
} | ||
|
||
// SnowflakeParameter includes the columns output from SHOW PARAMETER command. | ||
type SnowflakeParameter struct { | ||
Key string | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure about this. This function was independent of the API response, and that was good - separation of concerns. I'm wondering if some specific struct can be used here instead.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I could parse the response type in the caller of this function and pass down some type of options struct with the vector metadata. However, since this function is only called in one place right now (and in the context of a request), it felt cleaner to have the function take in `execResponseRowType`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think that separation of layers (API/mapping) is good :) I would prefer to keep API parsing closer to the API functions, and have this function keep working with just types, as it was before.