diff --git a/.bazelrc b/.bazelrc index 8743fddf..b177c125 100644 --- a/.bazelrc +++ b/.bazelrc @@ -5,20 +5,9 @@ build --copt=-DTFX_BSL_USE_ARROW_C_ABI build --cxxopt=-std=c++17 build --host_cxxopt=-std=c++17 -# Needed to avoid zetasql proto error. -build --protocopt=--experimental_allow_proto3_optional - # icu@: In create_linking_context: in call to create_linking_context(), # parameter 'user_link_flags' is deprecated and will be removed soon. # It may be temporarily re-enabled by setting --incompatible_require_linker_input_cc_api=false build --incompatible_require_linker_input_cc_api=false build:macos --apple_platform_type=macos build:macos_arm64 --cpu=darwin_arm64 - -# Most of these warnings are triggered from ZetaSQL, disable for now. Ideally, -# we would disable these only for projects we don't control, but there does not -# seem to be an easy way to do that yet. -build --copt -Wno-sign-compare -build --copt -Wno-deprecated-declarations -build --copt -Wno-return-type -build --copt -Wno-unused-but-set-parameter diff --git a/WORKSPACE b/WORKSPACE index a8370432..f14fb3f5 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -71,18 +71,6 @@ http_archive( ], ) -# Needed by abseil-py by zetasql. -http_archive( - name = "six_archive", - build_file = "//third_party:six.BUILD", - sha256 = "105f8d68616f8248e24bf0e9372ef04d3cc10104f1980f54d57b2ce73a5ad56a", - strip_prefix = "six-1.10.0", - urls = [ - "http://mirror.bazel.build/pypi.python.org/packages/source/s/six/six-1.10.0.tar.gz", - "https://pypi.python.org/packages/source/s/six/six-1.10.0.tar.gz", - ], -) - load("@com_google_protobuf//:protobuf_deps.bzl", "protobuf_deps") protobuf_deps() @@ -217,45 +205,6 @@ load("@bazel_gazelle//:deps.bzl", "gazelle_dependencies") #, "go_repository") gazelle_dependencies() -################################################################################ -# ZetaSQL # -################################################################################ - -ZETASQL_COMMIT = "a516c6b26d183efc4f56293256bba92e243b7a61" # 11/01/2024 - -http_archive( - name = "com_google_zetasql", - patch_args = ["-p1"], - patches = ["//third_party:zetasql.patch"], - sha256 = "1afc2210d4aad371eff0a6bfdd8417ba99e02183a35dff167af2fa6097643f26", - strip_prefix = "zetasql-%s" % ZETASQL_COMMIT, - urls = ["https://github.com/google/zetasql/archive/%s.tar.gz" % ZETASQL_COMMIT], -) - -load("@com_google_zetasql//bazel:zetasql_deps_step_1.bzl", "zetasql_deps_step_1") - -zetasql_deps_step_1() - -load("@com_google_zetasql//bazel:zetasql_deps_step_2.bzl", "zetasql_deps_step_2") - -zetasql_deps_step_2( - analyzer_deps = True, - evaluator_deps = True, - java_deps = False, - testing_deps = False, - tools_deps = False, -) - -# No need to run zetasql_deps_step_3 and zetasql_deps_step_4 since all necessary dependencies are -# already installed. - -# load("@com_google_zetasql//bazel:zetasql_deps_step_3.bzl", "zetasql_deps_step_3") - -# zetasql_deps_step_3() - -# load("@com_google_zetasql//bazel:zetasql_deps_step_4.bzl", "zetasql_deps_step_4") - -# zetasql_deps_step_4() _PLATFORMS_VERSION = "0.0.6" diff --git a/tfx_bsl/arrow/sql_util.py b/tfx_bsl/arrow/sql_util.py deleted file mode 100644 index 6df122b7..00000000 --- a/tfx_bsl/arrow/sql_util.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Arrow Array utilities.""" - -import sys - -# pytype: disable=import-error -# pylint: disable=unused-import -# pylint: disable=g-import-not-at-top -# See b/148667210 for why the ImportError is ignored. -try: - from tfx_bsl.cc.tfx_bsl_extension.arrow.sql_util import RecordBatchSQLSliceQuery -except ImportError as err: - sys.stderr.write( - "Error importing tfx_bsl_google_extension.arrow.sql_util. " - "Some tfx_bsl functionalities are not available: {}".format(err)) diff --git a/tfx_bsl/arrow/sql_util_test.py b/tfx_bsl/arrow/sql_util_test.py deleted file mode 100644 index 2c929220..00000000 --- a/tfx_bsl/arrow/sql_util_test.py +++ /dev/null @@ -1,496 +0,0 @@ -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License -"""Tests for tfx_bsl.arrow.sql_util.""" - -import sys -import unittest - -import pyarrow as pa -from tfx_bsl.arrow import sql_util - -from absl.testing import absltest -from absl.testing import parameterized - -_TEST_CASES_OF_PRIMITIVE_ARRAYS = [ - dict( - testcase_name='with_no_filters', - sql=""" - SELECT - ARRAY( - SELECT - STRUCT(f1, f2) - ) as slice_key - FROM Examples as example;""", - expected_output=[ - [[('f1', '1'), ('f2', '10')]], - [[('f1', '2'), ('f2', '20')]], - [[('f1', 'NULL'), ('f2', 'NULL')]], - [[('f1', '3'), ('f2', '30')]], - ]), - dict( - testcase_name='with_filter_of_null_value', - sql=""" - SELECT - ARRAY( - SELECT - STRUCT(f1, f2) - FROM - UNNEST([example]) AS e - WHERE f1 IS NOT NULL - ) as slice_key - FROM Examples as example;""", - expected_output=[ - [[('f1', '1'), ('f2', '10')]], - [[('f1', '2'), ('f2', '20')]], - [], - [[('f1', '3'), ('f2', '30')]], - ]), - dict( - testcase_name='with_filter_of_all_value', - sql=""" - SELECT - ARRAY( - SELECT - STRUCT(f1, f2) - FROM - UNNEST([example]) AS e - WHERE f1 = 0 - ) as slice_key - FROM Examples as example;""", - expected_output=[ - [], - [], - [], - [], - ]), -] - -_TEST_CASES_OF_LIST_ARRAYS = [ - dict( - testcase_name='with_no_filters', - sql=""" - SELECT - ARRAY( - SELECT - STRUCT(f1, f2) - FROM - example.f1, - example.f2 - ) as slice_key - FROM Examples as example;""", - expected_output=[[[('f1', '1'), ('f2', '10')], - [('f1', '1'), ('f2', '20')], - [('f1', '1'), ('f2', '30')], - [('f1', '2'), ('f2', '10')], - [('f1', '2'), ('f2', '20')], - [('f1', '2'), ('f2', '30')], - [('f1', '3'), ('f2', '10')], - [('f1', '3'), ('f2', '20')], - [('f1', '3'), ('f2', '30')]], - [[('f1', '4'), ('f2', '40')]], [], [], [], [], - [[('f1', 'NULL'), ('f2', 'NULL')]], - [[('f1', '7'), ('f2', 'NULL')]]]), - dict( - testcase_name='with_filter_of_f1', - sql=""" - SELECT - ARRAY( - SELECT - STRUCT(f1, f2) 
- FROM - example.f1, - example.f2 - WHERE - f1 != 1 - ) as slice_key - FROM Examples as example;""", - expected_output=[[[('f1', '2'), ('f2', '10')], - [('f1', '2'), ('f2', '20')], - [('f1', '2'), ('f2', '30')], - [('f1', '3'), ('f2', '10')], - [('f1', '3'), ('f2', '20')], - [('f1', '3'), ('f2', '30')]], - [[('f1', '4'), ('f2', '40')]], [], [], [], [], [], - [[('f1', '7'), ('f2', 'NULL')]]]), - dict( - testcase_name='with_only_one_filed_selected_with_alias', - sql=""" - SELECT - ARRAY( - SELECT - STRUCT(f1 as some_name) - FROM - example.f1 - WHERE - f1 != 1 - ) as slice_key - FROM Examples as example;""", - expected_output=[[[('some_name', '2')], [('some_name', '3')]], - [[('some_name', '4')]], [], [[('some_name', '5')]], [], - [[('some_name', '6')]], [], [[('some_name', '7')]]]), -] - -_TEST_CASES_WITH_ONE_FIELD_FORMAT_NOT_SUPPORTED = [ - dict( - testcase_name='with_supported_column_queried', - sql=""" - SELECT - ARRAY( - SELECT - STRUCT(f2) - FROM - UNNEST([example]) AS e - WHERE f2 != 1 - ) as slice_key - FROM Examples as example;""", - expected_output=[[], [[('f2', '2')]], [], [[('f2', '3')]]], - error=False), - dict( - testcase_name='with_unsupported_column_queried', - sql=""" - SELECT - ARRAY( - SELECT - STRUCT(f1) - FROM - UNNEST([example]) AS e - ) as slice_key - FROM Examples as example;""", - expected_output=None, - error=True), -] - -_TEST_CASES_WITH_ALL_FIELDS_FORMAT_NOT_SUPPORTED = [ - dict( - testcase_name='with_no_field_queried', - sql=""" - SELECT - ARRAY( - SELECT STRUCT(1 as field) - ) as slice_key - FROM Examples as example;""", - expected_output=[[[('field', '1')]], [[('field', '1')]], - [[('field', '1')]], [[('field', '1')]]]), -] - -_TEST_CASES_WITH_INVALID_STATEMENT = [ - dict( - testcase_name='with_syntax_error', - sql=""" - SELLLLLLECT - * - FROM Examples as example;""", - error="""Unexpected identifier "SELLLLLLECT"""), - dict( - testcase_name='with_more_than_one_columns', - sql=""" - SELECT - f1, f2 - FROM Examples as example;""", - error='Only one column should be returned.'), - dict( - testcase_name='with_no_struct_format_returned', - sql=""" - SELECT - f1 - FROM Examples as example;""", - error='query result should in an Array of Struct type.'), -] - -_TEST_CASES_OF_STRUCT_ARRAYS = [ - dict( - testcase_name='on_struct_and_nested_struct', - record_batch=pa.RecordBatch.from_arrays( - [ - pa.array([1, 2, None], type=pa.int64()), - pa.array( - [(3, 4.1), None, (5, 6.3)], - type=pa.struct( - [pa.field('a', pa.int32()), pa.field('b', pa.float32())] - ), - ), - pa.array( - [ - (7.1, ('o_string', b'x_bytes')), - (8.2, ('p_string', b'y_bytes')), - (9.3, ('q_string', b'z_bytes')), - ], - type=pa.struct([ - pa.field('c', pa.float64()), - pa.field( - 'd', - pa.struct([ - pa.field('e', pa.string()), - pa.field('f', pa.binary()), - ]), - ), - ]), - ), - ], - ['f1', 'f2', 'f3'], - ), - sql=""" - SELECT - ARRAY( - SELECT - STRUCT(f1, f2.a as f2_a, f2.b as f2_b, f3.c as f3_c, - f3.d.e as f3_d_e, f3.d.f as f3_d_f) - ) as slice_key - FROM Examples as example;""", - expected_output=[ - [[ - ('f1', '1'), - ('f2_a', '3'), - ('f2_b', '4.1'), - ('f3_c', '7.1'), - ('f3_d_e', 'o_string'), - ('f3_d_f', 'x_bytes'), - ]], - [[ - ('f1', '2'), - ('f2_a', 'NULL'), - ('f2_b', 'NULL'), - ('f3_c', '8.2'), - ('f3_d_e', 'p_string'), - ('f3_d_f', 'y_bytes'), - ]], - [[ - ('f1', 'NULL'), - ('f2_a', '5'), - ('f2_b', '6.3'), - ('f3_c', '9.3'), - ('f3_d_e', 'q_string'), - ('f3_d_f', 'z_bytes'), - ]], - ], - ), - dict( - testcase_name='on_struct_of_list', - record_batch=pa.RecordBatch.from_arrays( - [ 
- pa.array( - [([5], ([1.1],)), None, ([8], ([3.3],))], - type=pa.struct([ - pa.field('int64_list', pa.list_(pa.int64())), - pa.field( - 'f2', - pa.struct([ - pa.field( - 'float64_list', pa.list_(pa.float64()) - ), - ]), - ), - ]), - ), - ], - ['f1'], - ), - sql=""" - SELECT - ARRAY( - SELECT - STRUCT(int64_list as f1_int64_list, - float64_list as f1_f2_float64_list) - FROM - example.f1.int64_list, - example.f1.f2.float64_list - ) as slice_key - FROM Examples as example;""", - expected_output=[ - [[('f1_int64_list', '5'), ('f1_f2_float64_list', '1.1')]], - [], - [[('f1_int64_list', '8'), ('f1_f2_float64_list', '3.3')]], - ], - ), -] - - -# The RecordBatchSQLSliceQuery uses ZetaSQL which cannot be compiled on Windows. -# b/191377114 -@unittest.skipIf( - sys.platform.startswith('win'), - 'RecordBatchSQLSliceQuery is not supported on Windows.') -class RecordBatchSQLSliceQueryTest(parameterized.TestCase): - - @parameterized.named_parameters(*_TEST_CASES_OF_PRIMITIVE_ARRAYS) - def test_query_primitive_arrays(self, sql, expected_output): - record_batch = pa.RecordBatch.from_arrays([ - pa.array([1, 2, None, 3], type=pa.int64()), - pa.array([10, 20, None, 30], type=pa.int32()), - ], ['f1', 'f2']) - - query = sql_util.RecordBatchSQLSliceQuery(sql, record_batch.schema) - slices = query.Execute(record_batch) - self.assertEqual(slices, expected_output) - - @parameterized.named_parameters(*_TEST_CASES_OF_LIST_ARRAYS) - def test_query_list_arrays(self, sql, expected_output): - # List of int32 & int64. - record_batch = pa.RecordBatch.from_arrays([ - pa.array([[1, 2, 3], [4], None, [5], [], [6], [None], [7]], - type=pa.list_(pa.int64())), - pa.array([[10, 20, 30], [40], None, None, [], [], [None], [None]], - type=pa.list_(pa.int32())), - ], ['f1', 'f2']) - - query = sql_util.RecordBatchSQLSliceQuery(sql, record_batch.schema) - slices = query.Execute(record_batch) - self.assertEqual(slices, expected_output) - - @parameterized.named_parameters(*_TEST_CASES_OF_LIST_ARRAYS) - def test_query_large_list_arrays(self, sql, expected_output): - # Large list of int32 & int64. 
- record_batch = pa.RecordBatch.from_arrays([ - pa.array([[1, 2, 3], [4], None, [5], [], [6], [None], [7]], - type=pa.large_list(pa.int64())), - pa.array([[10, 20, 30], [40], None, None, [], [], [None], [None]], - type=pa.large_list(pa.int32())), - ], ['f1', 'f2']) - - query = sql_util.RecordBatchSQLSliceQuery(sql, record_batch.schema) - slices = query.Execute(record_batch) - - self.assertEqual(slices, expected_output) - - @parameterized.named_parameters( - *_TEST_CASES_WITH_ONE_FIELD_FORMAT_NOT_SUPPORTED) - def test_query_with_one_field_not_supported(self, sql, expected_output, - error): - record_batch = pa.RecordBatch.from_arrays([ - pa.array([[[10, 100]], [[20, 200]], None, [[30, 300]]], - type=pa.list_(pa.list_(pa.int64()))), - pa.array([1, 2, None, 3], type=pa.int32()), - ], ['f1', 'f2']) - - if error: - with self.assertRaisesRegex(RuntimeError, - 'Are you querying any unsupported column?'): - query = sql_util.RecordBatchSQLSliceQuery(sql, record_batch.schema) - else: - query = sql_util.RecordBatchSQLSliceQuery(sql, record_batch.schema) - slices = query.Execute(record_batch) - self.assertEqual(slices, expected_output) - - @parameterized.named_parameters( - *_TEST_CASES_WITH_ALL_FIELDS_FORMAT_NOT_SUPPORTED) - def test_query_with_all_fields_not_supported(self, sql, expected_output): - record_batch = pa.RecordBatch.from_arrays([ - pa.array([[[10, 100]], [[20, 200]], None, [[30, 300]]], - type=pa.list_(pa.list_(pa.int64()))), - ], ['f1']) - - query = sql_util.RecordBatchSQLSliceQuery(sql, record_batch.schema) - slices = query.Execute(record_batch) - self.assertEqual(slices, expected_output) - - @parameterized.named_parameters(*_TEST_CASES_WITH_INVALID_STATEMENT) - def test_query_with_invalid_statement(self, sql, error): - record_batch = pa.RecordBatch.from_arrays([ - pa.array([[1, 2, 3], [4], None, [5], [], [6], [None], [7]], - type=pa.list_(pa.int64())), - pa.array([[10, 20, 30], [40], None, None, [], [], [None], [None]], - type=pa.list_(pa.int32())), - ], ['f1', 'f2']) - - with self.assertRaisesRegex(RuntimeError, error): - _ = sql_util.RecordBatchSQLSliceQuery(sql, record_batch.schema) - - def test_query_with_unexpected_record_batch_schema(self): - record_batch_1 = pa.RecordBatch.from_arrays([ - pa.array([1, 2, 3], type=pa.int64()), - ], ['f1']) - record_batch_2 = pa.RecordBatch.from_arrays([ - pa.array([4, 5, 6], type=pa.int32()), - ], ['f1']) - sql = """SELECT ARRAY(SELECT STRUCT(f1)) as slice_key - FROM Examples as example;""" - - query = sql_util.RecordBatchSQLSliceQuery(sql, record_batch_1.schema) - with self.assertRaisesRegex(RuntimeError, 'Unexpected RecordBatch schema.'): - _ = query.Execute(record_batch_2) - - def test_query_with_empty_input(self): - record_batch = pa.RecordBatch.from_arrays([ - pa.array([], type=pa.int64()), - ], ['f1']) - sql = """SELECT ARRAY(SELECT STRUCT(f1)) as slice_key - FROM Examples as example;""" - - query = sql_util.RecordBatchSQLSliceQuery(sql, record_batch.schema) - slices = query.Execute(record_batch) - self.assertEqual(slices, []) - - def test_query_with_all_supported_types(self): - record_batch = pa.RecordBatch.from_arrays([ - pa.array([[1], [2]], type=pa.list_(pa.int32())), - pa.array([[10], [20]], type=pa.list_(pa.int64())), - pa.array([[1.1], [2.2]], type=pa.list_(pa.float32())), - pa.array([[10.1], [20.2]], type=pa.list_(pa.float64())), - pa.array([['a'], ['b']], type=pa.list_(pa.string())), - pa.array([['a+'], ['b+']], type=pa.list_(pa.large_string())), - pa.array([[b'a_bytes'], [b'b_bytes']], type=pa.list_(pa.binary())), - 
pa.array([[b'a_bytes+'], [b'b_bytes+']], - type=pa.list_(pa.large_binary())), - ], [ - 'int32_list', - 'int64_list', - 'float32_list', - 'float64_list', - 'string_list', - 'large_string_list', - 'binary_list', - 'large_binary_list', - ]) - sql = """ - SELECT - ARRAY( - SELECT - STRUCT(int32_list, int64_list, - float32_list, float64_list, - string_list, large_string_list, - binary_list, large_binary_list) - FROM - example.int32_list, - example.int64_list, - example.float32_list, - example.float64_list, - example.string_list, - example.large_string_list, - example.binary_list, - example.large_binary_list - ) as slice_key - FROM Examples as example;""" - query = sql_util.RecordBatchSQLSliceQuery(sql, record_batch.schema) - slices = query.Execute(record_batch) - self.assertEqual(slices, - [[[('int32_list', '1'), ('int64_list', '10'), - ('float32_list', '1.1'), ('float64_list', '10.1'), - ('string_list', 'a'), ('large_string_list', 'a+'), - ('binary_list', 'a_bytes'), - ('large_binary_list', 'a_bytes+')]], - [[('int32_list', '2'), ('int64_list', '20'), - ('float32_list', '2.2'), ('float64_list', '20.2'), - ('string_list', 'b'), ('large_string_list', 'b+'), - ('binary_list', 'b_bytes'), - ('large_binary_list', 'b_bytes+')]]]) - - @parameterized.named_parameters(*_TEST_CASES_OF_STRUCT_ARRAYS) - def test_query_with_struct_arrays(self, record_batch, sql, expected_output): - query = sql_util.RecordBatchSQLSliceQuery(sql, record_batch.schema) - slices = query.Execute(record_batch) - self.assertEqual(slices, expected_output) - - -if __name__ == '__main__': - absltest.main() diff --git a/tfx_bsl/cc/BUILD b/tfx_bsl/cc/BUILD index 0be99768..6477693e 100644 --- a/tfx_bsl/cc/BUILD +++ b/tfx_bsl/cc/BUILD @@ -22,7 +22,6 @@ tfx_bsl_pybind_extension( "//tfx_bsl/cc/arrow:arrow_submodule", "//tfx_bsl/cc/coders:coders_submodule", "//tfx_bsl/cc/sketches:sketches_submodule", - "//tfx_bsl/cc/statistics:statistics_sql_submodule", "//tfx_bsl/cc/statistics:statistics_submodule", "@arrow", "@pybind11", diff --git a/tfx_bsl/cc/arrow/BUILD b/tfx_bsl/cc/arrow/BUILD index 2283367c..4d62f80a 100644 --- a/tfx_bsl/cc/arrow/BUILD +++ b/tfx_bsl/cc/arrow/BUILD @@ -25,52 +25,6 @@ cc_library( ], ) -cc_library( - name = "sql_util", - srcs = ["sql_util.cc"], - hdrs = ["sql_util.h"], - copts = [ - "-fexceptions", - ], - features = ["-use_header_modules"], - deps = [ - "//tfx_bsl/cc/util:status_util", - "@arrow", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/status", - "@com_google_zetasql//zetasql/public:catalog", - "@com_google_zetasql//zetasql/public:evaluator", - "@com_google_zetasql//zetasql/public:evaluator_table_iterator", - "@com_google_zetasql//zetasql/public:simple_catalog", - "@com_google_zetasql//zetasql/public:type", - "@com_google_zetasql//zetasql/public:type_cc_proto", - "@com_google_zetasql//zetasql/public:value", - "@pybind11", - ], -) - -cc_library( - name = "sql_util_submodule", - srcs = select({ - "@bazel_tools//src/conditions:windows": [ - "sql_util_submodule_windows.cc", - ], - "//conditions:default": ["sql_util_submodule.cc"], - }), - hdrs = ["sql_util_submodule.h"], - copts = [ - "-fexceptions", - ], - features = ["-use_header_modules"], - deps = select({ - "@bazel_tools//src/conditions:windows": [], - "//conditions:default": [":sql_util"], - }) + [ - "//tfx_bsl/cc/pybind11:arrow_casters", - "@pybind11", - ], -) - cc_library( name = "table_util", srcs = ["table_util.cc"], @@ -99,7 +53,6 @@ cc_library( ], deps = [ ":array_util", - ":sql_util_submodule", ":table_util", 
"//tfx_bsl/cc/pybind11:arrow_casters", "@arrow", diff --git a/tfx_bsl/cc/arrow/arrow_submodule.cc b/tfx_bsl/cc/arrow/arrow_submodule.cc index 8216edc1..45f127a8 100644 --- a/tfx_bsl/cc/arrow/arrow_submodule.cc +++ b/tfx_bsl/cc/arrow/arrow_submodule.cc @@ -21,7 +21,6 @@ #include "absl/status/statusor.h" #include "arrow/api.h" #include "tfx_bsl/cc/arrow/array_util.h" -#include "tfx_bsl/cc/arrow/sql_util_submodule.h" #include "tfx_bsl/cc/arrow/table_util.h" #include "tfx_bsl/cc/pybind11/arrow_casters.h" #include "pybind11/stl.h" @@ -296,6 +295,5 @@ void DefineArrowSubmodule(pybind11::module main_module) { m.doc() = "Arrow utilities."; DefineArrayUtilSubmodule(m); DefineTableUtilSubmodule(m); - DefineSqlUtilSubmodule(m); } } // namespace tfx_bsl diff --git a/tfx_bsl/cc/arrow/sql_util.cc b/tfx_bsl/cc/arrow/sql_util.cc deleted file mode 100644 index cb4b72d3..00000000 --- a/tfx_bsl/cc/arrow/sql_util.cc +++ /dev/null @@ -1,564 +0,0 @@ -// Copyright 2021 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#include "tfx_bsl/cc/arrow/sql_util.h" - -#include -#include -#include -#include - -#include "zetasql/public/catalog.h" -#include "zetasql/public/evaluator.h" -#include "zetasql/public/simple_catalog.h" -#include "zetasql/public/type.h" -#include "zetasql/public/type.pb.h" -#include "zetasql/public/value.h" -#include "absl/memory/memory.h" -#include "absl/status/status.h" -#include "arrow/api.h" -#include "arrow/array.h" -#include "tfx_bsl/cc/util/status_util.h" - -namespace tfx_bsl { - -namespace { -constexpr char kTableName[] = "Examples"; - -using zetasql::SimpleCatalog; -using zetasql::SimpleTable; - -// Convert the sql query result to the slices list. -void ConvertQueryResultToSlices( - std::unique_ptr query_result_iterator, - std::vector>>>* - result) { - while (query_result_iterator->NextRow()) { - // Only one column will be returned. So we use index 0 here. - const zetasql::Value& value = query_result_iterator->GetValue(0); - std::vector>> row; - row.reserve(value.num_elements()); - - // The data format of the value is like: - // ARRAY([ - // STRUCT('slice_key_1': 1.1, 'slice_key_2': "male"), - // STRUCT('slice_key_1': 3.4, 'slice_key_2': "female"), - // STRUCT(...) - // ]) - for (const zetasql::Value& struct_element : value.elements()) { - std::vector> slices; - for (int i = 0; i < struct_element.num_fields(); i++) { - const zetasql::Value& field = struct_element.field(i); - std::string slice_key = - struct_element.type()->AsStruct()->field(i).name; - std::string slice_value; - // For string or byte type, field.ShortDebugString() returns with - // quotation marks, e.g '"real_value"', which we don't want. 
- if (field.type_kind() == zetasql::TYPE_STRING) { - slice_value = field.string_value(); - } else if (field.type_kind() == zetasql::TYPE_BYTES) { - slice_value = field.bytes_value(); - } else { - slice_value = field.ShortDebugString(); - } - slices.emplace_back(std::move(slice_key), std::move(slice_value)); - } - row.push_back(std::move(slices)); - } - result->push_back(std::move(row)); - } -} - -// Validate the slice sql query. -absl::Status ValidateSliceSqlQuery(const zetasql::PreparedQuery& query, - const std::string& sql) { - if (query.num_columns() != 1) { - return absl::InvalidArgumentError( - absl::StrCat("Invalid SQL statement: ", sql, - " Only one column should be returned.")); - } - if (!query.column_type(0)->IsArray() || - !query.column_type(0)->AsArray()->element_type()->IsStruct()) { - return absl::InvalidArgumentError(absl::StrCat( - "Invalid SQL statement: ", sql, - " The each row of query result should in an Array of Struct type.")); - } - for (const auto& field : - query.column_type(0)->AsArray()->element_type()->AsStruct()->fields()) { - if (!field.type->IsString() && !field.type->IsNumerical() && - !field.type->IsBytes()) { - return absl::InvalidArgumentError(absl::StrCat( - "Invalid SQL statement: ", sql, - " slices values must have primitive types. Found: ", field.name, ": ", - field.type->ShortTypeName(zetasql::ProductMode::PRODUCT_INTERNAL))); - } - } - return absl::OkStatus(); -} - -// The zetasql::StructType requires a zetasql::TypeFactory to construct and -// manage its life cycle. We need to make sure the type does not go out of -// scope when needed during query execution. And since the ZetaSql library -// does not expose its global type factory to the outside, we set up one here. -zetasql::TypeFactory& GetSqlTypeFactory() { - static zetasql::TypeFactory* type_factory = new zetasql::TypeFactory(); - return *type_factory; -} - -// A helper to bubble up the error message in absl::Status to arrow:Status when -// we build ZetaSQL struct type and data from arrow struct type and data, -// specifically to handle the case of invalid data. -arrow::Status ToArrowStatus(absl::Status status) { - if (ABSL_PREDICT_TRUE(status.ok())) return arrow::Status::OK(); - return arrow::Status::Invalid(status.message()); -} - -// This class converts the apache arrow array type to the corresponding -// zetasql type. 
-class ZetaSQLTypeVisitor : public arrow::TypeVisitor { - public: - ZetaSQLTypeVisitor() : zetasql_type_(nullptr) {} - - const zetasql::Type* ZetaSqlType() { return zetasql_type_; } - - arrow::Status Visit(const arrow::Int32Type& type) { - zetasql_type_ = zetasql::types::Int32Type(); - return arrow::Status::OK(); - } - - arrow::Status Visit(const arrow::Int64Type& type) { - zetasql_type_ = zetasql::types::Int64Type(); - return arrow::Status::OK(); - } - - arrow::Status Visit(const arrow::FloatType& type) { - zetasql_type_ = zetasql::types::FloatType(); - return arrow::Status::OK(); - } - - arrow::Status Visit(const arrow::DoubleType& type) { - zetasql_type_ = zetasql::types::DoubleType(); - return arrow::Status::OK(); - } - - arrow::Status Visit(const arrow::StringType& type) { - zetasql_type_ = zetasql::types::StringType(); - return arrow::Status::OK(); - } - - arrow::Status Visit(const arrow::LargeStringType& type) { - zetasql_type_ = zetasql::types::StringType(); - return arrow::Status::OK(); - } - - arrow::Status Visit(const arrow::BinaryType& type) { - zetasql_type_ = zetasql::types::BytesType(); - return arrow::Status::OK(); - } - - arrow::Status Visit(const arrow::LargeBinaryType& type) { - zetasql_type_ = zetasql::types::BytesType(); - return arrow::Status::OK(); - } - - arrow::Status Visit(const arrow::ListType& type) { return VisitList(type); } - arrow::Status Visit(const arrow::LargeListType& type) { - return VisitList(type); - } - - arrow::Status Visit(const arrow::StructType& type) { - std::vector fields; - for (int i = 0; i < type.num_fields(); ++i) { - ZetaSQLTypeVisitor visitor; - ARROW_RETURN_NOT_OK(type.field(i)->type()->Accept(&visitor)); - fields.push_back({type.field(i)->name(), visitor.ZetaSqlType()}); - } - return ToArrowStatus( - GetSqlTypeFactory().MakeStructTypeFromVector(fields, &zetasql_type_)); - } - - private: - // The life cycle of zetasql type is managed by TypeFactory. - const zetasql::Type* zetasql_type_; - - template - arrow::Status VisitList(const ListLikeType& type) { - ZetaSQLTypeVisitor visitor; - ARROW_RETURN_NOT_OK(type.value_type()->Accept(&visitor)); - const zetasql::Type* child_zetasql_type = visitor.ZetaSqlType(); - zetasql_type_ = zetasql::types::ArrayTypeFromSimpleTypeKind( - child_zetasql_type->kind()); - if (zetasql_type_ == nullptr) { - return arrow::Status::TypeError( - "Unsupported arrow data type: ", type.ToString(), - " For ListType arrow array, we only support an array of a primary " - "type. A ListType of ListType is not supported currently."); - } - return arrow::Status::OK(); - } -}; - -// This class converts the apache arrow array value to the corresponding -// zetasql value. 
-class ZetaSQLValueVisitor : public arrow::ArrayVisitor { - public: - ZetaSQLValueVisitor(const zetasql::Type* zetasql_type) - : zetasql_type_(zetasql_type), index_(0) {} - - void SetIndex(int64_t index) { index_ = index; } - - const zetasql::Value& ZetaSqlValue() const { return zetasql_value_; } - - arrow::Status Visit(const arrow::Int32Array& array) override { - if (array.IsNull(index_)) { - zetasql_value_ = zetasql::Value::NullInt32(); - } else { - zetasql_value_ = zetasql::Value::Int32(array.Value(index_)); - } - return arrow::Status::OK(); - } - - arrow::Status Visit(const arrow::Int64Array& array) override { - if (array.IsNull(index_)) { - zetasql_value_ = zetasql::Value::NullInt64(); - } else { - zetasql_value_ = zetasql::Value::Int64(array.Value(index_)); - } - return arrow::Status::OK(); - } - - arrow::Status Visit(const arrow::FloatArray& array) override { - if (array.IsNull(index_)) { - zetasql_value_ = zetasql::Value::NullFloat(); - } else { - zetasql_value_ = zetasql::Value::Float(array.Value(index_)); - } - return arrow::Status::OK(); - } - - arrow::Status Visit(const arrow::DoubleArray& array) override { - if (array.IsNull(index_)) { - zetasql_value_ = zetasql::Value::NullDouble(); - } else { - zetasql_value_ = zetasql::Value::Double(array.Value(index_)); - } - return arrow::Status::OK(); - } - - arrow::Status Visit(const arrow::StringArray& array) override { - if (array.IsNull(index_)) { - zetasql_value_ = zetasql::Value::NullString(); - } else { - zetasql_value_ = zetasql::Value::StringValue(array.GetString(index_)); - } - return arrow::Status::OK(); - } - - arrow::Status Visit(const arrow::LargeStringArray& array) override { - if (array.IsNull(index_)) { - zetasql_value_ = zetasql::Value::NullString(); - } else { - zetasql_value_ = zetasql::Value::StringValue(array.GetString(index_)); - } - return arrow::Status::OK(); - } - - arrow::Status Visit(const arrow::BinaryArray& array) override { - if (array.IsNull(index_)) { - zetasql_value_ = zetasql::Value::NullBytes(); - } else { - zetasql_value_ = zetasql::Value::Bytes(array.GetString(index_)); - } - return arrow::Status::OK(); - } - - arrow::Status Visit(const arrow::LargeBinaryArray& array) override { - if (array.IsNull(index_)) { - zetasql_value_ = zetasql::Value::NullBytes(); - } else { - zetasql_value_ = zetasql::Value::Bytes(array.GetString(index_)); - } - return arrow::Status::OK(); - } - - arrow::Status Visit(const arrow::ListArray& array) override { - return VisitList(array); - } - - arrow::Status Visit(const arrow::LargeListArray& array) override { - return VisitList(array); - } - - arrow::Status Visit(const arrow::StructArray& array) override { - if (array.IsNull(index_)) { - zetasql_value_ = zetasql::Value::Null(zetasql_type_); - return arrow::Status::OK(); - } - if (!zetasql_type_->IsStruct()) { - return arrow::Status::TypeError( - "Expect a ZetaSql struct type to convert data to ZetaSql values, " - ", but got: ", - zetasql_type_->DebugString()); - } - const zetasql::StructType* sql_struct_type = zetasql_type_->AsStruct(); - std::vector child_zetasql_values; - child_zetasql_values.reserve(array.num_fields()); - for (int i = 0; i < array.num_fields(); ++i) { - ZetaSQLValueVisitor child_arrow_visitor(sql_struct_type->field(i).type); - child_arrow_visitor.SetIndex(index_); - ARROW_RETURN_NOT_OK(array.field(i)->Accept(&child_arrow_visitor)); - child_zetasql_values.push_back(child_arrow_visitor.ZetaSqlValue()); - } - absl::StatusOr value = zetasql::Value::MakeStruct( - sql_struct_type, 
std::move(child_zetasql_values)); - if (value.ok()) zetasql_value_ = std::move(*value); - return ToArrowStatus(value.status()); - } - - private: - zetasql::Value zetasql_value_; - const zetasql::Type* zetasql_type_; - int64_t index_; - - template - arrow::Status VisitList(const ListLikeType& array) { - if (array.IsNull(index_)) { - zetasql_value_ = zetasql::Value::Null(zetasql_type_); - return arrow::Status::OK(); - } - - const auto value_position = array.value_offset(index_); - const auto value_length = array.value_length(index_); - - std::shared_ptr child_array = array.values(); - - // Recursively visit child element to get child zetasql values. - std::vector child_zetasql_values; - child_zetasql_values.reserve(value_length); - const zetasql::Type* child_type = - zetasql_type_->AsArray()->element_type(); - ZetaSQLValueVisitor child_arrow_visitor(child_type); - for (int j = value_position; j < value_position + value_length; j++) { - child_arrow_visitor.SetIndex(j); - ARROW_RETURN_NOT_OK(child_array->Accept(&child_arrow_visitor)); - const zetasql::Value& value = child_arrow_visitor.ZetaSqlValue(); - child_zetasql_values.push_back(value); - } - - // Generate current element zetasql value based on child zetasql value - // and type. - zetasql_value_ = zetasql::Value::UnsafeArray( - zetasql::types::ArrayTypeFromSimpleTypeKind(child_type->kind()), - std::move(child_zetasql_values)); - return arrow::Status::OK(); - } -}; - -// An iterator that wraps the RecordBatch data structure. -class RecordBatchEvaluatorTableIterator - : public zetasql::EvaluatorTableIterator { - public: - RecordBatchEvaluatorTableIterator(const RecordBatchEvaluatorTableIterator&) = - delete; - RecordBatchEvaluatorTableIterator& operator=( - const RecordBatchEvaluatorTableIterator&) = delete; - - RecordBatchEvaluatorTableIterator( - const arrow::RecordBatch& record_batch, - const std::vector& columns_name_and_type) - : record_batch_(record_batch), - columns_name_and_type_(columns_name_and_type), - current_row_index_(-1), - cancelled_(false), - status_(absl::OkStatus()), - zetasql_value_visitors_(columns_name_and_type.size()) { - for (int i = 0; i < zetasql_value_visitors_.size(); i++) { - zetasql_value_visitors_[i] = std::make_unique( - columns_name_and_type[i].second); - } - - // Build a map: "record batch colume name" -> "record batch_ colume index". - std::unordered_map record_batch_column_name_to_index; - for (int i = 0; i < record_batch_.num_columns(); i++) { - record_batch_column_name_to_index[record_batch_.column_name(i)] = i; - } - - // Build a map: "virtual sql table colume index" -> "record batch colume - // index". - for (int zetasql_index = 0; - zetasql_index < columns_name_and_type_.size(); zetasql_index++) { - const std::string& column_name = - columns_name_and_type_[zetasql_index].first; - index_map_[zetasql_index] = - record_batch_column_name_to_index[column_name]; - } - } - - int NumColumns() const override { return columns_name_and_type_.size(); } - - std::string GetColumnName(int i) const override { - return columns_name_and_type_[i].first; - } - - const zetasql::Type* GetColumnType(int i) const override { - return columns_name_and_type_[i].second; - } - - bool NextRow() override { - if (cancelled_) { - return false; - } - if (++current_row_index_ >= record_batch_.num_rows()) { - return false; - } - return true; - } - - const zetasql::Value& GetValue(int sql_column_index) const override { - // Convert to real record batch column index. 
- int record_batch_column_index = index_map_.at(sql_column_index); - std::shared_ptr column_array = - record_batch_.column(record_batch_column_index); - - const std::unique_ptr& zetasql_value_visitor = - zetasql_value_visitors_.at(sql_column_index); - zetasql_value_visitor->SetIndex(current_row_index_); - arrow::Status status = column_array->Accept(zetasql_value_visitor.get()); - assert(status.ok()); - - // A reference is returned here. So zetasql_arrow_visitor must live - // longer than current function scope. - return zetasql_value_visitor->ZetaSqlValue(); - } - - absl::Status Status() const final { - if (cancelled_) { - return absl::CancelledError( - "RecordBatchEvaluatorTableIterator was cancelled"); - } - return absl::OkStatus(); - } - - absl::Status Cancel() final { - cancelled_ = true; - return absl::OkStatus(); - } - - private: - const arrow::RecordBatch& record_batch_; - const std::vector& columns_name_and_type_; - int current_row_index_; - bool cancelled_; - absl::Status status_; - - std::vector> zetasql_value_visitors_; - // Map the index from zetasql table to record batch table. We need this - // mapping because the some column format of the record batch table is not - // supported by the zetasql. - std::unordered_map index_map_; -}; -} // namespace - -// static. Create RecordBatchSQLSliceQuery. -absl::Status RecordBatchSQLSliceQuery::Make( - const std::string& sql, std::shared_ptr arrow_schema, - std::unique_ptr* result) { - // Build name and type per column. - std::vector columns_name_and_type; - for (int i = 0; i < arrow_schema->num_fields(); i++) { - ZetaSQLTypeVisitor type_visitor; - arrow::Status status = - arrow_schema->field(i)->type()->Accept(&type_visitor); - // Only add supported columns to the sql table. - if (status.ok()) { - columns_name_and_type.emplace_back(arrow_schema->field(i)->name(), - type_visitor.ZetaSqlType()); - } - } - - // Build sql table. - std::unique_ptr table = - std::make_unique(kTableName, columns_name_and_type); - - // Build sql Catalog. - std::unique_ptr catalog = - std::make_unique("catalog"); - catalog->AddZetaSQLFunctions(); - catalog->AddTable(table->Name(), table.get()); - - // Prepare the query. - const zetasql::EvaluatorOptions evaluator_options; - std::unique_ptr query = - std::make_unique(sql, evaluator_options); - zetasql::AnalyzerOptions analyzer_options; - - absl::Status status = query->Prepare(analyzer_options, catalog.get()); - if (absl::IsInvalidArgument(status) && - absl::StartsWith(status.message(), "Unrecognized name:")) { - return absl::InvalidArgumentError(absl::StrCat( - "Unable to analyze SQL query. Error: ", - absl::StatusCodeToString(status.code()), " : ", status.message(), - ". 
Are you querying any unsupported column?")); - } - TFX_BSL_RETURN_IF_ERROR(status); - - TFX_BSL_RETURN_IF_ERROR(ValidateSliceSqlQuery(*query, sql)); - *result = absl::WrapUnique(new RecordBatchSQLSliceQuery( - std::move(arrow_schema), std::move(columns_name_and_type), - std::move(table), std::move(catalog), std::move(query))); - return absl::OkStatus(); -} - -RecordBatchSQLSliceQuery::RecordBatchSQLSliceQuery( - std::shared_ptr arrow_schema, - std::vector columns_name_and_type, - std::unique_ptr table, - std::unique_ptr catalog, - std::unique_ptr query) - : arrow_schema_(std::move(arrow_schema)), - columns_name_and_type_(std::move(columns_name_and_type)), - table_(std::move(table)), - catalog_(std::move(catalog)), - query_(std::move(query)) {} - -absl::Status RecordBatchSQLSliceQuery::Execute( - const arrow::RecordBatch& record_batch, - std::vector>>>* - result) { - // Check if the schema is as same as the stored schema. - if (!record_batch.schema()->Equals(*arrow_schema_)) { - return absl::InvalidArgumentError("Unexpected RecordBatch schema."); - } - - // Add record batch to the table. - table_->SetEvaluatorTableIteratorFactory( - [&record_batch, this](absl::Span columns) - -> absl::StatusOr< - std::unique_ptr> { - return std::make_unique( - record_batch, this->columns_name_and_type_); - }); - - // Excute. - absl::StatusOr> - query_result_iterator = query_->Execute(); - TFX_BSL_RETURN_IF_ERROR(query_result_iterator.status()); - - // Convert the query result to the output. - ConvertQueryResultToSlices(std::move(query_result_iterator.value()), result); - return absl::OkStatus(); -} - -RecordBatchSQLSliceQuery::~RecordBatchSQLSliceQuery() {} - -} // namespace tfx_bsl diff --git a/tfx_bsl/cc/arrow/sql_util.h b/tfx_bsl/cc/arrow/sql_util.h deleted file mode 100644 index cbeeb387..00000000 --- a/tfx_bsl/cc/arrow/sql_util.h +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright 2021 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#ifndef THIRD_PARTY_PY_TFX_BSL_GOOGLE_CC_ARROW_SQL_UTIL_H_ -#define THIRD_PARTY_PY_TFX_BSL_GOOGLE_CC_ARROW_SQL_UTIL_H_ - -#include -#include - -#include "zetasql/public/evaluator.h" -#include "zetasql/public/evaluator_table_iterator.h" -#include "zetasql/public/simple_catalog.h" -#include "absl/status/status.h" -#include "pybind11/pybind11.h" - -namespace arrow { -class Array; -class RecordBatch; -class Schema; -} // namespace arrow - -namespace tfx_bsl { - -class RecordBatchSQLSliceQuery { - public: - static absl::Status Make(const std::string& sql, - std::shared_ptr arrow_schema, - std::unique_ptr* result); - ~RecordBatchSQLSliceQuery(); - - // Creates slice keys for each row in a RecordBatch, according to a SQL - // statement. 
- absl::Status Execute( - const arrow::RecordBatch& record_batch, - std::vector< - std::vector>>>* - result); - - RecordBatchSQLSliceQuery(const RecordBatchSQLSliceQuery&) = delete; - RecordBatchSQLSliceQuery& operator=(const RecordBatchSQLSliceQuery&) = delete; - - private: - std::shared_ptr arrow_schema_; - std::vector columns_name_and_type_; - std::unique_ptr table_; - std::unique_ptr catalog_; - std::unique_ptr query_; - - RecordBatchSQLSliceQuery( - std::shared_ptr arrow_schema, - std::vector columns_name_and_type, - std::unique_ptr table, - std::unique_ptr catalog, - std::unique_ptr query); -}; - -} // namespace tfx_bsl - -#endif // THIRD_PARTY_PY_TFX_BSL_GOOGLE_CC_ARROW_SQL_UTIL_H_ diff --git a/tfx_bsl/cc/arrow/sql_util_submodule.cc b/tfx_bsl/cc/arrow/sql_util_submodule.cc deleted file mode 100644 index 9b83b1a6..00000000 --- a/tfx_bsl/cc/arrow/sql_util_submodule.cc +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright 2021 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#include "tfx_bsl/cc/arrow/sql_util_submodule.h" - -#include "tfx_bsl/cc/arrow/sql_util.h" -#include "tfx_bsl/cc/pybind11/arrow_casters.h" -#include "pybind11/stl.h" - -namespace tfx_bsl { -namespace py = ::pybind11; - -void DefineSqlUtilSubmodule(py::module arrow_module) { - auto m = arrow_module.def_submodule("sql_util"); - m.doc() = "Arrow Table SQL utilities."; - - py::class_(m, "RecordBatchSQLSliceQuery") - .def(py::init([](const std::string& sql, - std::shared_ptr arrow_schema) { - std::unique_ptr result; - absl::Status s = - RecordBatchSQLSliceQuery::Make(sql, arrow_schema, &result); - if (!s.ok()) { - throw std::runtime_error(s.ToString()); - } - return result; - }), - py::arg("sql"), py::arg("arrow_schema")) - .def( - "Execute", - [](RecordBatchSQLSliceQuery* slice_query, - const std::shared_ptr& record_batch) { - std::vector< - std::vector>>> - result; - absl::Status s = slice_query->Execute(*record_batch, &result); - if (!s.ok()) { - throw std::runtime_error(s.ToString()); - } - return result; - }, - py::call_guard()); -} - -} // namespace tfx_bsl diff --git a/tfx_bsl/cc/arrow/sql_util_submodule.h b/tfx_bsl/cc/arrow/sql_util_submodule.h deleted file mode 100644 index 07b562e3..00000000 --- a/tfx_bsl/cc/arrow/sql_util_submodule.h +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2021 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef THIRD_PARTY_PY_TFX_BSL_CC_ARROW_ARROW_SUBMODULE_SQL_UTIL_DEFINITION_H_ -#define THIRD_PARTY_PY_TFX_BSL_CC_ARROW_ARROW_SUBMODULE_SQL_UTIL_DEFINITION_H_ - -#include "pybind11/pybind11.h" - -namespace tfx_bsl { - -void DefineSqlUtilSubmodule(pybind11::module arrow_module); - -} // namespace tfx_bsl - -#endif // THIRD_PARTY_PY_TFX_BSL_CC_ARROW_ARROW_SUBMODULE_SQL_UTIL_DEFINITION_H_ diff --git a/tfx_bsl/cc/arrow/sql_util_submodule_windows.cc b/tfx_bsl/cc/arrow/sql_util_submodule_windows.cc deleted file mode 100644 index 7a8e1b79..00000000 --- a/tfx_bsl/cc/arrow/sql_util_submodule_windows.cc +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2021 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#include "tfx_bsl/cc/arrow/sql_util_submodule.h" -#include "tfx_bsl/cc/pybind11/arrow_casters.h" -#include "pybind11/stl.h" - -namespace tfx_bsl { -namespace py = ::pybind11; - -namespace { -class RecordBatchSQLSliceQuery {}; -} // namespace - -// This function defines a class called RecordBatchSQLSliceQuery which will -// fail on purpose once initialized. This is because the functional -// RecordBatchSQLSliceQuery uses ZetaSQL which cannot be compiled on windows. -// b/191377114. -void DefineSqlUtilSubmodule(py::module arrow_module) { - auto m = arrow_module.def_submodule("sql_util"); - m.doc() = "Arrow Table SQL utilities is not supported on Windows."; - - py::class_(m, "RecordBatchSQLSliceQuery") - .def(py::init([](const std::string& sql, - std::shared_ptr arrow_schema) { - std::unique_ptr result; - throw std::runtime_error( - "RecordBatchSQLSliceQuery is not supported on Windows."); - return result; - }), - py::arg("sql"), py::arg("arrow_schema")); -} - -} // namespace tfx_bsl diff --git a/tfx_bsl/cc/statistics/BUILD b/tfx_bsl/cc/statistics/BUILD index e9a54cb7..52dc6294 100644 --- a/tfx_bsl/cc/statistics/BUILD +++ b/tfx_bsl/cc/statistics/BUILD @@ -43,59 +43,6 @@ cc_library( ], ) -cc_library( - name = "sql_util", - srcs = select({ - "@bazel_tools//src/conditions:windows": [ - "sql_util_windows.cc", - ], - "//conditions:default": ["sql_util.cc"], - }), - hdrs = ["sql_util.h"], - visibility = [ - "//visibility:public", - ], - deps = select({ - "@bazel_tools//src/conditions:windows": [], - "//conditions:default": [ - "@com_google_zetasql//zetasql/public:analyzer_options", - "@com_google_zetasql//zetasql/public:evaluator", - "@com_google_zetasql//zetasql/public:value", - ], - }) + [ - "@com_github_tensorflow_metadata//tensorflow_metadata/proto/v0:cc_metadata_v0_proto_cc", - "@com_google_absl//absl/status", - "@com_google_absl//absl/status:statusor", - ], -) - -cc_library( - name = "statistics_sql_submodule", - srcs = select({ - "@bazel_tools//src/conditions:windows": [ - "statistics_sql_submodule_windows.cc", - ], - "//conditions:default": ["statistics_sql_submodule.cc"], - }), - hdrs = ["statistics_sql_submodule.h"], - copts = [ - "-fexceptions", - ], - features = ["-use_header_modules"], - visibility = [ - "//tfx_bsl/cc:__pkg__", - ], - deps = select({ - 
"@bazel_tools//src/conditions:windows": [], - "//conditions:default": [":sql_util"], - }) + [ - "@com_github_tensorflow_metadata//tensorflow_metadata/proto/v0:cc_metadata_v0_proto_cc", - "@com_google_absl//absl/status", - "@com_google_absl//absl/types:optional", - "@pybind11", - ], -) - cc_test( name = "merge_util_test", srcs = ["merge_util_test.cc"], diff --git a/tfx_bsl/cc/statistics/sql_util.cc b/tfx_bsl/cc/statistics/sql_util.cc deleted file mode 100644 index c3cc9b2a..00000000 --- a/tfx_bsl/cc/statistics/sql_util.cc +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright 2022 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#include "tfx_bsl/cc/statistics/sql_util.h" - -#include - -#include "zetasql/public/analyzer_options.h" -#include "zetasql/public/evaluator.h" -#include "zetasql/public/value.h" -#include "absl/status/status.h" -#include "absl/status/statusor.h" -#include "tensorflow_metadata/proto/v0/statistics.pb.h" - -namespace tfx_bsl { -namespace statistics { - -using tensorflow::metadata::v0::FeatureNameStatistics; - -namespace { - -absl::StatusOr GetResult( - const absl::StatusOr& result_or) { - // Potentially convert from zetasql to absl on OSS. - if (!result_or.ok()) return absl::StatusOr(result_or.status()); - zetasql::Value value = result_or.value(); - if (value.is_null()) { - // Maybe this should be false and not an error state? - return absl::InternalError("Validation predicate returned null."); - } - if (!value.is_valid()) { - return absl::InternalError("Validate predicate returned invalid value."); - } - if (!value.type()->IsBool()) { - return absl::InternalError("Validation predicate returned non-bool."); - } - return value.bool_value(); -} - -} // namespace - -absl::StatusOr EvaluatePredicate( - const FeatureNameStatistics& feature_statistics, - const std::string& query) { - zetasql::TypeFactory type_factory; - const zetasql::ProtoType* proto_type; - const absl::Status make_proto_type_status = type_factory.MakeProtoType( - FeatureNameStatistics::descriptor(), - &proto_type); - if (!make_proto_type_status.ok()) return make_proto_type_status; - - zetasql::AnalyzerOptions options; - const absl::Status analyzer_options_status = - options.SetInScopeExpressionColumn("feature", proto_type); - - - // TODO(zwestrick): Use TFX_BSL_RETURN_IF_ERROR once status utils stops - // bringing in arrow libs. 
- if (!analyzer_options_status.ok()) return analyzer_options_status; - - zetasql::PreparedExpression expr(query); - const absl::Status expr_status = expr.Prepare(options); - if (!expr_status.ok()) return expr_status; - - return GetResult(expr.Execute( - {{"feature", zetasql::values::Proto(proto_type, feature_statistics)}})); -} - -absl::StatusOr EvaluatePredicate( - const FeatureNameStatistics& - feature_statistics_base, - const FeatureNameStatistics& - feature_statistics_test, - - const std::string& query) { - zetasql::TypeFactory type_factory; - const zetasql::ProtoType* proto_type; - const absl::Status make_proto_type_status = type_factory.MakeProtoType( - FeatureNameStatistics::descriptor(), - &proto_type); - if (!make_proto_type_status.ok()) return make_proto_type_status; - - zetasql::AnalyzerOptions options; - for (const auto& name : {"feature_base", "feature_test"}) { - const absl::Status analyzer_options_status = - options.AddExpressionColumn(name, proto_type); - if (!analyzer_options_status.ok()) return analyzer_options_status; - } - - zetasql::PreparedExpression expr(query); - const absl::Status expr_status = expr.Prepare(options); - if (!expr_status.ok()) return expr_status; - - return GetResult(expr.Execute( - {{"feature_base", - zetasql::values::Proto(proto_type, feature_statistics_base)}, - {"feature_test", - zetasql::values::Proto(proto_type, feature_statistics_test)}})); -} - -} // namespace statistics -} // namespace tfx_bsl diff --git a/tfx_bsl/cc/statistics/sql_util.h b/tfx_bsl/cc/statistics/sql_util.h deleted file mode 100644 index e1a02977..00000000 --- a/tfx_bsl/cc/statistics/sql_util.h +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2022 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#ifndef THIRD_PARTY_PY_TFX_BSL_CC_STATISTICS_SQL_UTIL_H_ -#define THIRD_PARTY_PY_TFX_BSL_CC_STATISTICS_SQL_UTIL_H_ - -#include - -#include "absl/status/statusor.h" -#include "tensorflow_metadata/proto/v0/statistics.pb.h" - -namespace tfx_bsl { -namespace statistics { - -// APIs are experimental. - -// Evaluates a GoogleSQL expression returning a bool with `feature_statistics` -// bound to `feature`. -absl::StatusOr EvaluatePredicate( - const tensorflow::metadata::v0::FeatureNameStatistics& feature_statistics, - const std::string& query); - -// Evaluates a GoogleSQL expression returning a bool with -// `feature_statistics_base` bound to `feature_base` and -// `feature_statistics_test` bound to `feature_test`. 
-absl::StatusOr<bool> EvaluatePredicate(
-    const tensorflow::metadata::v0::FeatureNameStatistics&
-        feature_statistics_base,
-    const tensorflow::metadata::v0::FeatureNameStatistics&
-        feature_statistics_test,
-    const std::string& query);
-
-}  // namespace statistics
-}  // namespace tfx_bsl
-
-#endif  // THIRD_PARTY_PY_TFX_BSL_CC_STATISTICS_SQL_UTIL_H_
diff --git a/tfx_bsl/cc/statistics/statistics_sql_submodule.cc b/tfx_bsl/cc/statistics/statistics_sql_submodule.cc
deleted file mode 100644
index b3a2e054..00000000
--- a/tfx_bsl/cc/statistics/statistics_sql_submodule.cc
+++ /dev/null
@@ -1,85 +0,0 @@
-// Copyright 2022 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// https://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "tfx_bsl/cc/statistics/statistics_sql_submodule.h"
-
-#include
-#include
-
-#include "absl/status/status.h"
-#include "absl/types/optional.h"
-#include "tfx_bsl/cc/statistics/sql_util.h"
-#include "pybind11/attr.h"
-#include "pybind11/pybind11.h"
-#include "tensorflow_metadata/proto/v0/statistics.pb.h"
-
-namespace tfx_bsl {
-namespace {
-namespace py = pybind11;
-
-using tensorflow::metadata::v0::FeatureNameStatistics;
-
-void DefineEvaluatePredicate(py::module m) {
-  m.def(
-      "EvaluateUnaryStatsPredicate",
-      [](const std::string& feature_stats_serialized,
-         const std::string& query) {
-        FeatureNameStatistics feature_stats;
-        if (!feature_stats.ParseFromString(feature_stats_serialized)) {
-          throw std::runtime_error("Failed to parse FeatureNameStatistics.");
-        }
-        absl::StatusOr<bool> result_or =
-            statistics::EvaluatePredicate(feature_stats, query);
-        if (!result_or.ok()) {
-          throw std::runtime_error(result_or.status().ToString());
-        }
-        return result_or.value();
-      },
-      py::doc("Evaluates a SQL predicate over a single FeatureNameStatistics "
-              "proto bound to 'feature'."),
-      py::call_guard<py::gil_scoped_release>());
-  m.def(
-      "EvaluateBinaryStatsPredicate",
-      [](const std::string& base_feature_stats_serialized,
-         const std::string& test_feature_stats_serialized,
-         const std::string& query) {
-        FeatureNameStatistics base_feature_stats;
-        if (!base_feature_stats.ParseFromString(
-                base_feature_stats_serialized)) {
-          throw std::runtime_error("Failed to parse FeatureNameStatistics.");
-        }
-        FeatureNameStatistics test_feature_stats;
-        if (!test_feature_stats.ParseFromString(
-                test_feature_stats_serialized)) {
-          throw std::runtime_error("Failed to parse FeatureNameStatistics.");
-        }
-        absl::StatusOr<bool> result_or = statistics::EvaluatePredicate(
-            base_feature_stats, test_feature_stats, query);
-        if (!result_or.ok()) {
-          throw std::runtime_error(result_or.status().ToString());
-        }
-        return result_or.value();
-      },
-      py::doc("Evaluates a SQL predicate over a pair of FeatureNameStatistics "
-              "protos bound to 'feature_base' and 'feature_test'."),
-      py::call_guard<py::gil_scoped_release>());
-}
-}  // namespace
-
-void DefineStatisticsSqlSubmodule(py::module main_module) {
-  auto m = main_module.def_submodule("statistics_sql");
-  m.doc() = "Pybind11 bindings for (TFDV) statistics sql utilities.";
-  DefineEvaluatePredicate(m);
-}
-
-}  // namespace tfx_bsl
diff --git a/tfx_bsl/cc/tfx_bsl_extension.cc b/tfx_bsl/cc/tfx_bsl_extension.cc
index 24853501..9d5143d6 100644
--- a/tfx_bsl/cc/tfx_bsl_extension.cc
+++ b/tfx_bsl/cc/tfx_bsl_extension.cc
@@ -25,7 +25,6 @@
 #include "tfx_bsl/cc/coders/coders_submodule.h"
 #include "tfx_bsl/cc/sketches/sketches_submodule.h"
 #include "tfx_bsl/cc/statistics/statistics_submodule.h"
-#include "tfx_bsl/cc/statistics/statistics_sql_submodule.h"
 #include "pybind11/pybind11.h"
 
 #ifndef TFX_BSL_USE_ARROW_C_ABI
@@ -51,7 +50,6 @@ PYBIND11_MODULE(
   DefineCodersSubmodule(m);
   DefineSketchesSubmodule(m);
   DefineStatisticsSubmodule(m);
-  DefineStatisticsSqlSubmodule(m);
 }
 
 }  // namespace tfx_bsl
diff --git a/tfx_bsl/statistics/sql_util.py b/tfx_bsl/statistics/sql_util.py
deleted file mode 100644
index 01362c67..00000000
--- a/tfx_bsl/statistics/sql_util.py
+++ /dev/null
@@ -1,59 +0,0 @@
-# Copyright 2022 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Provides SQL based validation helpers. Experimental."""
-
-from tfx_bsl import statistics
-from tensorflow_metadata.proto.v0 import statistics_pb2
-
-
-def EvaluateUnaryStatsPredicate(feature: statistics_pb2.FeatureNameStatistics,
-                                query: str) -> bool:
-  """Evaluates a GoogleSQL expression over a single FeatureNameStatistics.
-
-  Args:
-    feature: statistics for one feature.
-    query: A GoogleSQL expression resolving to a boolean value. The passed
-      feature statistics are bound to `feature`. See sql_util_test.py for usage.
-
-  Returns:
-    The result of the query.
-
-  Raises:
-    RuntimeError: On failure.
-  """
-
-  return statistics.EvaluateUnaryStatsPredicate(feature.SerializeToString(),
-                                                query)
-
-
-def EvaluateBinaryStatsPredicate(
-    feature_base: statistics_pb2.FeatureNameStatistics,
-    feature_test: statistics_pb2.FeatureNameStatistics, query: str) -> bool:
-  """Evaluates a GoogleSQL expression over a pair of FeatureNameStatistics.
-
-  Args:
-    feature_base: baseline statistics.
-    feature_test: test statistics.
-    query: A GoogleSQL expression resolving to a boolean value. The passed
-      feature statistics are bound to `feature_base` and `feature_test`. See
-      sql_util_test.py for usage.
-
-  Returns:
-    The result of the query.
-
-  Raises:
-    RuntimeError: On failure.
-  """
-  return statistics.EvaluateBinaryStatsPredicate(
-      feature_base.SerializeToString(), feature_test.SerializeToString(), query)
diff --git a/tfx_bsl/statistics/sql_util_test.py b/tfx_bsl/statistics/sql_util_test.py
deleted file mode 100644
index 4fcdaa02..00000000
--- a/tfx_bsl/statistics/sql_util_test.py
+++ /dev/null
@@ -1,125 +0,0 @@
-# Copyright 2022 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and -# limitations under the License. -"""Tests for sql_util.""" - -import sys -import unittest - -from absl.testing import absltest -from absl.testing import parameterized -from tfx_bsl.statistics import sql_util - -from google.protobuf import text_format -from tensorflow_metadata.proto.v0 import statistics_pb2 - -_FEATURE_STATS = text_format.Parse( - """ - type: FLOAT - num_stats { - mean: 2.6666666666666665 - histograms { - num_nan: 1 - } - histograms { - num_nan: 1 - type: QUANTILES - } - } - """, statistics_pb2.FeatureNameStatistics()) - -_FEATURE_STATS_TEST = text_format.Parse( - """ - type: FLOAT - num_stats { - mean: 98 - } - """, statistics_pb2.FeatureNameStatistics()) - - -class SqlUtilTest(parameterized.TestCase): - - @parameterized.named_parameters( - { - 'testcase_name': 'simple_return_true', - 'query': 'feature.num_stats.mean > 0', - 'expected_result': True, - }, { - 'testcase_name': 'implicit_feature_return_true', - 'query': 'num_stats.mean > 0', - 'expected_result': True, - }, { - 'testcase_name': 'simple_return_false', - 'query': 'feature.num_stats.mean < 0', - 'expected_result': False, - }, { - 'testcase_name': - 'aggregate_return_true', - 'query': - '(SELECT MAX(hist.num_nan) > 0 FROM UNNEST(feature.num_stats.histograms) as hist)', - 'expected_result': - True, - }, { - 'testcase_name': 'returns_wrong_type', - 'query': '(SELECT \'foo\')', - 'expect_error': True, - }, { - 'testcase_name': - 'returns_multiple_values', - 'query': - '(SELECT hist.num_nan FROM UNNEST(feature.num_stats.histograms) as hist)', - 'expect_error': - True, - }, { - 'testcase_name': 'invalid_query', - 'query': 'wiggle woggle', - 'expect_error': True, - }) - @unittest.skipIf( - sys.platform.startswith('win'), - 'SQL based validation is not supported on Windows.') - def test_unary_predicate(self, - query, - expect_error=False, - expected_result=None): - if expect_error: - with self.assertRaises(RuntimeError): - sql_util.EvaluateUnaryStatsPredicate(_FEATURE_STATS, query) - else: - self.assertEqual( - sql_util.EvaluateUnaryStatsPredicate(_FEATURE_STATS, query), - expected_result) - - @parameterized.named_parameters( - { - 'testcase_name': 'simple_return_true', - 'query': 'feature_base.num_stats.mean < feature_test.num_stats.mean', - 'expected_result': True, - }, { - 'testcase_name': 'simple_return_false', - 'query': 'feature_base.num_stats.mean > feature_test.num_stats.mean', - 'expected_result': False, - }) - @unittest.skipIf( - sys.platform.startswith('win'), - 'SQL based validation is not supported on Windows.') - def test_binary_predicate(self, query, expected_result=None): - - self.assertEqual( - sql_util.EvaluateBinaryStatsPredicate(_FEATURE_STATS, - _FEATURE_STATS_TEST, query), - expected_result) - - -if __name__ == '__main__': - absltest.main() diff --git a/third_party/BUILD b/third_party/BUILD index e1ea3327..b8ebe881 100644 --- a/third_party/BUILD +++ b/third_party/BUILD @@ -4,9 +4,3 @@ exports_files([ "pybind11.BUILD", "python_configure.bzl", ]) - -filegroup( - name = "zetasql_patch", - srcs = ["zetasql.patch"], - visibility = ["//visibility:public"], -) diff --git a/third_party/zetasql.patch b/third_party/zetasql.patch deleted file mode 100644 index c90d3b21..00000000 --- a/third_party/zetasql.patch +++ /dev/null @@ -1,674 +0,0 @@ -diff --git a/zetasql/analyzer/BUILD b/zetasql/analyzer/BUILD -index 590f1be1..3ca15df4 100644 ---- a/zetasql/analyzer/BUILD -+++ b/zetasql/analyzer/BUILD -@@ -18,7 +18,7 @@ 
load("@bazel_skylib//:bzl_library.bzl", "bzl_library") - load(":builddefs.bzl", "gen_analyzer_test") - - package( -- default_visibility = ["//zetasql/base:zetasql_implementation"], -+ default_visibility = ["//visibility:public"], - ) - - filegroup( -diff --git a/zetasql/analyzer/expr_resolver_helper.cc b/zetasql/analyzer/expr_resolver_helper.cc -index 93c3654d..8fb2256e 100644 ---- a/zetasql/analyzer/expr_resolver_helper.cc -+++ b/zetasql/analyzer/expr_resolver_helper.cc -@@ -357,7 +357,8 @@ ExprResolutionInfo::ExprResolutionInfo( - : ExprResolutionInfo( - query_resolution_info_in, name_scope_in, aggregate_name_scope_in, - analytic_name_scope_in, -- {.allows_aggregation = allows_aggregation_in, -+ ExprResolutionInfoOptions{ -+ .allows_aggregation = allows_aggregation_in, - .allows_analytic = allows_analytic_in, - .use_post_grouping_columns = use_post_grouping_columns_in, - .clause_name = clause_name_in, -diff --git a/zetasql/analyzer/name_scope.cc b/zetasql/analyzer/name_scope.cc -index b9a3176f..c1cf274a 100644 ---- a/zetasql/analyzer/name_scope.cc -+++ b/zetasql/analyzer/name_scope.cc -@@ -1549,7 +1549,7 @@ NameList::AddRangeVariableInWrappingNameList( - // variables, including for value tables, so we use `flatten_to_table` - // which drops range variables. - ZETASQL_RETURN_IF_ERROR(range_variable_name_list->MergeFrom( -- *original_name_list, ast_location, {.flatten_to_table = true})); -+ *original_name_list, ast_location, MergeOptions{.flatten_to_table = true})); - - auto wrapper_name_list = std::make_shared(); - ZETASQL_RETURN_IF_ERROR( -# diff --git a/bazel/zetasql_deps_step_2.bzl b/bazel/zetasql_deps_step_2.bzl -# index 6873dbe9..872ffd5e 100644 -# --- a/bazel/zetasql_deps_step_2.bzl -# +++ b/bazel/zetasql_deps_step_2.bzl -# @@ -477,7 +477,6 @@ alias( -# flex_register_toolchains(version = "2.6.4") -# bison_register_toolchains(version = "3.3.2") -# go_rules_dependencies() -# - go_register_toolchains(version = "1.21.6") -# gazelle_dependencies() -# textmapper_dependencies() - -diff --git a/zetasql/analyzer/resolver_expr.cc b/zetasql/analyzer/resolver_expr.cc -index 6116b4f7..70e8c9fd 100644 ---- a/zetasql/analyzer/resolver_expr.cc -+++ b/zetasql/analyzer/resolver_expr.cc -@@ -5586,7 +5586,8 @@ absl::Status Resolver::ResolveAnalyticFunctionCall( - { - ExprResolutionInfo analytic_arg_resolution_info( - expr_resolution_info, -- {.name_scope = expr_resolution_info->analytic_name_scope, -+ ExprResolutionInfoOptions{ -+ .name_scope = expr_resolution_info->analytic_name_scope, - .allows_analytic = expr_resolution_info->allows_analytic, - .clause_name = expr_resolution_info->clause_name}); - ZETASQL_RETURN_IF_ERROR(ResolveExpressionArguments( - -diff --git a/zetasql/base/BUILD b/zetasql/base/BUILD -index aa1f00da..7d4c3b3a 100644 ---- a/zetasql/base/BUILD -+++ b/zetasql/base/BUILD -@@ -15,7 +15,7 @@ - - licenses(["notice"]) - --package(default_visibility = [":zetasql_implementation"]) -+package(default_visibility = ["//visibility:public"]) - - package_group( - name = "zetasql_implementation", -diff --git a/zetasql/base/testing/BUILD b/zetasql/base/testing/BUILD -index 10596497..239c670f 100644 ---- a/zetasql/base/testing/BUILD -+++ b/zetasql/base/testing/BUILD -@@ -16,7 +16,7 @@ - - licenses(["notice"]) - --package(default_visibility = ["//zetasql/base:zetasql_implementation"]) -+package(default_visibility = ["//visibility:public"]) - - # A drop in replacement for gtest_main that parsers absl flags - cc_library( -diff --git a/zetasql/common/BUILD b/zetasql/common/BUILD -index 
cdafb15e..761e13cd 100644 ---- a/zetasql/common/BUILD -+++ b/zetasql/common/BUILD -@@ -14,7 +14,7 @@ - # limitations under the License. - - package( -- default_visibility = ["//zetasql/base:zetasql_implementation"], -+ default_visibility = ["//visibility:public"], - features = ["parse_headers"], - ) - -diff --git a/zetasql/common/internal_value.h b/zetasql/common/internal_value.h -index 770333d2..617ef628 100644 ---- a/zetasql/common/internal_value.h -+++ b/zetasql/common/internal_value.h -@@ -116,7 +116,7 @@ class InternalValue { - static std::string FormatInternal(const Value& x, - bool include_array_ordereness - ) { -- return x.FormatInternal({ -+ return x.FormatInternal(Type::FormatValueContentOptions{ - .force_type_at_top_level = true, - .include_array_ordereness = include_array_ordereness, - .indent = 0, -diff --git a/zetasql/parser/BUILD b/zetasql/parser/BUILD -index 433cf157..4fa4417c 100644 ---- a/zetasql/parser/BUILD -+++ b/zetasql/parser/BUILD -@@ -26,7 +26,7 @@ load("//bazel:textmapper.bzl", "tm_syntax") - load(":builddefs.bzl", "gen_parser_test") - - package( -- default_visibility = ["//zetasql/base:zetasql_implementation"], -+ default_visibility = ["//visibility:public"], - ) - - genrule( -diff --git a/zetasql/public/types/BUILD b/zetasql/public/types/BUILD -index 2b42fdcb..19ff2a4e 100644 ---- a/zetasql/public/types/BUILD -+++ b/zetasql/public/types/BUILD -@@ -14,7 +14,7 @@ - # limitations under the License. - # - --package(default_visibility = ["//zetasql/base:zetasql_implementation"]) -+package(default_visibility = ["//visibility:public"]) - - cc_library( - name = "types", - -diff --git a/zetasql/public/value.cc b/zetasql/public/value.cc -index 7aeffb01..c9f9f9dc 100644 ---- a/zetasql/public/value.cc -+++ b/zetasql/public/value.cc -@@ -1067,7 +1067,7 @@ std::string Value::DebugString(bool verbose) const { - - // Format will wrap arrays and structs. - std::string Value::Format(bool print_top_level_type) const { -- return FormatInternal( -+ return FormatInternal(Type::FormatValueContentOptions - {.force_type_at_top_level = print_top_level_type, .indent = 0}); - } - -@@ -1335,7 +1335,7 @@ std::string Value::FormatInternal( - std::vector element_strings(elements().size()); - for (int i = 0; i < elements().size(); ++i) { - element_strings[i] = -- elements()[i].FormatInternal(options.IncreaseIndent()); -+ elements()[i].FormatInternal(Type::FormatValueContentOptions{options.IncreaseIndent()}); - } - // Sanitize any '$' characters before creating substitution template. "$$" - // is replaced by "$" in the output from absl::Substitute. -@@ -1377,7 +1377,7 @@ std::string Value::FormatInternal( - const StructType* struct_type = type()->AsStruct(); - std::vector field_strings(struct_type->num_fields()); - for (int i = 0; i < struct_type->num_fields(); i++) { -- field_strings[i] = fields()[i].FormatInternal(options.IncreaseIndent()); -+ field_strings[i] = fields()[i].FormatInternal(Type::FormatValueContentOptions{options.IncreaseIndent()}); - } - // Sanitize any '$' characters before creating substitution template. "$$" - // is replaced by "$" in the output from absl::Substitute. 
-@@ -1423,9 +1423,9 @@ std::string Value::FormatInternal( - } - std::vector boundaries_strings; - boundaries_strings.push_back( -- start().FormatInternal(options.IncreaseIndent())); -+ start().FormatInternal(Type::FormatValueContentOptions{options.IncreaseIndent()})); - boundaries_strings.push_back( -- end().FormatInternal(options.IncreaseIndent())); -+ end().FormatInternal(Type::FormatValueContentOptions{options.IncreaseIndent()})); - // Sanitize any '$' characters before creating substitution template. "$$" - // is replaced by "$" in the output from absl::Substitute. - std::string templ = -diff --git a/zetasql/reference_impl/algebrizer.cc b/zetasql/reference_impl/algebrizer.cc -index 2e1258ab..48a3d7f4 100644 ---- a/zetasql/reference_impl/algebrizer.cc -+++ b/zetasql/reference_impl/algebrizer.cc -@@ -6738,7 +6738,7 @@ absl::StatusOr> Algebrizer::AlgebrizeTvfScan( - ZETASQL_RET_CHECK(tvf_scan->signature()->argument(i).is_scalar()); - ZETASQL_ASSIGN_OR_RETURN(auto expr_argument, - AlgebrizeExpression(argument->expr())); -- arguments.push_back({.value = std::move(expr_argument)}); -+ arguments.push_back(TVFOp::TVFOpArgument{.value = std::move(expr_argument)}); - continue; - } - -@@ -6767,14 +6767,14 @@ absl::StatusOr> Algebrizer::AlgebrizeTvfScan( - columns.push_back({relation_signature_column.name, - argument_column.type(), input_variable}); - } -- arguments.push_back({.relation = TVFOp::TvfInputRelation{ -+ arguments.push_back(TVFOp::TVFOpArgument{.relation = TVFOp::TvfInputRelation{ - std::move(relation), std::move(columns)}}); - continue; - } - - if (argument->model() != nullptr) { - ZETASQL_RET_CHECK(tvf_scan->signature()->argument(i).is_model()); -- arguments.push_back({.model = argument->model()->model()}); -+ arguments.push_back(TVFOp::TVFOpArgument{.model = argument->model()->model()}); - continue; - } - -diff --git a/zetasql/reference_impl/relational_op.cc b/zetasql/reference_impl/relational_op.cc -index 1619590a..a18a733f 100644 ---- a/zetasql/reference_impl/relational_op.cc -+++ b/zetasql/reference_impl/relational_op.cc -@@ -835,11 +835,11 @@ absl::StatusOr> TVFOp::CreateIterator( - } - ZETASQL_RET_CHECK_EQ(columns.size(), tuple_indexes.size()); - input_arguments.push_back( -- {.relation = {std::make_unique( -+ TableValuedFunction::TvfEvaluatorArg{.relation = {std::make_unique( - std::move(columns), std::move(tuple_indexes), context, - std::move(tuple_iterator))}}); - } else if (argument.model) { -- input_arguments.push_back({.model = argument.model}); -+ input_arguments.push_back(TableValuedFunction::TvfEvaluatorArg{.model = argument.model}); - } else { - ZETASQL_RET_CHECK_FAIL() << "Unexpected TVFOpArgument"; - } - -diff --git a/bazel/zetasql_deps_step_2.bzl b/bazel/zetasql_deps_step_2.bzl -index 6873dbe9..223f8dbd 100644 ---- a/bazel/zetasql_deps_step_2.bzl -+++ b/bazel/zetasql_deps_step_2.bzl -@@ -19,7 +19,6 @@ - load("@bazel_gazelle//:deps.bzl", "gazelle_dependencies", "go_repository") - load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") - load("@io_bazel_rules_go//go:deps.bzl", "go_register_toolchains", "go_rules_dependencies") --load("@llvm_toolchain//:toolchains.bzl", "llvm_register_toolchains") - load("@rules_bison//bison:bison.bzl", "bison_register_toolchains") - load("@rules_flex//flex:flex.bzl", "flex_register_toolchains") - load("@rules_foreign_cc//foreign_cc:repositories.bzl", "rules_foreign_cc_dependencies") -@@ -29,7 +28,6 @@ load("@rules_proto//proto:setup.bzl", "rules_proto_setup") - load("@rules_proto//proto:toolchains.bzl", 
"rules_proto_toolchains") - - def _load_deps_from_step_1(): -- llvm_register_toolchains() - rules_foreign_cc_dependencies() - - def textmapper_dependencies(): -@@ -49,21 +47,29 @@ def textmapper_dependencies(): - go_repository( - name = "dev_lsp_go_jsonrpc2", - importpath = "go.lsp.dev/jsonrpc2", -+ remote = "https://github.com/go-language-server/jsonrpc2", -+ vcs = "git", - commit = "8c68d4fd37cd4bd06b62b3243f0d2292c681d164", - ) - go_repository( - name = "dev_lsp_go_protocol", - importpath = "go.lsp.dev/protocol", -+ remote = "https://github.com/go-language-server/protocol", -+ vcs = "git", - commit = "da30f9ae0326cc45b76adc5cd8920ac1ffa14a15", - ) - go_repository( - name = "dev_lsp_go_uri", - importpath = "go.lsp.dev/uri", -+ remote = "https://github.com/go-language-server/uri", -+ vcs = "git", - commit = "63eaac75cc850f596be19073ff6d4ec198603779", - ) - go_repository( - name = "dev_lsp_go_pkg", - importpath = "go.lsp.dev/pkg", -+ remote = "https://github.com/go-language-server/pkg", -+ vcs = "git", - commit = "384b27a52fb2b5d74d78cfe89c7738e9a3e216a5", - ) - go_repository( -@@ -477,7 +483,6 @@ alias( - flex_register_toolchains(version = "2.6.4") - bison_register_toolchains(version = "3.3.2") - go_rules_dependencies() -- go_register_toolchains(version = "1.21.6") - gazelle_dependencies() - textmapper_dependencies() - - -diff --git a/bazel/zetasql_deps_step_1.bzl b/bazel/zetasql_deps_step_1.bzl -index 825bf8ea..7edd1352 100644 ---- a/bazel/zetasql_deps_step_1.bzl -+++ b/bazel/zetasql_deps_step_1.bzl -@@ -22,25 +22,11 @@ load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") - # but depend on them being something different. So we have to override them both - # by defining the repo first. - load("@com_google_zetasql//bazel:zetasql_bazel_version.bzl", "zetasql_bazel_version") --load("@toolchains_llvm//toolchain:deps.bzl", "bazel_toolchain_dependencies") --load("@toolchains_llvm//toolchain:rules.bzl", "llvm_toolchain") - - def zetasql_deps_step_1(add_bazel_version = True): - if add_bazel_version: - zetasql_bazel_version() - -- bazel_toolchain_dependencies() -- llvm_toolchain( -- name = "llvm_toolchain", -- llvm_versions = { -- "": "16.0.0", -- # The LLVM repo stops providing pre-built binaries for the MacOS x86_64 -- # architecture for versions >= 16.0.0: https://github.com/llvm/llvm-project/releases, -- # but our Kokoro MacOS tests are still using x86_64 (ventura). -- # TODO: Upgrade the MacOS version to sonoma-slcn. 
-- "darwin-x86_64": "15.0.7", -- }, -- ) - - http_archive( - name = "io_bazel_rules_go", - -diff --git a/bazel/grpc_extra_deps.patch b/bazel/grpc_extra_deps.patch -index 771761b3..9c1b1cee 100644 ---- a/bazel/grpc_extra_deps.patch -+++ b/bazel/grpc_extra_deps.patch -@@ -13,3 +13,41 @@ index 4d8afa3131..b42224501f 100644 - # Pull-in the go 3rd party dependencies for protoc_gen_validate, which is - # needed for building C++ xDS protos - go_third_party() -+ -+ diff --git a/BUILD b/BUILD -+ index 3b5d7e5e3c..c5d61e6e4c 100644 -+ --- a/BUILD -+ +++ b/BUILD -+ @@ -544,6 +544,7 @@ grpc_cc_library( -+ defines = ["GRPC_NO_XDS"], -+ external_deps = [ -+ "absl/base:core_headers", -+ + "absl/status", -+ + "absl/strings", -+ ], -+ language = "c++", -+ public_hdrs = GRPC_PUBLIC_HDRS, -+ -+ diff --git a/include/grpcpp/impl/status.h b/include/grpcpp/impl/status.h -+ index 95436ab8fb..fe9f44adf0 100644 -+ --- a/include/grpcpp/impl/status.h -+ +++ b/include/grpcpp/impl/status.h -+ @@ -23,6 +23,7 @@ -+ -+ #include -+ -+ +#include "absl/status/status.h" -+ #include -+ #include -+ #include -+ @@ -99,6 +100,10 @@ class GRPC_MUST_USE_RESULT_WHEN_USE_STRICT_WARNING Status { -+ Status(StatusCode code, const std::string& error_message) -+ : code_(code), error_message_(error_message) {} -+ -+ + operator absl::Status() const& { -+ + return absl::Status(static_cast(code_), error_message_); -+ + } -+ + -+ /// Construct an instance with \a code, \a error_message and -+ /// \a error_details. It is an error to construct an OK status with non-empty -+ /// \a error_message and/or \a error_details. -+ - -diff --git a/bazel/icu4c-64_2.patch b/bazel/icu4c-64_2.patch -index 69d12b63..a23bdcaf 100644 ---- a/bazel/icu4c-64_2.patch -+++ b/bazel/icu4c-64_2.patch -@@ -5,7 +5,7 @@ - CXX = @CXX@ - AR = @AR@ - -ARFLAGS = @ARFLAGS@ r --+ARFLAGS = @ARFLAGS@ -crs -++ARFLAGS = @ARFLAGS@ - RANLIB = @RANLIB@ - COMPILE_LINK_ENVVAR = @COMPILE_LINK_ENVVAR@ - UCLN_NO_AUTO_CLEANUP = @UCLN_NO_AUTO_CLEANUP@ - - diff --git a/bazel/icu.BUILD b/bazel/icu.BUILD -index be36d7de..f61d8f3c 100644 ---- a/bazel/icu.BUILD -+++ b/bazel/icu.BUILD -@@ -35,20 +35,17 @@ filegroup( - configure_make( - name = "icu", - configure_command = "source/configure", -- args = select({ -- # AR is overridden to be libtool by rules_foreign_cc. It does not support ar style arguments -- # like "r". We need to prevent the icu make rules from adding unsupported parameters by -- # forcing ARFLAGS to keep the rules_foreign_cc value in this parameter -- "@platforms//os:macos": [ -- "ARFLAGS=\"-static -o\"", -- "MAKE=gnumake", -- ], -- "//conditions:default": [], -- }), -- env = { -- "CXXFLAGS": "-fPIC", # For JNI -- "CFLAGS": "-fPIC", # For JNI -- }, -+ env = select({ -+ "@platforms//os:macos": { -+ "AR": "", -+ "CXXFLAGS": "-fPIC", # For JNI -+ "CFLAGS": "-fPIC", # For JNI -+ }, -+ "//conditions:default": { -+ "CXXFLAGS": "-fPIC", # For JNI -+ "CFLAGS": "-fPIC", # For JNI -+ }, -+ }), - configure_options = [ - "--enable-option-checking", - "--enable-static", - - -diff --git a/zetasql/public/constant.h b/zetasql/public/constant.h -index 946183b0..03ac17e0 100644 ---- a/zetasql/public/constant.h -+++ b/zetasql/public/constant.h -@@ -80,7 +80,7 @@ class Constant { - const std::vector& name_path() const { return name_path_; } - - // Returns the type of this Constant. -- virtual const Type* type() const = 0; -+ virtual const zetasql::Type* type() const = 0; - - // Returns whether or not this Constant is a specific constant interface or - // implementation. 
- -diff --git a/zetasql/public/property_graph.h b/zetasql/public/property_graph.h -index 53ccca23..0eefe780 100644 ---- a/zetasql/public/property_graph.h -+++ b/zetasql/public/property_graph.h -@@ -348,7 +348,7 @@ class GraphPropertyDeclaration { - return ::zetasql::FullName(PropertyGraphNamePath(), Name()); - } - -- virtual const Type* Type() const = 0; -+ virtual const zetasql::Type* Type() const = 0; - - // Returns whether or not this GraphPropertyDeclaration is a specific - // interface or implementation. - -diff --git a/zetasql/analyzer/resolver_expr.cc b/zetasql/analyzer/resolver_expr.cc -index 51d095ab..8ba1eefc 100644 ---- a/zetasql/analyzer/resolver_expr.cc -+++ b/zetasql/analyzer/resolver_expr.cc -@@ -2996,7 +2996,7 @@ class SystemVariableConstant final : public Constant { - const Type* type) - : Constant(name_path), type_(type) {} - -- const Type* type() const override { return type_; } -+ const zetasql::Type* type() const override { return type_; } - std::string DebugString() const override { return FullName(); } - std::string ConstantValueDebugString() const override { return ""; } - - -diff --git a/zetasql/public/coercer.cc b/zetasql/public/coercer.cc -index dc4961dd..80d26183 100644 ---- a/zetasql/public/coercer.cc -+++ b/zetasql/public/coercer.cc -@@ -154,7 +154,7 @@ class TypeSuperTypes { - return false; - } - -- const Type* type() const { return type_; } -+ const zetasql::Type* type() const { return type_; } - TypeListView supertypes() const { return supertypes_; } - - std::vector ToVector() const { - -diff --git a/zetasql/public/function_signature.h b/zetasql/public/function_signature.h -index 29886cc2..5436071c 100644 ---- a/zetasql/public/function_signature.h -+++ b/zetasql/public/function_signature.h -@@ -702,7 +702,7 @@ class FunctionArgumentType { - // Returns NULL if kind_ is not ARG_TYPE_FIXED or ARG_TYPE_LAMBDA. If kind_ is - // ARG_TYPE_LAMBDA, returns the type of lambda body type, which could be NULL - // if the body type is templated. -- const Type* type() const { return type_; } -+ const zetasql::Type* type() const { return type_; } - - SignatureArgumentKind kind() const { return kind_; } - -diff --git a/zetasql/public/input_argument_type.h b/zetasql/public/input_argument_type.h -index f2098787..55b416e3 100644 ---- a/zetasql/public/input_argument_type.h -+++ b/zetasql/public/input_argument_type.h -@@ -81,7 +81,7 @@ class InputArgumentType { - ~InputArgumentType() {} - - // This may return nullptr (such as for lambda). -- const Type* type() const { return type_; } -+ const zetasql::Type* type() const { return type_; } - - const std::vector& field_types() const { - return field_types_; - -diff --git a/zetasql/public/simple_catalog.h b/zetasql/public/simple_catalog.h -index 76a94d43..a0d81b9d 100644 ---- a/zetasql/public/simple_catalog.h -+++ b/zetasql/public/simple_catalog.h -@@ -1202,7 +1202,7 @@ class SimpleConstant : public Constant { - const SimpleConstantProto& simple_constant_proto, - const TypeDeserializer& type_deserializer); - -- const Type* type() const override { return value_.type(); } -+ const zetasql::Type* type() const override { return value_.type(); } - - const Value& value() const { return value_; } - - -diff --git a/zetasql/public/sql_constant.h b/zetasql/public/sql_constant.h -index fa88344f..69defd3b 100644 ---- a/zetasql/public/sql_constant.h -+++ b/zetasql/public/sql_constant.h -@@ -60,7 +60,7 @@ class SQLConstant : public Constant { - - // Returns the Type of the resolved Constant based on its resolved - // expression type. 
-- const Type* type() const override { -+ const zetasql::Type* type() const override { - return constant_expression()->type(); - } - - -diff --git a/zetasql/public/value.h b/zetasql/public/value.h -index 49b60aec..86688538 100644 ---- a/zetasql/public/value.h -+++ b/zetasql/public/value.h -@@ -122,7 +122,7 @@ class Value { - ~Value(); - - // Returns the type of the value. -- const Type* type() const; -+ const zetasql::Type* type() const; - - // Returns the type kind of the value. Same as type()->type_kind() but in some - // cases can be a bit more efficient. -@@ -1152,7 +1152,7 @@ class Value { - - // Returns a pointer to Value's Type. Requires is_valid(). If TypeKind is - // stored in the Metadata, Type pointer is obtained from static TypeFactory. -- const Type* type() const; -+ const zetasql::Type* type() const; - - // Returns true, if instance stores pointer to a Type and false if type's - // kind. - -diff --git a/zetasql/public/value_inl.h b/zetasql/public/value_inl.h -index e917a97a..f324276f 100644 ---- a/zetasql/public/value_inl.h -+++ b/zetasql/public/value_inl.h -@@ -1077,7 +1077,7 @@ class Value::Metadata::ContentLayout<4> { - has_type_(false), - value_extended_content_(value_extended_content) {} - -- const Type* type() const { return type_; } -+ const zetasql::Type* type() const { return type_; } - int32_t value_extended_content() const { return value_extended_content_; } - bool is_null() const { return is_null_; } - bool preserves_order() const { return preserves_order_; } -@@ -1157,7 +1157,7 @@ class Value::Metadata::ContentLayout<8> { - // TODO: wait for fixed clang-format - // clang-format on - -- const Type* type() const { -+ const zetasql::Type* type() const { - return reinterpret_cast(type_ & kTypeMask); - } - int32_t value_extended_content() const { return value_extended_content_; } - -diff --git a/zetasql/reference_impl/operator.h b/zetasql/reference_impl/operator.h -index 24f0ddac..7adb701d 100644 ---- a/zetasql/reference_impl/operator.h -+++ b/zetasql/reference_impl/operator.h -@@ -240,7 +240,7 @@ class ExprArg : public AlgebraArg { - - ~ExprArg() override = default; - -- const Type* type() const { return type_; } -+ const zetasql::Type* type() const { return type_; } - - private: - const Type* type_; - -diff --git a/zetasql/resolved_ast/resolved_column.h b/zetasql/resolved_ast/resolved_column.h -index 912b3ca4..2e613f2a 100644 ---- a/zetasql/resolved_ast/resolved_column.h -+++ b/zetasql/resolved_ast/resolved_column.h -@@ -119,7 +119,7 @@ class ResolvedColumn { - IdString table_name_id() const { return table_name_; } - IdString name_id() const { return name_; } - -- const Type* type() const { return annotated_type_.type; } -+ const zetasql::Type* type() const { return annotated_type_.type; } - - const AnnotationMap* type_annotation_map() const { - return annotated_type_.annotation_map; - -diff --git a/zetasql/testing/test_value.h b/zetasql/testing/test_value.h -index 0412873e..d2d8c3e8 100644 ---- a/zetasql/testing/test_value.h -+++ b/zetasql/testing/test_value.h -@@ -106,7 +106,7 @@ class ValueConstructor { - : v_(v) {} - - const Value& get() const { return v_; } -- const Type* type() const { return v_.type(); } -+ const zetasql::Type* type() const { return v_.type(); } - - static std::vector ToValues(absl::Span slice) { - std::vector values; - - -diff --git a/zetasql/base/logging.h b/zetasql/base/logging.h -index 730ccdcb..46fe06b0 100644 ---- a/zetasql/base/logging.h -+++ b/zetasql/base/logging.h -@@ -59,6 +59,17 @@ inline void 
ZetaSqlMakeCheckOpValueString(std::ostream *os, const T &v) { - (*os) << v; - } - -+// This overloading is implemented to address the compilation issue when trying to log unique_ptr types -+// At the moment, we are not providing any specific implementation for handling unique_ptr types. -+template -+inline void ZetaSqlMakeCheckOpValueString(std::ostream* os, const std::unique_ptr& v) { -+ if (v == nullptr) { -+ (*os) << "nullptr"; -+ } else { -+ (*os) << v.get(); -+ } -+} -+ - // Overrides for char types provide readable values for unprintable - // characters. - template <> - - - -diff --git a/zetasql/base/testing/BUILD b/zetasql/base/testing/BUILD -index 10596497..a9b69be7 100644 ---- a/zetasql/base/testing/BUILD -+++ b/zetasql/base/testing/BUILD -@@ -55,6 +55,7 @@ cc_library( - "@com_google_absl//absl/status:statusor", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest", -+ "@com_github_grpc_grpc//:grpc++", - ], - ) - -@@ -69,6 +70,7 @@ cc_test( - "@com_google_absl//absl/status", - "@com_google_absl//absl/status:statusor", - "@com_google_googletest//:gtest", -+ "@com_github_grpc_grpc//:grpc++", - ], - ) - -
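
For reviewers of this removal, the following is a minimal sketch (not part of the patch itself) of how the deleted tfx_bsl.statistics.sql_util helpers were invoked before this change. The statistics values and queries are illustrative and are drawn from the deleted docstrings and tests above; after this change the module and the underlying ZetaSQL-backed bindings no longer exist in tfx_bsl.

    from google.protobuf import text_format
    from tensorflow_metadata.proto.v0 import statistics_pb2
    from tfx_bsl.statistics import sql_util  # Module deleted by this change.

    # Feature statistics as produced by TFDV, parsed from a textproto.
    feature_stats = text_format.Parse(
        """
        type: FLOAT
        num_stats { mean: 2.5 }
        """, statistics_pb2.FeatureNameStatistics())

    # Unary form: the proto is bound to `feature` in the GoogleSQL expression.
    # (Per the deleted tests, SQL-based validation was not supported on Windows.)
    assert sql_util.EvaluateUnaryStatsPredicate(
        feature_stats, 'feature.num_stats.mean > 0')

    # Binary form: two protos are bound to `feature_base` and `feature_test`.
    test_stats = text_format.Parse(
        'type: FLOAT num_stats { mean: 98 }',
        statistics_pb2.FeatureNameStatistics())
    assert sql_util.EvaluateBinaryStatsPredicate(
        feature_stats, test_stats,
        'feature_base.num_stats.mean < feature_test.num_stats.mean')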