Skip to content

Commit e53c05c

Browse files
committed
Enforce UTF-8 validity in string() conversion from bytes.
Relies on the SIMD-optimized library https://github.com/cyb70289/utf8.
PiperOrigin-RevId: 353736473
1 parent bca699e commit e53c05c

File tree

6 files changed

+25
-11
lines changed

6 files changed

+25
-11
lines changed

conformance/BUILD

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,6 @@ cc_binary(
9090
# uncommented when the spec changes to truncation rather than rounding.
9191
"--skip_test=conversions/int/double_nearest,double_nearest_neg,double_half_away_neg,double_half_away_pos",
9292
"--skip_test=conversions/uint/double_nearest,double_nearest_int,double_half_away",
93-
# TODO(issues/82): Unexpected behavior when converting invalid bytes to string.
94-
"--skip_test=conversions/string/bytes_invalid",
9593
# TODO(issues/96): Well-known type conversion support.
9694
"--skip_test=proto2/literal_wellknown",
9795
"--skip_test=proto3/literal_wellknown",

eval/public/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,7 @@ cc_library(
180180
":cel_function_adapter",
181181
":cel_function_registry",
182182
":cel_options",
183+
"//base:unilib",
183184
"//eval/public/containers:container_backed_list_impl",
184185
"@com_google_absl//absl/numeric:int128",
185186
"@com_google_absl//absl/status",

eval/public/builtin_func_registrar.cc

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "eval/public/cel_options.h"
1717
#include "eval/public/containers/container_backed_list_impl.h"
1818
#include "re2/re2.h"
19+
#include "base/unilib.h"
1920

2021
namespace google {
2122
namespace api {
@@ -1145,16 +1146,15 @@ absl::Status RegisterStringConversionFunctions(
11451146
return absl::OkStatus();
11461147
}
11471148

1148-
// TODO(issues/82): ensure the bytes conversion to string handles UTF-8
1149-
// properly, and avoids unnecessary allocations.
1150-
// bytes -> string
1151-
auto status = FunctionAdapter<CelValue::StringHolder, CelValue::BytesHolder>::
1152-
CreateAndRegister(
1149+
auto status =
1150+
FunctionAdapter<CelValue, CelValue::BytesHolder>::CreateAndRegister(
11531151
builtin::kString, false,
1154-
[](Arena* arena,
1155-
CelValue::BytesHolder value) -> CelValue::StringHolder {
1156-
return CelValue::StringHolder(
1157-
Arena::Create<std::string>(arena, std::string(value.value())));
1152+
[](Arena* arena, CelValue::BytesHolder value) -> CelValue {
1153+
if (UniLib::IsStructurallyValid(value.value())) {
1154+
return CelValue::CreateStringView(value.value());
1155+
}
1156+
return CreateErrorValue(arena, "invalid UTF-8 bytes value",
1157+
absl::StatusCode::kInvalidArgument);
11581158
},
11591159
registry);
11601160
if (!status.ok()) return status;

eval/public/builtin_func_test.cc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1686,6 +1686,15 @@ TEST_F(BuiltinsTest, BytesToString) {
16861686
EXPECT_EQ(result_value.StringOrDie().value(), "abcd");
16871687
}
16881688

1689+
TEST_F(BuiltinsTest, BytesToStringInvalid) {
1690+
std::string input = "\xFF";
1691+
std::vector<CelValue> args = {CelValue::CreateBytes(&input)};
1692+
CelValue result_value;
1693+
ASSERT_NO_FATAL_FAILURE(
1694+
PerformRun(builtin::kString, {}, args, &result_value));
1695+
ASSERT_TRUE(result_value.IsError());
1696+
}
1697+
16891698
TEST_F(BuiltinsTest, StringToString) {
16901699
std::string input = "abcd";
16911700
std::vector<CelValue> args = {CelValue::CreateString(&input)};

eval/public/cel_value.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,9 @@ class CelValue {
162162

163163
static CelValue CreateString(StringHolder holder) { return CelValue(holder); }
164164

165+
// Returns a string value from a string_view. Warning: the caller is
166+
// responsible for the lifetime of the backing string. Prefer CreateString
167+
// instead.
165168
static CelValue CreateStringView(absl::string_view value) {
166169
return CelValue(StringHolder(value));
167170
}

testutil/BUILD

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ cc_library(
4949

5050
cc_library(
5151
name = "test_data_io",
52+
testonly = True,
5253
srcs = [
5354
"test_data_io.cc",
5455
],
@@ -76,6 +77,7 @@ cc_library(
7677
# third_party/cel/spec/testdata/unique_values.textpb
7778
cc_binary(
7879
name = "test_data_gen",
80+
testonly = True,
7981
srcs = [
8082
"test_data_gen.cc",
8183
],
@@ -110,6 +112,7 @@ cc_test(
110112

111113
cc_library(
112114
name = "util",
115+
testonly = True,
113116
hdrs = [
114117
"util.h",
115118
],

0 commit comments

Comments
 (0)