Skip to content

Commit 82f7f8a

Browse files
1. Ignore SentencePiece::BYTE during encoding instead of throwing an error.
2. Exit early from DecodePrecompiledCharsmap when precompiled_charsmap is empty.
PiperOrigin-RevId: 826120506
1 parent aa839b1 commit 82f7f8a

File tree

2 files changed

+5
-0
lines changed

2 files changed

+5
-0
lines changed

tensorflow_text/core/kernels/sentencepiece/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ licenses(["notice"])
1010

1111
# Visibility rules
1212
package(default_visibility = [
13+
"//java/com/google/android/apps/pixel/psi:__subpackages__",
1314
"//visibility:public",
1415
])
1516

tensorflow_text/core/kernels/sentencepiece/model_converter.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ DecodePrecompiledCharsmap(
4646
const ::sentencepiece::NormalizerSpec& normalizer_spec) {
4747
// This function "undoes" encoding done by
4848
// sentencepiece::normalizer::Normalizer::EncodePrecompiledCharsMap.
49+
if (normalizer_spec.precompiled_charsmap().empty()) {
50+
return std::make_tuple(std::vector<uint32_t>(), std::vector<int8_t>());
51+
}
4952
const char* precompiled_map = normalizer_spec.precompiled_charsmap().data();
5053
const uint32_t trie_size =
5154
*reinterpret_cast<const uint32_t*>(precompiled_map);
@@ -89,6 +92,7 @@ absl::StatusOr<std::string> ConvertSentencepieceModelToFlatBuffer(
8992
break;
9093
case ::sentencepiece::ModelProto::SentencePiece::UNKNOWN:
9194
case ::sentencepiece::ModelProto::SentencePiece::CONTROL:
95+
case ::sentencepiece::ModelProto::SentencePiece::BYTE:
9296
// Ignore unknown, control, and byte codes.
9397
break;
9498
default:

0 commit comments

Comments
 (0)