Skip to content

Commit f6d036e

Browse files
committed
simdutf_connector: skip UTF-16 BOM
When converting UTF-16 to UTF-8, ignore the BOM so that no UTF-8 BOM is written to the output. Signed-off-by: Erik Cederberg <[email protected]>
1 parent cedf532 commit f6d036e

File tree

1 file changed

+10
-0
lines changed

1 file changed

+10
-0
lines changed

src/simdutf/flb_simdutf_connector.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,10 +216,20 @@ int flb_simdutf_connector_convert_from_unicode(int preferred_encoding,
216216
return FLB_SIMDUTF_CONNECTOR_CONVERT_NOP;
217217
}
218218
else if ((encoding & simdutf::encoding_type::UTF16_LE) == simdutf::encoding_type::UTF16_LE) {
219+
/* Skip the UTF-16 BOM */
220+
if (length >= 2 && input[0] == '\xFF' && input[1] == '\xFE') {
221+
input += 2;
222+
length -= 2;
223+
}
219224
return convert_from_unicode(flb_simdutf_connector_convert_utf16le_to_utf8,
220225
input, length, output, out_size);
221226
}
222227
else if ((encoding & simdutf::encoding_type::UTF16_BE) == simdutf::encoding_type::UTF16_BE) {
228+
/* Skip the UTF-16 BOM */
229+
if (length >= 2 && input[0] == '\xFE' && input[1] == '\xFF') {
230+
input += 2;
231+
length -= 2;
232+
}
223233
return convert_from_unicode(flb_simdutf_connector_convert_utf16be_to_utf8,
224234
input, length, output, out_size);
225235
}

0 commit comments

Comments
 (0)