Skip to content

Commit 44767e4

Browse files
committed
simdutf_connector: skip UTF-16 BOM
When converting UTF-16 to UTF-8, ignore the BOM so that no UTF-8 BOM is written to the output. Signed-off-by: Erik Cederberg <[email protected]>
1 parent ce0ecd8 commit 44767e4

File tree

1 file changed

+10
-0
lines changed

1 file changed

+10
-0
lines changed

src/simdutf/flb_simdutf_connector.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,10 +215,20 @@ int flb_simdutf_connector_convert_from_unicode(int preferred_encoding,
215215
return FLB_SIMDUTF_CONNECTOR_CONVERT_NOP;
216216
}
217217
else if ((encoding & simdutf::encoding_type::UTF16_LE) == simdutf::encoding_type::UTF16_LE) {
218+
/* Skip the UTF-16 BOM */
219+
if (length >= 2 && input[0] == '\xFF' && input[1] == '\xFE') {
220+
input += 2;
221+
length -= 2;
222+
}
218223
return convert_from_unicode(flb_simdutf_connector_convert_utf16le_to_utf8,
219224
input, length, output, out_size);
220225
}
221226
else if ((encoding & simdutf::encoding_type::UTF16_BE) == simdutf::encoding_type::UTF16_BE) {
227+
/* Skip the UTF-16 BOM */
228+
if (length >= 2 && input[0] == '\xFE' && input[1] == '\xFF') {
229+
input += 2;
230+
length -= 2;
231+
}
222232
return convert_from_unicode(flb_simdutf_connector_convert_utf16be_to_utf8,
223233
input, length, output, out_size);
224234
}

0 commit comments

Comments
 (0)