1919
2020#include <simdutf.h>
2121#include <fluent-bit/simdutf/flb_simdutf_connector.h>
22- #include <memory.h>
2322#include <memory>
23+ extern "C"
24+ {
25+ #include <fluent-bit/flb_info.h>
26+ #include <fluent-bit/flb_log.h>
27+ #include <fluent-bit/flb_mem.h>
28+ }
29+
30+ typedef int (* conversion_function )(const char16_t * buf , size_t len ,
31+ char * * utf8_output , size_t * out_size );
32+
33+ static int convert_from_unicode (conversion_function convert ,
34+ const char * input , size_t length ,
35+ char * * output , size_t * out_size )
36+ {
37+ size_t len ;
38+ std ::unique_ptr < char16_t , decltype (& flb_free )> temp_buffer (NULL , flb_free );
39+ const char16_t * aligned_input = NULL ;
40+ int status ;
41+
42+ len = length ;
43+ if (len % 2 ) {
44+ len -- ;
45+ }
46+ if (len < 2 ) {
47+ return FLB_SIMDUTF_CONNECTOR_CONVERT_NOP ;
48+ }
49+
50+ /* Check alignment to determine whether to copy or not */
51+ if ((uintptr_t ) input % 2 == 0 ) {
52+ aligned_input = (const char16_t * ) input ;
53+ }
54+ else {
55+ temp_buffer .reset ((char16_t * ) flb_malloc (len ));
56+ if (temp_buffer .get () == NULL ) {
57+ flb_errno ();
58+ return FLB_SIMDUTF_CONNECTOR_CONVERT_ERROR ;
59+ }
60+ memcpy (temp_buffer .get (), input , len );
61+ aligned_input = temp_buffer .get ();
62+ }
63+
64+ return convert (aligned_input , len / 2 , output , out_size );
65+ }
2466
2567int flb_simdutf_connector_utf8_length_from_utf16le (const char16_t * buf , size_t len )
2668{
@@ -61,23 +103,24 @@ int flb_simdutf_connector_convert_utf16le_to_utf8(const char16_t *buf, size_t le
61103 char * * utf8_output , size_t * out_size )
62104{
63105 size_t clen = 0 ;
64- size_t converted = 0 ;
65- simdutf ::result result ;
106+ simdutf ::result result = {};
66107
67108 clen = simdutf ::utf8_length_from_utf16le (buf , len );
68- /* convert_utfXXXX_to_utf8 function needs to pass allocated memory region with C++ style */
69- std ::unique_ptr < char [ ]> output {new char [clen ]};
70- converted = simdutf ::convert_utf16le_to_utf8 (buf , len , output .get ());
71- result = simdutf ::validate_utf8_with_errors (output .get (), clen );
72- if (result .error == simdutf ::error_code ::SUCCESS && converted > 0 ) {
73- std ::string result_string (output .get (), clen );
109+ * utf8_output = (char * ) flb_malloc (clen + 1 );
110+ if (* utf8_output == NULL ) {
111+ flb_errno ();
112+ return FLB_SIMDUTF_CONNECTOR_CONVERT_ERROR ;
113+ }
74114
75- * utf8_output = strdup (result_string .c_str ());
76- * out_size = converted ;
115+ result = simdutf ::convert_utf16le_to_utf8_with_errors (buf , len , * utf8_output );
116+ if (result .error == simdutf ::error_code ::SUCCESS && result .count > 0 ) {
117+ (* utf8_output )[result .count ] = '\0' ;
118+ * out_size = result .count ;
77119
78120 return FLB_SIMDUTF_ERROR_CODE_SUCCESS ;
79121 }
80122 else {
123+ flb_free (* utf8_output );
81124 * utf8_output = NULL ;
82125 * out_size = 0 ;
83126
@@ -89,23 +132,24 @@ int flb_simdutf_connector_convert_utf16be_to_utf8(const char16_t *buf, size_t le
89132 char * * utf8_output , size_t * out_size )
90133{
91134 size_t clen = 0 ;
92- size_t converted = 0 ;
93- simdutf ::result result ;
135+ simdutf ::result result = {};
94136
95137 clen = simdutf ::utf8_length_from_utf16be (buf , len );
96- /* convert_utfXXXX_to_utf8 function needs to pass allocated memory region with C++ style */
97- std ::unique_ptr < char [ ]> output {new char [clen ]};
98- converted = simdutf ::convert_utf16be_to_utf8 (buf , len , output .get ());
99- result = simdutf ::validate_utf8_with_errors (output .get (), clen );
100- if (result .error == simdutf ::error_code ::SUCCESS && converted > 0 ) {
101- std ::string result_string (output .get (), clen );
138+ * utf8_output = (char * ) flb_malloc (clen + 1 );
139+ if (* utf8_output == NULL ) {
140+ flb_errno ();
141+ return FLB_SIMDUTF_CONNECTOR_CONVERT_ERROR ;
142+ }
102143
103- * utf8_output = strdup (result_string .c_str ());
104- * out_size = converted ;
144+ result = simdutf ::convert_utf16be_to_utf8_with_errors (buf , len , * utf8_output );
145+ if (result .error == simdutf ::error_code ::SUCCESS && result .count > 0 ) {
146+ (* utf8_output )[result .count ] = '\0' ;
147+ * out_size = result .count ;
105148
106149 return FLB_SIMDUTF_ERROR_CODE_SUCCESS ;
107150 }
108151 else {
152+ flb_free (* utf8_output );
109153 * utf8_output = NULL ;
110154 * out_size = 0 ;
111155
@@ -117,23 +161,24 @@ int flb_simdutf_connector_convert_utf16_to_utf8(const char16_t *buf, size_t len,
117161 char * * utf8_output , size_t * out_size )
118162{
119163 size_t clen = 0 ;
120- size_t converted = 0 ;
121- simdutf ::result result ;
164+ simdutf ::result result = {};
122165
123166 clen = simdutf ::utf8_length_from_utf16 (buf , len );
124- /* convert_utfXXXX_to_utf8 function needs to pass allocated memory region with C++ style */
125- std ::unique_ptr < char [ ]> output {new char [clen ]};
126- converted = simdutf ::convert_utf16_to_utf8 (buf , len , output .get ());
127- result = simdutf ::validate_utf8_with_errors (output .get (), clen );
128- if (result .error == simdutf ::error_code ::SUCCESS && converted > 0 ) {
129- std ::string result_string (output .get (), clen );
167+ * utf8_output = (char * ) flb_malloc (clen + 1 );
168+ if (* utf8_output == NULL ) {
169+ flb_errno ();
170+ return FLB_SIMDUTF_CONNECTOR_CONVERT_ERROR ;
171+ }
130172
131- * utf8_output = strdup (result_string .c_str ());
132- * out_size = converted ;
173+ result = simdutf ::convert_utf16_to_utf8_with_errors (buf , len , * utf8_output );
174+ if (result .error == simdutf ::error_code ::SUCCESS && result .count > 0 ) {
175+ (* utf8_output )[result .count ] = '\0' ;
176+ * out_size = result .count ;
133177
134178 return FLB_SIMDUTF_ERROR_CODE_SUCCESS ;
135179 }
136180 else {
181+ flb_free (* utf8_output );
137182 * utf8_output = NULL ;
138183 * out_size = 0 ;
139184
@@ -155,11 +200,7 @@ int flb_simdutf_connector_convert_from_unicode(int preferred_encoding,
155200 const char * input , size_t length ,
156201 char * * output , size_t * out_size )
157202{
158- size_t len = 0 ;
159- size_t i = 0 ;
160203 int encoding = 0 ;
161- std ::u16string str16 ;
162-
163204 if (preferred_encoding == FLB_SIMDUTF_ENCODING_TYPE_UNICODE_AUTO ) {
164205 encoding = simdutf ::detect_encodings (input , length );
165206 }
@@ -175,46 +216,12 @@ int flb_simdutf_connector_convert_from_unicode(int preferred_encoding,
175216 return FLB_SIMDUTF_CONNECTOR_CONVERT_NOP ;
176217 }
177218 else if ((encoding & simdutf ::encoding_type ::UTF16_LE ) == simdutf ::encoding_type ::UTF16_LE ) {
178- len = length ;
179- if (len % 2 ) {
180- len -- ;
181- }
182- if (len < 2 ) {
183- return FLB_SIMDUTF_CONNECTOR_CONVERT_NOP ;
184- }
185- for (i = 0 ; i < len ;) {
186- if (i + 2 > len ) {
187- break ;
188- }
189- /* little-endian */
190- int lo = input [i ++ ] & 0xFF ;
191- int hi = input [i ++ ] & 0xFF ;
192- str16 .push_back (hi << 8 | lo );
193- }
194-
195- return flb_simdutf_connector_convert_utf16le_to_utf8 (str16 .c_str (), str16 .size (),
196- output , out_size );
219+ return convert_from_unicode (flb_simdutf_connector_convert_utf16le_to_utf8 ,
220+ input , length , output , out_size );
197221 }
198222 else if ((encoding & simdutf ::encoding_type ::UTF16_BE ) == simdutf ::encoding_type ::UTF16_BE ) {
199- len = length ;
200- if (len % 2 ) {
201- len -- ;
202- }
203- if (len < 2 ) {
204- return FLB_SIMDUTF_CONNECTOR_CONVERT_NOP ;
205- }
206- for (i = 0 ; i < len ;) {
207- if (i + 2 > len ) {
208- break ;
209- }
210- /* big-endian */
211- int lo = input [i ++ ] & 0xFF ;
212- int hi = input [i ++ ] & 0xFF ;
213- str16 .push_back (lo | hi << 8 );
214- }
215-
216- return flb_simdutf_connector_convert_utf16be_to_utf8 (str16 .c_str (), str16 .size (),
217- output , out_size );
223+ return convert_from_unicode (flb_simdutf_connector_convert_utf16be_to_utf8 ,
224+ input , length , output , out_size );
218225 }
219226 else {
220227 /* Note: UTF-32LE and UTF-32BE are used for internal usages
0 commit comments