19
19
20
20
#include <simdutf.h>
21
21
#include <fluent-bit/simdutf/flb_simdutf_connector.h>
22
- #include <memory.h>
23
22
#include <memory>
23
+ extern "C"
24
+ {
25
+ #include <fluent-bit/flb_info.h>
26
+ #include <fluent-bit/flb_log.h>
27
+ #include <fluent-bit/flb_mem.h>
28
+ }
29
+
30
+ typedef int (* conversion_function )(const char16_t * buf , size_t len ,
31
+ char * * utf8_output , size_t * out_size );
32
+
33
+ static int convert_from_unicode (conversion_function convert ,
34
+ const char * input , size_t length ,
35
+ char * * output , size_t * out_size )
36
+ {
37
+ size_t len ;
38
+ std ::unique_ptr < char16_t , decltype (& flb_free )> temp_buffer (NULL , flb_free );
39
+ const char16_t * aligned_input = NULL ;
40
+ int status ;
41
+
42
+ len = length ;
43
+ if (len % 2 ) {
44
+ len -- ;
45
+ }
46
+ if (len < 2 ) {
47
+ return FLB_SIMDUTF_CONNECTOR_CONVERT_NOP ;
48
+ }
49
+
50
+ /* Check alignment to determine whether to copy or not */
51
+ if ((uintptr_t ) input % 2 == 0 ) {
52
+ aligned_input = (const char16_t * ) input ;
53
+ }
54
+ else {
55
+ temp_buffer .reset ((char16_t * ) flb_malloc (len ));
56
+ if (temp_buffer .get () == NULL ) {
57
+ flb_errno ();
58
+ return FLB_SIMDUTF_CONNECTOR_CONVERT_ERROR ;
59
+ }
60
+ memcpy (temp_buffer .get (), input , len );
61
+ aligned_input = temp_buffer .get ();
62
+ }
63
+
64
+ return convert (aligned_input , len / 2 , output , out_size );
65
+ }
24
66
25
67
int flb_simdutf_connector_utf8_length_from_utf16le (const char16_t * buf , size_t len )
26
68
{
@@ -61,23 +103,24 @@ int flb_simdutf_connector_convert_utf16le_to_utf8(const char16_t *buf, size_t le
61
103
char * * utf8_output , size_t * out_size )
62
104
{
63
105
size_t clen = 0 ;
64
- size_t converted = 0 ;
65
- simdutf ::result result ;
106
+ simdutf ::result result = {};
66
107
67
108
clen = simdutf ::utf8_length_from_utf16le (buf , len );
68
- /* convert_utfXXXX_to_utf8 function needs to pass allocated memory region with C++ style */
69
- std ::unique_ptr < char [ ]> output {new char [clen ]};
70
- converted = simdutf ::convert_utf16le_to_utf8 (buf , len , output .get ());
71
- result = simdutf ::validate_utf8_with_errors (output .get (), clen );
72
- if (result .error == simdutf ::error_code ::SUCCESS && converted > 0 ) {
73
- std ::string result_string (output .get (), clen );
109
+ * utf8_output = (char * ) flb_malloc (clen + 1 );
110
+ if (* utf8_output == NULL ) {
111
+ flb_errno ();
112
+ return FLB_SIMDUTF_CONNECTOR_CONVERT_ERROR ;
113
+ }
74
114
75
- * utf8_output = strdup (result_string .c_str ());
76
- * out_size = converted ;
115
+ result = simdutf ::convert_utf16le_to_utf8_with_errors (buf , len , * utf8_output );
116
+ if (result .error == simdutf ::error_code ::SUCCESS && result .count > 0 ) {
117
+ (* utf8_output )[result .count ] = '\0' ;
118
+ * out_size = result .count ;
77
119
78
120
return FLB_SIMDUTF_ERROR_CODE_SUCCESS ;
79
121
}
80
122
else {
123
+ flb_free (* utf8_output );
81
124
* utf8_output = NULL ;
82
125
* out_size = 0 ;
83
126
@@ -89,23 +132,24 @@ int flb_simdutf_connector_convert_utf16be_to_utf8(const char16_t *buf, size_t le
89
132
char * * utf8_output , size_t * out_size )
90
133
{
91
134
size_t clen = 0 ;
92
- size_t converted = 0 ;
93
- simdutf ::result result ;
135
+ simdutf ::result result = {};
94
136
95
137
clen = simdutf ::utf8_length_from_utf16be (buf , len );
96
- /* convert_utfXXXX_to_utf8 function needs to pass allocated memory region with C++ style */
97
- std ::unique_ptr < char [ ]> output {new char [clen ]};
98
- converted = simdutf ::convert_utf16be_to_utf8 (buf , len , output .get ());
99
- result = simdutf ::validate_utf8_with_errors (output .get (), clen );
100
- if (result .error == simdutf ::error_code ::SUCCESS && converted > 0 ) {
101
- std ::string result_string (output .get (), clen );
138
+ * utf8_output = (char * ) flb_malloc (clen + 1 );
139
+ if (* utf8_output == NULL ) {
140
+ flb_errno ();
141
+ return FLB_SIMDUTF_CONNECTOR_CONVERT_ERROR ;
142
+ }
102
143
103
- * utf8_output = strdup (result_string .c_str ());
104
- * out_size = converted ;
144
+ result = simdutf ::convert_utf16be_to_utf8_with_errors (buf , len , * utf8_output );
145
+ if (result .error == simdutf ::error_code ::SUCCESS && result .count > 0 ) {
146
+ (* utf8_output )[result .count ] = '\0' ;
147
+ * out_size = result .count ;
105
148
106
149
return FLB_SIMDUTF_ERROR_CODE_SUCCESS ;
107
150
}
108
151
else {
152
+ flb_free (* utf8_output );
109
153
* utf8_output = NULL ;
110
154
* out_size = 0 ;
111
155
@@ -117,23 +161,24 @@ int flb_simdutf_connector_convert_utf16_to_utf8(const char16_t *buf, size_t len,
117
161
char * * utf8_output , size_t * out_size )
118
162
{
119
163
size_t clen = 0 ;
120
- size_t converted = 0 ;
121
- simdutf ::result result ;
164
+ simdutf ::result result = {};
122
165
123
166
clen = simdutf ::utf8_length_from_utf16 (buf , len );
124
- /* convert_utfXXXX_to_utf8 function needs to pass allocated memory region with C++ style */
125
- std ::unique_ptr < char [ ]> output {new char [clen ]};
126
- converted = simdutf ::convert_utf16_to_utf8 (buf , len , output .get ());
127
- result = simdutf ::validate_utf8_with_errors (output .get (), clen );
128
- if (result .error == simdutf ::error_code ::SUCCESS && converted > 0 ) {
129
- std ::string result_string (output .get (), clen );
167
+ * utf8_output = (char * ) flb_malloc (clen + 1 );
168
+ if (* utf8_output == NULL ) {
169
+ flb_errno ();
170
+ return FLB_SIMDUTF_CONNECTOR_CONVERT_ERROR ;
171
+ }
130
172
131
- * utf8_output = strdup (result_string .c_str ());
132
- * out_size = converted ;
173
+ result = simdutf ::convert_utf16_to_utf8_with_errors (buf , len , * utf8_output );
174
+ if (result .error == simdutf ::error_code ::SUCCESS && result .count > 0 ) {
175
+ (* utf8_output )[result .count ] = '\0' ;
176
+ * out_size = result .count ;
133
177
134
178
return FLB_SIMDUTF_ERROR_CODE_SUCCESS ;
135
179
}
136
180
else {
181
+ flb_free (* utf8_output );
137
182
* utf8_output = NULL ;
138
183
* out_size = 0 ;
139
184
@@ -155,11 +200,7 @@ int flb_simdutf_connector_convert_from_unicode(int preferred_encoding,
155
200
const char * input , size_t length ,
156
201
char * * output , size_t * out_size )
157
202
{
158
- size_t len = 0 ;
159
- size_t i = 0 ;
160
203
int encoding = 0 ;
161
- std ::u16string str16 ;
162
-
163
204
if (preferred_encoding == FLB_SIMDUTF_ENCODING_TYPE_UNICODE_AUTO ) {
164
205
encoding = simdutf ::detect_encodings (input , length );
165
206
}
@@ -175,46 +216,12 @@ int flb_simdutf_connector_convert_from_unicode(int preferred_encoding,
175
216
return FLB_SIMDUTF_CONNECTOR_CONVERT_NOP ;
176
217
}
177
218
else if ((encoding & simdutf ::encoding_type ::UTF16_LE ) == simdutf ::encoding_type ::UTF16_LE ) {
178
- len = length ;
179
- if (len % 2 ) {
180
- len -- ;
181
- }
182
- if (len < 2 ) {
183
- return FLB_SIMDUTF_CONNECTOR_CONVERT_NOP ;
184
- }
185
- for (i = 0 ; i < len ;) {
186
- if (i + 2 > len ) {
187
- break ;
188
- }
189
- /* little-endian */
190
- int lo = input [i ++ ] & 0xFF ;
191
- int hi = input [i ++ ] & 0xFF ;
192
- str16 .push_back (hi << 8 | lo );
193
- }
194
-
195
- return flb_simdutf_connector_convert_utf16le_to_utf8 (str16 .c_str (), str16 .size (),
196
- output , out_size );
219
+ return convert_from_unicode (flb_simdutf_connector_convert_utf16le_to_utf8 ,
220
+ input , length , output , out_size );
197
221
}
198
222
else if ((encoding & simdutf ::encoding_type ::UTF16_BE ) == simdutf ::encoding_type ::UTF16_BE ) {
199
- len = length ;
200
- if (len % 2 ) {
201
- len -- ;
202
- }
203
- if (len < 2 ) {
204
- return FLB_SIMDUTF_CONNECTOR_CONVERT_NOP ;
205
- }
206
- for (i = 0 ; i < len ;) {
207
- if (i + 2 > len ) {
208
- break ;
209
- }
210
- /* big-endian */
211
- int lo = input [i ++ ] & 0xFF ;
212
- int hi = input [i ++ ] & 0xFF ;
213
- str16 .push_back (lo | hi << 8 );
214
- }
215
-
216
- return flb_simdutf_connector_convert_utf16be_to_utf8 (str16 .c_str (), str16 .size (),
217
- output , out_size );
223
+ return convert_from_unicode (flb_simdutf_connector_convert_utf16be_to_utf8 ,
224
+ input , length , output , out_size );
218
225
}
219
226
else {
220
227
/* Note: UTF-32LE and UTF-32BE are used for internal usages
0 commit comments