Skip to content

Commit cedf532

Browse files
committed
simdutf_connector: reduce copying
- Do not copy input if data is already aligned.
- Only allocate output once.

Signed-off-by: Erik Cederberg <[email protected]>
1 parent 3c8f9f2 commit cedf532

File tree

1 file changed

+80
-73
lines changed

1 file changed

+80
-73
lines changed

src/simdutf/flb_simdutf_connector.cpp

Lines changed: 80 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,50 @@
1919

2020
#include <simdutf.h>
2121
#include <fluent-bit/simdutf/flb_simdutf_connector.h>
22-
#include <memory.h>
2322
#include <memory>
23+
extern "C"
24+
{
25+
#include <fluent-bit/flb_info.h>
26+
#include <fluent-bit/flb_log.h>
27+
#include <fluent-bit/flb_mem.h>
28+
}
29+
30+
/*
 * Function-pointer type for the UTF-16 to UTF-8 conversion routines that
 * convert_from_unicode() dispatches to. The callee receives a char16_t
 * buffer and its length, plus the output pointer and output size slots.
 */
typedef int (*conversion_function)(const char16_t *buf, size_t len,
31+
char **utf8_output, size_t *out_size);
32+
33+
/*
 * Common front end for the UTF-16 conversion paths: trims the byte length to
 * an even value, makes sure the data handed to `convert` starts on a 2-byte
 * boundary (copying it only when it does not), and forwards the call.
 *
 *   convert   conversion routine invoked with the aligned buffer
 *   input     raw input bytes
 *   length    size of `input` in bytes; a trailing odd byte is ignored
 *   output    passed through unchanged to `convert`
 *   out_size  passed through unchanged to `convert`
 *
 * Returns FLB_SIMDUTF_CONNECTOR_CONVERT_NOP when fewer than two usable bytes
 * remain, FLB_SIMDUTF_CONNECTOR_CONVERT_ERROR when the temporary buffer cannot
 * be allocated, and otherwise whatever `convert` returns.
 */
static int convert_from_unicode(conversion_function convert,
34+
const char *input, size_t length,
35+
char **output, size_t *out_size)
36+
{
37+
size_t len;
38+
/* Holds the aligned copy when one is needed; flb_free is its deleter, so
 * the copy is released on every return path. Stays empty otherwise. */
std::unique_ptr<char16_t, decltype(&flb_free)> temp_buffer(NULL, flb_free);
39+
const char16_t *aligned_input = NULL;
40+
/* NOTE(review): `status` is declared but never read or written below. */
int status;
41+
42+
len = length;
43+
/* Drop a trailing odd byte so only whole 16-bit units are processed */
if (len % 2) {
44+
len--;
45+
}
46+
/* Nothing to convert without at least one complete 16-bit unit */
if (len < 2) {
47+
return FLB_SIMDUTF_CONNECTOR_CONVERT_NOP;
48+
}
49+
50+
/* Check alignment to determine whether to copy or not */
51+
if ((uintptr_t) input % 2 == 0) {
52+
/* Address is already even: use the caller's bytes in place */
aligned_input = (const char16_t *) input;
53+
}
54+
else {
55+
/* Odd address: copy the (even-sized) payload into a freshly allocated buffer */
temp_buffer.reset((char16_t *) flb_malloc(len));
56+
if (temp_buffer.get() == NULL) {
57+
flb_errno();
58+
return FLB_SIMDUTF_CONNECTOR_CONVERT_ERROR;
59+
}
60+
memcpy(temp_buffer.get(), input, len);
61+
aligned_input = temp_buffer.get();
62+
}
63+
64+
/* `len` is in bytes; `convert` is given the number of char16_t units */
return convert(aligned_input, len / 2, output, out_size);
65+
}
2466

2567
int flb_simdutf_connector_utf8_length_from_utf16le(const char16_t *buf, size_t len)
2668
{
@@ -61,23 +103,24 @@ int flb_simdutf_connector_convert_utf16le_to_utf8(const char16_t *buf, size_t le
61103
char **utf8_output, size_t *out_size)
62104
{
63105
size_t clen = 0;
64-
size_t converted = 0;
65-
simdutf::result result;
106+
simdutf::result result = {};
66107

67108
clen = simdutf::utf8_length_from_utf16le(buf, len);
68-
/* convert_utfXXXX_to_utf8 function needs to pass allocated memory region with C++ style */
69-
std::unique_ptr<char[]> output{new char[clen]};
70-
converted = simdutf::convert_utf16le_to_utf8(buf, len, output.get());
71-
result = simdutf::validate_utf8_with_errors(output.get(), clen);
72-
if (result.error == simdutf::error_code::SUCCESS && converted > 0) {
73-
std::string result_string(output.get(), clen);
109+
*utf8_output = (char *) flb_malloc(clen + 1);
110+
if (*utf8_output == NULL) {
111+
flb_errno();
112+
return FLB_SIMDUTF_CONNECTOR_CONVERT_ERROR;
113+
}
74114

75-
*utf8_output = strdup(result_string.c_str());
76-
*out_size = converted;
115+
result = simdutf::convert_utf16le_to_utf8_with_errors(buf, len, *utf8_output);
116+
if (result.error == simdutf::error_code::SUCCESS && result.count > 0) {
117+
(*utf8_output)[result.count] = '\0';
118+
*out_size = result.count;
77119

78120
return FLB_SIMDUTF_ERROR_CODE_SUCCESS;
79121
}
80122
else {
123+
flb_free(*utf8_output);
81124
*utf8_output = NULL;
82125
*out_size = 0;
83126

@@ -89,23 +132,24 @@ int flb_simdutf_connector_convert_utf16be_to_utf8(const char16_t *buf, size_t le
89132
char **utf8_output, size_t *out_size)
90133
{
91134
size_t clen = 0;
92-
size_t converted = 0;
93-
simdutf::result result;
135+
simdutf::result result = {};
94136

95137
clen = simdutf::utf8_length_from_utf16be(buf, len);
96-
/* convert_utfXXXX_to_utf8 function needs to pass allocated memory region with C++ style */
97-
std::unique_ptr<char[]> output{new char[clen]};
98-
converted = simdutf::convert_utf16be_to_utf8(buf, len, output.get());
99-
result = simdutf::validate_utf8_with_errors(output.get(), clen);
100-
if (result.error == simdutf::error_code::SUCCESS && converted > 0) {
101-
std::string result_string(output.get(), clen);
138+
*utf8_output = (char *) flb_malloc(clen + 1);
139+
if (*utf8_output == NULL) {
140+
flb_errno();
141+
return FLB_SIMDUTF_CONNECTOR_CONVERT_ERROR;
142+
}
102143

103-
*utf8_output = strdup(result_string.c_str());
104-
*out_size = converted;
144+
result = simdutf::convert_utf16be_to_utf8_with_errors(buf, len, *utf8_output);
145+
if (result.error == simdutf::error_code::SUCCESS && result.count > 0) {
146+
(*utf8_output)[result.count] = '\0';
147+
*out_size = result.count;
105148

106149
return FLB_SIMDUTF_ERROR_CODE_SUCCESS;
107150
}
108151
else {
152+
flb_free(*utf8_output);
109153
*utf8_output = NULL;
110154
*out_size = 0;
111155

@@ -117,23 +161,24 @@ int flb_simdutf_connector_convert_utf16_to_utf8(const char16_t *buf, size_t len,
117161
char **utf8_output, size_t *out_size)
118162
{
119163
size_t clen = 0;
120-
size_t converted = 0;
121-
simdutf::result result;
164+
simdutf::result result = {};
122165

123166
clen = simdutf::utf8_length_from_utf16(buf, len);
124-
/* convert_utfXXXX_to_utf8 function needs to pass allocated memory region with C++ style */
125-
std::unique_ptr<char[]> output{new char[clen]};
126-
converted = simdutf::convert_utf16_to_utf8(buf, len, output.get());
127-
result = simdutf::validate_utf8_with_errors(output.get(), clen);
128-
if (result.error == simdutf::error_code::SUCCESS && converted > 0) {
129-
std::string result_string(output.get(), clen);
167+
*utf8_output = (char *) flb_malloc(clen + 1);
168+
if (*utf8_output == NULL) {
169+
flb_errno();
170+
return FLB_SIMDUTF_CONNECTOR_CONVERT_ERROR;
171+
}
130172

131-
*utf8_output = strdup(result_string.c_str());
132-
*out_size = converted;
173+
result = simdutf::convert_utf16_to_utf8_with_errors(buf, len, *utf8_output);
174+
if (result.error == simdutf::error_code::SUCCESS && result.count > 0) {
175+
(*utf8_output)[result.count] = '\0';
176+
*out_size = result.count;
133177

134178
return FLB_SIMDUTF_ERROR_CODE_SUCCESS;
135179
}
136180
else {
181+
flb_free(*utf8_output);
137182
*utf8_output = NULL;
138183
*out_size = 0;
139184

@@ -155,11 +200,7 @@ int flb_simdutf_connector_convert_from_unicode(int preferred_encoding,
155200
const char *input, size_t length,
156201
char **output, size_t *out_size)
157202
{
158-
size_t len = 0;
159-
size_t i = 0;
160203
int encoding = 0;
161-
std::u16string str16;
162-
163204
if (preferred_encoding == FLB_SIMDUTF_ENCODING_TYPE_UNICODE_AUTO) {
164205
encoding = simdutf::detect_encodings(input, length);
165206
}
@@ -175,46 +216,12 @@ int flb_simdutf_connector_convert_from_unicode(int preferred_encoding,
175216
return FLB_SIMDUTF_CONNECTOR_CONVERT_NOP;
176217
}
177218
else if ((encoding & simdutf::encoding_type::UTF16_LE) == simdutf::encoding_type::UTF16_LE) {
178-
len = length;
179-
if (len % 2) {
180-
len--;
181-
}
182-
if (len < 2) {
183-
return FLB_SIMDUTF_CONNECTOR_CONVERT_NOP;
184-
}
185-
for (i = 0 ; i < len;) {
186-
if (i + 2 > len) {
187-
break;
188-
}
189-
/* little-endian */
190-
int lo = input[i++] & 0xFF;
191-
int hi = input[i++] & 0xFF;
192-
str16.push_back(hi << 8 | lo);
193-
}
194-
195-
return flb_simdutf_connector_convert_utf16le_to_utf8(str16.c_str(), str16.size(),
196-
output, out_size);
219+
return convert_from_unicode(flb_simdutf_connector_convert_utf16le_to_utf8,
220+
input, length, output, out_size);
197221
}
198222
else if ((encoding & simdutf::encoding_type::UTF16_BE) == simdutf::encoding_type::UTF16_BE) {
199-
len = length;
200-
if (len % 2) {
201-
len--;
202-
}
203-
if (len < 2) {
204-
return FLB_SIMDUTF_CONNECTOR_CONVERT_NOP;
205-
}
206-
for (i = 0; i < len;) {
207-
if (i + 2 > len) {
208-
break;
209-
}
210-
/* big-endian */
211-
int lo = input[i++] & 0xFF;
212-
int hi = input[i++] & 0xFF;
213-
str16.push_back(lo | hi << 8);
214-
}
215-
216-
return flb_simdutf_connector_convert_utf16be_to_utf8(str16.c_str(), str16.size(),
217-
output, out_size);
223+
return convert_from_unicode(flb_simdutf_connector_convert_utf16be_to_utf8,
224+
input, length, output, out_size);
218225
}
219226
else {
220227
/* Note: UTF-32LE and UTF-32BE are used for internal usages

0 commit comments

Comments
 (0)