Skip to content

Commit 02b5548

Browse files
Quick test
1 parent 933b984 commit 02b5548

File tree

1 file changed

+50
-12
lines changed

1 file changed

+50
-12
lines changed

src/WebSocketProtocol.h

Lines changed: 50 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,9 @@
2525
#include <cstdlib>
2626
#include <string_view>
2727

28+
// Quick test: pull in the x86 SIMD intrinsics (_mm256_*) used by the UTF-8 validator
29+
#include <immintrin.h>
30+
2831
namespace uWS {
2932

3033
/* We should not overcomplicate these */
@@ -114,24 +117,59 @@ T cond_byte_swap(T value) {
114117
// Optimized for predominantly 7-bit content by Alex Hultman, 2016
115118
// Licensed as Zlib, like the rest of this project
116119
// This runs about 40% faster than simdutf with g++ -mavx
117-
static bool isValidUtf8(unsigned char *s, size_t length)
118-
{
119-
for (unsigned char *e = s + length; s != e; ) {
120-
if (s + 16 <= e) {
121-
uint64_t tmp[2];
122-
memcpy(tmp, s, 16);
123-
if (((tmp[0] & 0x8080808080808080) | (tmp[1] & 0x8080808080808080)) == 0) {
124-
s += 16;
125-
continue;
120+
static bool isValidUtf8(unsigned char *s, size_t length) {
121+
auto firstUtf8EscapeByte = [](unsigned char *s, unsigned char *e) {
122+
// Align: check the first 32 bytes with an unaligned load, then advance s to the next 32-byte boundary
123+
if (s + 32 <= e) {
124+
int mask = _mm256_movemask_epi8(_mm256_loadu_si256((const __m256i*)s));
125+
if (mask) {
126+
return s + __builtin_ctz(mask);
127+
}
128+
s += 32 - ((uintptr_t)s % 32);
129+
} else {
130+
// Worst path: fewer than 32 bytes remain, so scan them one byte at a time
131+
while (s < e) {
132+
if (*s & 0x80) {
133+
return s;
134+
}
135+
s++;
126136
}
137+
return e;
127138
}
128139

129-
while (!(*s & 0x80)) {
130-
if (++s == e) {
131-
return true;
140+
while (s + 128 <= e) {
141+
// Aligned: s is on a 32-byte boundary here, so aligned loads can be used (4 x 32 bytes per iteration)
142+
int mask = _mm256_movemask_epi8(_mm256_load_si256((const __m256i*)s));
143+
if (mask) {
144+
return s + __builtin_ctz(mask);
145+
}
146+
s += 32;
147+
mask = _mm256_movemask_epi8(_mm256_load_si256((const __m256i*)s));
148+
if (mask) {
149+
return s + __builtin_ctz(mask);
132150
}
151+
s += 32;
152+
mask = _mm256_movemask_epi8(_mm256_load_si256((const __m256i*)s));
153+
if (mask) {
154+
return s + __builtin_ctz(mask);
155+
}
156+
s += 32;
157+
mask = _mm256_movemask_epi8(_mm256_load_si256((const __m256i*)s));
158+
if (mask) {
159+
return s + __builtin_ctz(mask);
160+
}
161+
s += 32;
133162
}
134163

164+
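// Exit: scan the remaining tail (fewer than 128 bytes) one byte at a time.
// NOTE(review): the condition below reads *s before testing s < e, so it dereferences e when s == e; the bounds test should come first.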
165+
while ((*s & 0x80) == 0 && s < e) {
166+
s++;
167+
}
168+
return s;
169+
};
170+
171+
for (unsigned char *e = s + length; (s = (unsigned char *) firstUtf8EscapeByte(s, e)) != e; ) {
172+
135173
if ((s[0] & 0x60) == 0x40) {
136174
if (s + 1 >= e || (s[1] & 0xc0) != 0x80 || (s[0] & 0xfe) == 0xc0) {
137175
return false;

0 commit comments

Comments
 (0)