Skip to content

Commit 4167bf3

Browse files
committed
borrow sonic-rs impl
1 parent a9762ef commit 4167bf3

File tree

11 files changed

+758
-1565
lines changed

11 files changed

+758
-1565
lines changed

.github/workflows/CI.yml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,30 @@ jobs:
4545
- name: Run tests
4646
run: cargo test
4747

48+
miri:
49+
runs-on: ubuntu-latest
50+
steps:
51+
- uses: actions/checkout@v5
52+
- name: Setup Rust
53+
uses: dtolnay/rust-toolchain@stable
54+
with:
55+
targets: x86_64-unknown-linux-gnu
56+
components: miri
57+
toolchain: nightly
58+
- uses: actions/setup-node@v5
59+
with:
60+
node-version: 22
61+
cache: 'yarn'
62+
- name: Install dependencies
63+
run: yarn install
64+
- name: Download fixtures
65+
run: node download-fixtures.js
66+
env:
67+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
68+
- name: Run miri
69+
run: cargo miri test
70+
env:
71+
MIRIFLAGS: "-Zmiri-disable-isolation"
4872
bench:
4973
strategy:
5074
matrix:

Cargo.lock

Lines changed: 41 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ name = "escape"
1717
path = "examples/escape.rs"
1818

1919
[features]
20-
force_aarch64_neon = [] # Force use of neon implementation on aarch64
2120
codspeed = ["criterion2/codspeed"]
2221

2322
[[bench]]
@@ -26,10 +25,13 @@ harness = false
2625

2726
[dependencies]
2827
anyhow = "1"
28+
sonic-simd = "0.1"
29+
thiserror = "2"
2930

3031
[dev-dependencies]
3132
criterion2 = "3"
3233
glob = "0.3"
34+
rand = "0.9"
3335
serde = "1"
3436
serde_json = "1"
3537
v_jsonescape = "0.7"

README.md

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,7 @@
44
![docs.rs](https://img.shields.io/docsrs/json-escape-simd)
55
[![CodSpeed Badge](https://img.shields.io/endpoint?url=https://codspeed.io/badge.json)](https://codspeed.io/napi-rs/json-escape-simd)
66

7-
Optimized SIMD routines for escaping JSON strings. This repository contains the `json-escape-simd` crate, comparison fixtures, and Criterion benches against commonly used alternatives.
8-
9-
> [!IMPORTANT]
10-
>
11-
> On aarch64 NEON hosts the available register width is **128** bits, which is narrower than the lookup table this implementation prefers. As a result the SIMD path may not outperform the generic fallback, which is reflected in the benchmark numbers below.
12-
>
13-
> On some modern macOS devices with larger register numbers, the SIMD path may outperform the generic fallback, see the [M3 max benchmark](#apple-m3-max) below.
14-
15-
> [!NOTE]
16-
>
17-
> The `force_aarch64_neon` feature flag can be used to force use of the neon implementation on aarch64. This is useful for the benchmark.
7+
Optimized SIMD routines for escaping JSON strings. The implementation is borrowed from [sonic-rs](https://github.com/cloudwego/sonic-rs); only the string-escaping part is included, to avoid the abstraction overhead of the full library.
188

199
## Benchmarks
2010

benches/escape.rs

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@ use std::{fs, hint::black_box};
22

33
use criterion::{Criterion, criterion_group, criterion_main};
44

5-
use json_escape_simd::{escape, escape_generic};
5+
use generic::escape_generic;
6+
use json_escape_simd::escape;
7+
8+
mod generic;
69

710
fn get_rxjs_sources() -> Vec<String> {
811
let rxjs_paths = glob::glob("node_modules/rxjs/src/**/*.ts").unwrap();
@@ -86,6 +89,16 @@ fn run_benchmarks(c: &mut Criterion, sources: &[String], prefix: &str) {
8689
});
8790
}
8891

92+
fn short_string_benchmark(c: &mut Criterion) {
93+
let sources = vec![
94+
"Hello, world!".to_string(),
95+
r#"abcdefghijklmnopqrstuvwxyz .*? hello world escape json string"#.to_string(),
96+
"normal string 🥹".to_string(),
97+
"中文 English 🚀 \n❓ 𝄞".to_string(),
98+
];
99+
run_benchmarks(c, &sources, "short string");
100+
}
101+
89102
fn rxjs_benchmark(c: &mut Criterion) {
90103
let sources = get_rxjs_sources();
91104
if !sources.is_empty() {
@@ -100,5 +113,10 @@ fn fixtures_benchmark(c: &mut Criterion) {
100113
}
101114
}
102115

103-
criterion_group!(benches, rxjs_benchmark, fixtures_benchmark);
116+
criterion_group!(
117+
benches,
118+
short_string_benchmark,
119+
rxjs_benchmark,
120+
fixtures_benchmark
121+
);
104122
criterion_main!(benches);

src/generic.rs renamed to benches/generic.rs

Lines changed: 0 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,6 @@ pub fn escape_generic<S: AsRef<str>>(s: S) -> String {
1313
unsafe { String::from_utf8_unchecked(result) }
1414
}
1515

16-
#[inline]
17-
pub fn escape_into_generic<S: AsRef<str>>(s: S, output: &mut Vec<u8>) {
18-
let s = s.as_ref();
19-
let bytes = s.as_bytes();
20-
output.push(b'"');
21-
escape_inner(bytes, output);
22-
output.push(b'"');
23-
}
24-
2516
#[inline]
2617
// Slightly modified version of
2718
// <https://github.com/serde-rs/json/blob/d12e943590208da738c092db92c34b39796a2538/src/ser.rs#L2079>
@@ -140,49 +131,3 @@ pub(crate) static HEX_BYTES: [HexPair; 32] = [
140131
HexPair(b'1', b'e'),
141132
HexPair(b'1', b'f'),
142133
];
143-
144-
// Optimized escape table with 8-byte arrays for fast bulk writing
145-
// First element is the length of escape sequence, followed by the escape bytes
146-
pub(crate) static ESCAPE_TABLE: [(u8, [u8; 8]); 256] = {
147-
let mut table = [(0u8, [0u8; 8]); 256];
148-
149-
// Control characters \u0000 - \u001f
150-
table[0x00] = (6, *b"\\u0000\0\0");
151-
table[0x01] = (6, *b"\\u0001\0\0");
152-
table[0x02] = (6, *b"\\u0002\0\0");
153-
table[0x03] = (6, *b"\\u0003\0\0");
154-
table[0x04] = (6, *b"\\u0004\0\0");
155-
table[0x05] = (6, *b"\\u0005\0\0");
156-
table[0x06] = (6, *b"\\u0006\0\0");
157-
table[0x07] = (6, *b"\\u0007\0\0");
158-
table[0x08] = (2, *b"\\b\0\0\0\0\0\0");
159-
table[0x09] = (2, *b"\\t\0\0\0\0\0\0");
160-
table[0x0A] = (2, *b"\\n\0\0\0\0\0\0");
161-
table[0x0B] = (6, *b"\\u000b\0\0");
162-
table[0x0C] = (2, *b"\\f\0\0\0\0\0\0");
163-
table[0x0D] = (2, *b"\\r\0\0\0\0\0\0");
164-
table[0x0E] = (6, *b"\\u000e\0\0");
165-
table[0x0F] = (6, *b"\\u000f\0\0");
166-
table[0x10] = (6, *b"\\u0010\0\0");
167-
table[0x11] = (6, *b"\\u0011\0\0");
168-
table[0x12] = (6, *b"\\u0012\0\0");
169-
table[0x13] = (6, *b"\\u0013\0\0");
170-
table[0x14] = (6, *b"\\u0014\0\0");
171-
table[0x15] = (6, *b"\\u0015\0\0");
172-
table[0x16] = (6, *b"\\u0016\0\0");
173-
table[0x17] = (6, *b"\\u0017\0\0");
174-
table[0x18] = (6, *b"\\u0018\0\0");
175-
table[0x19] = (6, *b"\\u0019\0\0");
176-
table[0x1A] = (6, *b"\\u001a\0\0");
177-
table[0x1B] = (6, *b"\\u001b\0\0");
178-
table[0x1C] = (6, *b"\\u001c\0\0");
179-
table[0x1D] = (6, *b"\\u001d\0\0");
180-
table[0x1E] = (6, *b"\\u001e\0\0");
181-
table[0x1F] = (6, *b"\\u001f\0\0");
182-
183-
// Special characters
184-
table[0x22] = (2, *b"\\\"\0\0\0\0\0\0"); // "
185-
table[0x5C] = (2, *b"\\\\\0\0\0\0\0\0"); // \
186-
187-
table
188-
};

examples/escape.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,10 @@
11
use std::fs;
22

3-
use json_escape_simd::{escape, escape_generic};
3+
use json_escape_simd::escape;
44

55
fn main() {
66
for fixture in get_rxjs_sources() {
77
let encoded = escape(&fixture);
8-
let encoded_fallback = escape_generic(&fixture);
9-
assert_eq!(encoded, encoded_fallback);
108
assert_eq!(encoded, sonic_rs::to_string(&fixture).unwrap());
119
assert_eq!(encoded, serde_json::to_string(&fixture).unwrap());
1210
}

0 commit comments

Comments
 (0)