perf(parser): fast-path name tokenization to avoid TextDecoder (#33)

ntedvs · web-flow · commit 82ce95b7e95c · 2026-02-22T13:28:56.000+11:00
Skip intermediate number[] array, Uint8Array allocation, and
TextDecoder.decode() for the 99%+ of PDF names that contain no #XX
hex escapes. Build string directly via String.fromCharCode loop.

5-10% improvement on parsing benchmarks (CPU profile showed readName +
TextDecoder.decode at ~20% of total parse time).
diff --git a/src/parser/token-reader.test.ts b/src/parser/token-reader.test.ts
@@ -323,6 +323,27 @@ describe("TokenReader", () => {
       expect(token).toMatchObject({ type: "name", value: "Test#5" });
     });
 
+    it("uses fast path for plain ASCII name", () => {
+      const r = reader("/Type");
+      const token = r.nextToken();
+
+      expect(token).toMatchObject({ type: "name", value: "Type" });
+    });
+
+    it("falls back to slow path when # is first char", () => {
+      const r = reader("/#48ello"); // #48 = 'H'
+      const token = r.nextToken();
+
+      expect(token).toMatchObject({ type: "name", value: "Hello" });
+    });
+
+    it("falls back to slow path when # appears mid-name", () => {
+      const r = reader("/Type#20Name"); // #20 = space
+      const token = r.nextToken();
+
+      expect(token).toMatchObject({ type: "name", value: "Type Name" });
+    });
+
     it("stops at whitespace", () => {
       const r = reader("/Type /Page");
 
diff --git a/src/parser/token-reader.ts b/src/parser/token-reader.ts
@@ -284,6 +284,48 @@ export class TokenReader {
     // Skip the leading /
     this.scanner.advance();
 
+    const data = this.scanner.bytes;
+    const start = this.scanner.position;
+    let pos = start;
+    const len = data.length;
+
+    // Fast path: scan for end of name, checking for # escapes
+    let hasEscape = false;
+
+    while (pos < len) {
+      const byte = data[pos];
+
+      if (!isRegularChar(byte)) {
+        break;
+      }
+
+      if (byte === CHAR_HASH) {
+        hasEscape = true;
+        break;
+      }
+
+      pos++;
+    }
+
+    if (!hasEscape) {
+      // Common case: pure ASCII name with no escapes.
+      // Build string directly from byte range — no intermediate array,
+      // no Uint8Array allocation, no TextDecoder.
+      this.scanner.moveTo(pos);
+
+      let value = "";
+
+      for (let i = start; i < pos; i++) {
+        value += String.fromCharCode(data[i]);
+      }
+
+      return { type: "name", value, position };
+    }
+
+    // Slow path: name contains # escapes, need byte-by-byte processing.
+    // Reset scanner to start and use the original accumulation approach.
+    this.scanner.moveTo(start);
+
     const bytes: number[] = [];
 
     while (true) {