Skip to content

Commit

Permalink
Replace charCode splitting with proper UTF-8 encoding
Browse files Browse the repository at this point in the history
  • Loading branch information
BraydenLangley committed Feb 27, 2025
1 parent b717db3 commit 721d878
Showing 1 changed file with 47 additions and 10 deletions.
57 changes: 47 additions & 10 deletions src/primitives/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,19 +84,56 @@ const base64ToArray = (msg: string): number[] => {
return result
}

/**
 * Encodes a string into an array of bytes representing its UTF-8 encoding.
 *
 * The string is walked one Unicode code point at a time. A valid surrogate
 * pair is combined into a single supplementary code point (4 bytes), while
 * any lone (unpaired) surrogate is replaced with the Unicode replacement
 * character U+FFFD (encoded as 0xEF 0xBF 0xBD).
 *
 * @param str - The string to encode.
 * @returns An array of numbers, each representing one byte (0-255) of the
 *          UTF-8 encoded string. An empty string yields an empty array.
 */
function utf8ToArray (str: string): number[] {
  const result: number[] = []

  for (let i = 0; i < str.length; i++) {
    // codePointAt only returns undefined for an out-of-range index, which the
    // loop bound rules out; the fallback merely satisfies the type checker.
    let codePoint = str.codePointAt(i) ?? 0xFFFD

    if (codePoint > 0xFFFF) {
      // Valid surrogate pair => skip the next code unit because codePointAt
      // has already combined both halves into a single code point.
      i++
    } else if (codePoint >= 0xD800 && codePoint <= 0xDFFF) {
      // Lone (unpaired) high or low surrogate: it has no valid UTF-8 form,
      // so substitute the replacement character (U+FFFD).
      codePoint = 0xFFFD
    }

    // Encode according to the UTF-8 standard: 1 to 4 bytes depending on the
    // magnitude of the code point.
    if (codePoint <= 0x7F) {
      // 0xxxxxxx
      result.push(codePoint)
    } else if (codePoint <= 0x7FF) {
      // 110xxxxx 10xxxxxx
      result.push(
        0xC0 | (codePoint >> 6),
        0x80 | (codePoint & 0x3F)
      )
    } else if (codePoint <= 0xFFFF) {
      // 1110xxxx 10xxxxxx 10xxxxxx
      result.push(
        0xE0 | (codePoint >> 12),
        0x80 | ((codePoint >> 6) & 0x3F),
        0x80 | (codePoint & 0x3F)
      )
    } else {
      // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      result.push(
        0xF0 | (codePoint >> 18),
        0x80 | ((codePoint >> 12) & 0x3F),
        0x80 | ((codePoint >> 6) & 0x3F),
        0x80 | (codePoint & 0x3F)
      )
    }
  }

  return result
}

/**
Expand Down

0 comments on commit 721d878

Please sign in to comment.