Skip to content

Commit

Permalink
feat(stdlib): Add Char.encodedLength (#2238)
Browse files Browse the repository at this point in the history
Co-authored-by: Oscar Spencer <[email protected]>
  • Loading branch information
spotandjake and ospencer authored Feb 16, 2025
1 parent bdb7f7c commit c549fac
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 0 deletions.
14 changes: 14 additions & 0 deletions compiler/test/stdlib/char.test.gr
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,20 @@ assert Char.code(Char.pred(Char.fromCode(0xE000))) == 0xD7FF
assert Char.toString('f') == "f"
assert Char.toString('💯') == "💯"

// encodedLength
// The UTF8 and UTF32 expectations below are byte counts; the UTF16
// expectations are counts of 16-bit code units (2 only for '🌾').
assert Char.encodedLength(Char.UTF8, 'a') == 1
assert Char.encodedLength(Char.UTF8, '©') == 2
assert Char.encodedLength(Char.UTF8, '☃') == 3
assert Char.encodedLength(Char.UTF8, '🌾') == 4
assert Char.encodedLength(Char.UTF16, 'a') == 1
assert Char.encodedLength(Char.UTF16, '©') == 1
assert Char.encodedLength(Char.UTF16, '☃') == 1
assert Char.encodedLength(Char.UTF16, '🌾') == 2
assert Char.encodedLength(Char.UTF32, 'a') == 4
assert Char.encodedLength(Char.UTF32, '©') == 4
assert Char.encodedLength(Char.UTF32, '☃') == 4
assert Char.encodedLength(Char.UTF32, '🌾') == 4

// issue #927
let chars = [> '\u{1F3F4}', '\u{E0067}']
let mut charPosition = 0
Expand Down
36 changes: 36 additions & 0 deletions stdlib/char.gr
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,42 @@ provide let toString = (char: Char) => {
WasmI32.toGrain(string): String
}

/**
* The Unicode encodings in which the length of a character can be measured.
* A value of this type is passed as the first argument to `encodedLength`.
*
* @since v0.7.0
*/
provide enum Encoding {
UTF8,
UTF16,
UTF32,
}

/**
 * Returns the length of a character when encoded in the given encoding.
 *
 * The unit of the result depends on the encoding: for `UTF8` and `UTF32` it
 * is the number of bytes, while for `UTF16` it is the number of 16-bit code
 * units (1 or 2), not the number of bytes.
 *
 * @param encoding: The encoding to check
 * @param char: The character
 * @returns The encoded length of the character in the given encoding
 *
 * @example Char.encodedLength(Char.UTF8, 'a') == 1
 * @example Char.encodedLength(Char.UTF8, '🌾') == 4
 * @example Char.encodedLength(Char.UTF16, '©') == 1
 *
 * @since v0.7.0
 */
@unsafe
provide let encodedLength = (encoding, char: Char) => {
  // UTF-8 length of the character's scalar value, converted to a Grain Number.
  let utf8Length = tagSimpleNumber(usvEncodeLength(untagChar(char)))
  match (encoding) {
    UTF8 => utf8Length,
    // Exactly the characters that take 4 bytes in UTF-8 take 2 code units here.
    UTF16 => if (utf8Length == 4) 2 else 1,
    // Every character has the same fixed length in UTF-32.
    UTF32 => 4,
  }
}

/**
* Checks if the first character is less than the second character by Unicode scalar value.
*
Expand Down
61 changes: 61 additions & 0 deletions stdlib/char.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,27 @@ from "char" include Char
'🌾'
```

## Types

Type declarations included in the Char module.

### Char.**Encoding**

<details disabled>
<summary tabindex="-1">Added in <code>next</code></summary>
No other changes yet.
</details>

```grain
enum Encoding {
UTF8,
UTF16,
UTF32,
}
```

The Unicode encodings in which the length of a character can be measured with `Char.encodedLength`.

## Values

Functions and constants included in the Char module.
Expand Down Expand Up @@ -285,6 +306,46 @@ Char.toString('a') == "a"
Char.toString('🌾') == "🌾"
```

### Char.**encodedLength**

<details disabled>
<summary tabindex="-1">Added in <code>next</code></summary>
No other changes yet.
</details>

```grain
encodedLength : (encoding: Encoding, char: Char) => Number
```

Returns the length of a character when encoded in the given encoding: the number of bytes for `UTF8` and `UTF32`, and the number of 16-bit code units for `UTF16`.

Parameters:

|param|type|description|
|-----|----|-----------|
|`encoding`|`Encoding`|The encoding to check|
|`char`|`Char`|The character|

Returns:

|type|description|
|----|-----------|
|`Number`|The encoded length of the character: bytes for `UTF8` and `UTF32`, 16-bit code units for `UTF16`|

Examples:

```grain
Char.encodedLength(Char.UTF8, 'a') == 1
```

```grain
Char.encodedLength(Char.UTF8, '🌾') == 4
```

```grain
Char.encodedLength(Char.UTF16, '©') == 1
```

### Char.**(<)**

<details disabled>
Expand Down

0 comments on commit c549fac

Please sign in to comment.