diff --git a/.changeset/afraid-chicken-attack.md b/.changeset/afraid-chicken-attack.md new file mode 100644 index 00000000000..9898087c0b5 --- /dev/null +++ b/.changeset/afraid-chicken-attack.md @@ -0,0 +1,5 @@ +--- +'openzeppelin-solidity': minor +--- + +`Bytes`: Add `splice(bytes,uint256)` and `splice(bytes,uint256,uint256)`, two "in place" variants of the existing slice functions diff --git a/.changeset/loose-lamps-bake.md b/.changeset/loose-lamps-bake.md new file mode 100644 index 00000000000..bc4703817ae --- /dev/null +++ b/.changeset/loose-lamps-bake.md @@ -0,0 +1,5 @@ +--- +'openzeppelin-solidity': minor +--- + +`Base58`: Add a library for encoding and decoding bytes buffers into base58 strings. diff --git a/.changeset/thirty-pugs-pick.md b/.changeset/thirty-pugs-pick.md new file mode 100644 index 00000000000..955c449b620 --- /dev/null +++ b/.changeset/thirty-pugs-pick.md @@ -0,0 +1,5 @@ +--- +'openzeppelin-solidity': minor +--- + +`Bytes`: Add `countLeading` and `countConsecutive` diff --git a/contracts/mocks/Stateless.sol b/contracts/mocks/Stateless.sol index 97e79085bfb..1cc0ccdb6f8 100644 --- a/contracts/mocks/Stateless.sol +++ b/contracts/mocks/Stateless.sol @@ -7,6 +7,7 @@ pragma solidity ^0.8.26; import {Address} from "../utils/Address.sol"; import {Arrays} from "../utils/Arrays.sol"; import {AuthorityUtils} from "../access/manager/AuthorityUtils.sol"; +import {Base58} from "../utils/Base58.sol"; import {Base64} from "../utils/Base64.sol"; import {BitMaps} from "../utils/structs/BitMaps.sol"; import {Blockhash} from "../utils/Blockhash.sol"; diff --git a/contracts/utils/Base58.sol b/contracts/utils/Base58.sol new file mode 100644 index 00000000000..42a59902ff5 --- /dev/null +++ b/contracts/utils/Base58.sol @@ -0,0 +1,207 @@ +// SPDX-License-Identifier: MIT + +pragma solidity ^0.8.20; + +/** + * @dev Provides a set of functions to operate with Base58 strings. 
+ * + * Initially based on https://github.com/storyicon/base58-solidity/commit/807428e5174e61867e4c606bdb26cba58a8c5cb1[storyicon's implementation] (MIT). + * Based on the updated and improved https://github.com/Vectorized/solady/blob/main/src/utils/Base58.sol[Vectorized version] (MIT). NOTE(review): the error declared below is named `InvalidBase56Digit`; presumably `Base58` was intended - confirm before release. + */ +library Base58 { + error InvalidBase56Digit(uint8); + + /** + * @dev Encode a `bytes` buffer as a Base58 `string`. + */ + function encode(bytes memory input) internal pure returns (string memory) { + return string(_encode(input)); + } + + /** + * @dev Decode a Base58 `string` into a `bytes` buffer. + */ + function decode(string memory input) internal pure returns (bytes memory) { + return _decode(bytes(input)); + } + + function _encode(bytes memory input) private pure returns (bytes memory output) { + uint256 inputLength = input.length; + if (inputLength == 0) return ""; + + assembly ("memory-safe") { + // Count the number of zero bytes at the beginning of `input`. These are encoded using the same number of '1's + // at the beginning of the encoded string. + let inputLeadingZeros := 0 + for {} lt(byte(0, mload(add(add(input, 0x20), inputLeadingZeros))), lt(inputLeadingZeros, inputLength)) {} { + inputLeadingZeros := add(inputLeadingZeros, 1) + } + + // Start the output offset by an over-estimate of the length. + let outputLengthEstim := add(inputLeadingZeros, div(mul(sub(inputLength, inputLeadingZeros), 8351), 6115)) + + // This is going to be our "scratch" workspace. We leave enough room on the left to store the length and the encoded string. + let scratch := add(mload(0x40), add(outputLengthEstim, 0x21)) + + // Cut the input buffer into sections (limbs) of 31 bytes (248 bits). Store them in scratch. + let ptr := scratch + for { + // the first section is possibly smaller than 31 bytes + let i := mod(inputLength, 31) + // unroll the first iteration, which uses a different shift.
+ if i { + mstore(ptr, shr(mul(sub(32, i), 8), mload(add(input, 0x20)))) + ptr := add(ptr, 0x20) + } + } lt(i, inputLength) { + ptr := add(ptr, 0x20) // next limb + i := add(i, 31) // move in buffer + } { + // Load 32 bytes from the input buffer and shift to only keep the 31 leftmost. + mstore(ptr, shr(8, mload(add(add(input, 0x20), i)))) + } + + // Store the encoding table. This overlaps with the FMP that we are going to reset later anyway. + // See section 2 of https://datatracker.ietf.org/doc/html/draft-msporny-base58-03 + mstore(0x1f, "123456789ABCDEFGHJKLMNPQRSTUVWXY") + mstore(0x3f, "Zabcdefghijkmnopqrstuvwxyz") + + // Encoding the "input" part of the result. + // `output` points to the left end of the encoded string. We start from scratch, which means we have + // outputLengthEstim bytes to work with before hitting the FMP + for { + output := scratch + } 1 {} { + // check if there are non-zero limbs remaining + let i := scratch + for {} and(iszero(mload(i)), lt(i, ptr)) { + i := add(i, 0x20) + } {} + if eq(i, ptr) { + break + } + + // base 58 arithmetic on the 248-bit limbs + let carry := 0 + for { + i := scratch + } lt(i, ptr) { + i := add(i, 0x20) + } { + let acc := add(shl(248, carry), mload(i)) + mstore(i, div(acc, 58)) + carry := mod(acc, 58) + } + + // encode carry using the base58 table, and prepend it to the output + output := sub(output, 1) + mstore8(output, mload(carry)) + } + + // Write the input leading zeros (as '1' characters) to the left of the encoded string. + // This will spill to the left into the "length" of the buffer. + for { + let i := 0 + } lt(i, inputLeadingZeros) {} { + i := add(i, 0x20) + mstore(sub(output, i), "11111111111111111111111111111111") + } + + // Move output pointer to account for inputLeadingZeros + output := sub(output, add(inputLeadingZeros, 0x20)) + + // Store length and allocate (reserve) memory up to scratch.
+ mstore(output, sub(scratch, add(output, 0x20))) + mstore(0x40, scratch) + } + } + + function _decode(bytes memory input) private pure returns (bytes memory output) { + bytes4 errorSelector = InvalidBase56Digit.selector; + + uint256 inputLength = input.length; + if (inputLength == 0) return ""; + + /// @solidity memory-safe-assembly + assembly { + let inputLeadingZeros := 0 // Number of leading '1' in `input`. + // Count leading zeros. In base58, zeros are represented using '1' (chr(49)). + for {} and( + eq(byte(0, mload(add(add(input, 0x20), inputLeadingZeros))), 49), + lt(inputLeadingZeros, inputLength) + ) {} { + inputLeadingZeros := add(inputLeadingZeros, 1) + } + + // Start the output offset by an over-estimate of the length. + let outputLengthEstim := add(inputLeadingZeros, div(mul(sub(inputLength, inputLeadingZeros), 6115), 8351)) + + // This is going to be our "scratch" workspace. We leave enough room on the left to store the length and the decoded bytes. + let scratch := add(mload(0x40), add(outputLengthEstim, 0x21)) + + // Store the decoding table. This overlaps with the FMP that we are going to reset later anyway. + mstore(0x2a, 0x30313233343536373839) + mstore(0x20, 0x1718191a1b1c1d1e1f20ffffffffffff2122232425262728292a2bff2c2d2e2f) + mstore(0x00, 0x000102030405060708ffffffffffffff090a0b0c0d0e0f10ff1112131415ff16) + + // Decode each char of the input string, and store the result in sections (limbs) of 31 bytes, kept in scratch. + let ptr := scratch + let mask := shr(8, not(0)) + for { + let j := 0 + } lt(j, inputLength) { + j := add(j, 1) + } { + // for each char, decode it ... + let c := sub(byte(0, mload(add(add(input, 0x20), j))), 49) + // slither-disable-next-line incorrect-shift + if iszero(and(shl(c, 1), 0x3fff7ff03ffbeff01ff)) { + mstore(0, errorSelector) + mstore(4, add(c, 49)) + revert(0, 0x24) + } + let carry := byte(0, mload(c)) + + // ...
and add it to the limbs starting at `scratch` + for { + let i := scratch + } lt(i, ptr) { + i := add(i, 0x20) + } { + let acc := add(carry, mul(58, mload(i))) + mstore(i, and(mask, acc)) + carry := shr(248, acc) + } + // If the char just read results in a leftover carry, extend the limbs with the new value + if carry { + mstore(ptr, carry) + ptr := add(ptr, 0x20) + } + } + + // Copy and compact the uint248 limbs, then skip any zero bytes at the beginning. + output := scratch + for { + let i := scratch + } lt(i, ptr) { + i := add(i, 0x20) + } { + output := sub(output, 31) + mstore(sub(output, 1), mload(i)) + } + for {} lt(byte(0, mload(output)), lt(output, scratch)) {} { + output := add(output, 1) + } + + // Add the zeros that were encoded in the input (prefix '1's). Copying from calldatasize() reads past the end of calldata, which yields zero bytes. + calldatacopy(sub(output, inputLeadingZeros), calldatasize(), inputLeadingZeros) + + // Move output pointer to account for inputLeadingZeros + output := sub(output, add(inputLeadingZeros, 0x20)) + + // Store length and allocate (reserve) memory up to scratch. + mstore(output, sub(scratch, add(output, 0x20))) + mstore(0x40, scratch) + } + } +} diff --git a/contracts/utils/Base64.sol b/contracts/utils/Base64.sol index c6ee6a524aa..1870cff00cd 100644 --- a/contracts/utils/Base64.sol +++ b/contracts/utils/Base64.sol @@ -8,21 +8,21 @@ pragma solidity ^0.8.20; */ library Base64 { /** - * @dev Base64 Encoding/Decoding Table + * @dev Base64 encoding table * See sections 4 and 5 of https://datatracker.ietf.org/doc/html/rfc4648 */ string internal constant _TABLE = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; string internal constant _TABLE_URL = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; /** - * @dev Converts a `bytes` to its Bytes64 `string` representation. + * @dev Converts a `bytes` to its Base64 `string` representation.
*/ function encode(bytes memory data) internal pure returns (string memory) { return _encode(data, _TABLE, true); } /** - * @dev Converts a `bytes` to its Bytes64Url `string` representation. + * @dev Converts a `bytes` to its Base64Url `string` representation. * Output is not padded with `=` as specified in https://www.rfc-editor.org/rfc/rfc4648[rfc4648]. */ function encodeURL(bytes memory data) internal pure returns (string memory) { diff --git a/contracts/utils/Bytes.sol b/contracts/utils/Bytes.sol index 1234b845513..b5d2f81c3aa 100644 --- a/contracts/utils/Bytes.sol +++ b/contracts/utils/Bytes.sol @@ -68,6 +68,28 @@ library Bytes { } } + /** + * @dev Count the number of consecutive occurrences of `search` at the beginning of `buffer`. + */ + function countLeading(bytes memory buffer, bytes1 search) internal pure returns (uint256) { + return countConsecutive(buffer, 0, search); + } + + /** + * @dev Count the number of consecutive occurrences of `search` in `buffer`, starting from position `offset`. Counting stops at the first byte that differs from `search` or at the end of `buffer`. + */ + function countConsecutive(bytes memory buffer, uint256 offset, bytes1 search) internal pure returns (uint256 i) { + uint256 length = Math.saturatingSub(buffer.length, offset); + assembly ("memory-safe") { + for { + let ptr := add(add(buffer, 0x20), offset) + i := 0 + } and(iszero(shr(248, xor(mload(add(ptr, i)), search))), lt(i, length)) { + i := add(i, 1) + } {} + } + } + /** * @dev Copies the content of `buffer`, from `start` (included) to the end of `buffer` into a new bytes object in * memory. @@ -99,6 +121,35 @@ library Bytes { return result; } + /** + * @dev In place slice: moves the content of `buffer`, from `start` (included) to the end of `buffer` to the start of that buffer. + * + * NOTE: This function modifies the provided buffer in place.
If you need to preserve the original buffer, use {slice} instead. + */ + function splice(bytes memory buffer, uint256 start) internal pure returns (bytes memory) { + return splice(buffer, start, buffer.length); + } + + /** + * @dev In place slice: moves the content of `buffer`, from `start` (included) to `end` (excluded) to the start of that buffer, and sets the buffer length to the number of bytes moved. + * + * NOTE: This function modifies the provided buffer in place. If you need to preserve the original buffer, use {slice} instead. + */ + function splice(bytes memory buffer, uint256 start, uint256 end) internal pure returns (bytes memory) { + // sanitize: clamp `end` to the buffer length and `start` to `end` + uint256 length = buffer.length; + end = Math.min(end, length); + start = Math.min(start, end); + + // move the selected range to the front of the buffer and update its length (no allocation) + assembly ("memory-safe") { + mcopy(add(buffer, 0x20), add(add(buffer, 0x20), start), sub(end, start)) + mstore(buffer, sub(end, start)) + } + + return buffer; + } + /** * @dev Reads a bytes32 from a bytes array without bounds checking. * diff --git a/contracts/utils/README.adoc b/contracts/utils/README.adoc index 8640e56fa51..0149c9019a7 100644 --- a/contracts/utils/README.adoc +++ b/contracts/utils/README.adoc @@ -24,6 +24,7 @@ Miscellaneous contracts and libraries containing utility functions you can use t * {Create2}: Wrapper around the https://blog.openzeppelin.com/getting-the-most-out-of-create2/[`CREATE2` EVM opcode] for safe use without having to deal with low-level assembly. * {Address}: Collection of functions for overloading Solidity's https://docs.soliditylang.org/en/latest/types.html#address[`address`] type. * {Arrays}: Collection of functions that operate on https://docs.soliditylang.org/en/latest/types.html#arrays[`arrays`]. + * {Base58}: On-chain base58 encoding and decoding. * {Base64}: On-chain base64 and base64URL encoding according to https://datatracker.ietf.org/doc/html/rfc4648[RFC-4648]. * {Bytes}: Common operations on bytes objects. * {Calldata}: Helpers for manipulating calldata.
@@ -105,6 +106,8 @@ Ethereum contracts have no native concept of an interface, so applications must {{Arrays}} +{{Base58}} + {{Base64}} {{Bytes}} diff --git a/test/utils/Base58.t.sol b/test/utils/Base58.t.sol new file mode 100644 index 00000000000..f75a2d99436 --- /dev/null +++ b/test/utils/Base58.t.sol @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: MIT + +pragma solidity ^0.8.26; + +import {Test} from "forge-std/Test.sol"; +import {Base58} from "@openzeppelin/contracts/utils/Base58.sol"; + +contract Base58Test is Test { + function testEncodeDecodeEmpty() external pure { + assertEq(Base58.decode(Base58.encode("")), ""); + } + + function testEncodeDecodeZeros() external pure { + bytes memory zeros = hex"0000000000000000"; + assertEq(Base58.decode(Base58.encode(zeros)), zeros); + + bytes memory almostZeros = hex"00000000a400000000"; + assertEq(Base58.decode(Base58.encode(almostZeros)), almostZeros); + } + + function testEncodeDecode(bytes memory input) external pure { + assertEq(Base58.decode(Base58.encode(input)), input); + } +} diff --git a/test/utils/Base58.test.js b/test/utils/Base58.test.js new file mode 100644 index 00000000000..bb19fa90250 --- /dev/null +++ b/test/utils/Base58.test.js @@ -0,0 +1,60 @@ +const { ethers } = require('hardhat'); +const { expect } = require('chai'); +const { loadFixture } = require('@nomicfoundation/hardhat-network-helpers'); + +async function fixture() { + const mock = await ethers.deployContract('$Base58'); + return { mock }; +} + +describe('Base58', function () { + beforeEach(async function () { + Object.assign(this, await loadFixture(fixture)); + }); + + describe('base58', function () { + describe('encode/decode random buffers', function () { + // length 512 runs out of gas. + // these checks are very slow when running coverage, causing CI to time out.
+ for (const length of [0, 1, 2, 3, 4, 32, 42, 128, 384]) + it( + [length > 32 && '[skip-on-coverage]', `buffer of length ${length}`].filter(Boolean).join(' '), + async function () { + const buffer = ethers.randomBytes(length); + const hex = ethers.hexlify(buffer); + const b58 = ethers.encodeBase58(buffer); + + await expect(this.mock.$encode(hex)).to.eventually.equal(b58); + await expect(this.mock.$decode(b58)).to.eventually.equal(hex); + }, + ); + }); + + // Test cases from section 5 of the (no longer active) Base58 Encoding Scheme Internet-Draft + // https://datatracker.ietf.org/doc/html/draft-msporny-base58-03 + describe('test vectors', function () { + for (const { raw, b58 } of [ + { raw: 'Hello World!', b58: '2NEpo7TZRRrLZSi2U' }, + { + raw: 'The quick brown fox jumps over the lazy dog.', + b58: 'USm3fpXnKG5EUBx2ndxBDMPVciP5hGey2Jh4NDv6gmeo1LkMeiKrLJUUBk6Z', + }, + { raw: '0x0000287fb4cd', b58: '11233QC4' }, + ]) + it(raw, async function () { + const buffer = (ethers.isHexString(raw) ? ethers.getBytes : ethers.toUtf8Bytes)(raw); + const hex = ethers.hexlify(buffer); + + await expect(this.mock.$encode(hex)).to.eventually.equal(b58); + await expect(this.mock.$decode(b58)).to.eventually.equal(hex); + }); + }); + + describe('decode invalid format', function () { + for (const chr of ['I', '-', '~']) + it(`Invalid base58 char ${chr}`, async function () { + await expect(this.mock.$decode(`VYRWKp${chr}pnN7`)).to.be.reverted; + }); + }); + }); +}); diff --git a/test/utils/Base64.test.js b/test/utils/Base64.test.js index 5c427466671..008b6b634db 100644 --- a/test/utils/Base64.test.js +++ b/test/utils/Base64.test.js @@ -11,7 +11,7 @@ async function fixture() { return { mock }; } -describe('Strings', function () { +describe('Base64', function () { beforeEach(async function () { Object.assign(this, await loadFixture(fixture)); }); @@ -27,8 +27,8 @@ describe('Strings', function () { ]) it(title, async function () { const buffer = Buffer.from(input, 'ascii'); - expect(await
this.mock.$encode(buffer)).to.equal(ethers.encodeBase64(buffer)); - expect(await this.mock.$encode(buffer)).to.equal(expected); + await expect(this.mock.$encode(buffer)).to.eventually.equal(ethers.encodeBase64(buffer)); + await expect(this.mock.$encode(buffer)).to.eventually.equal(expected); }); }); @@ -43,8 +43,8 @@ describe('Strings', function () { ]) it(title, async function () { const buffer = Buffer.from(input, 'ascii'); - expect(await this.mock.$encodeURL(buffer)).to.equal(base64toBase64Url(ethers.encodeBase64(buffer))); - expect(await this.mock.$encodeURL(buffer)).to.equal(expected); + await expect(this.mock.$encodeURL(buffer)).to.eventually.equal(base64toBase64Url(ethers.encodeBase64(buffer))); + await expect(this.mock.$encodeURL(buffer)).to.eventually.equal(expected); }); }); @@ -53,7 +53,7 @@ describe('Strings', function () { const buffer32 = ethers.id('example'); const buffer31 = buffer32.slice(0, -2); - expect(await mock.encode(buffer31)).to.equal(ethers.encodeBase64(buffer31)); - expect(await mock.encode(buffer32)).to.equal(ethers.encodeBase64(buffer32)); + await expect(mock.encode(buffer31)).to.eventually.equal(ethers.encodeBase64(buffer31)); + await expect(mock.encode(buffer32)).to.eventually.equal(ethers.encodeBase64(buffer32)); }); }); diff --git a/test/utils/Bytes.test.js b/test/utils/Bytes.test.js index 52a1ae95e77..07682aec852 100644 --- a/test/utils/Bytes.test.js +++ b/test/utils/Bytes.test.js @@ -56,8 +56,37 @@ describe('Bytes', function () { }); }); - describe('slice', function () { - describe('slice(bytes, uint256)', function () { + describe('countConsecutive', function () { + it('empty buffer', async function () { + await expect(this.mock.$countConsecutive('0x', 0, '0x00')).to.eventually.equal(0); + }); + + it('no occurrence', async function () { + await expect(this.mock.$countConsecutive('0xa4f678', 0, '0x00')).to.eventually.equal(0); + await expect(this.mock.$countConsecutive('0x000000', 0, '0x01')).to.eventually.equal(0); + }); + + 
it('single occurrence', async function () { + await expect(this.mock.$countConsecutive('0xa4f678', 0, '0xa4')).to.eventually.equal(1); + await expect(this.mock.$countConsecutive('0xa4f678', 1, '0xf6')).to.eventually.equal(1); + await expect(this.mock.$countConsecutive('0xa4f678', 2, '0x78')).to.eventually.equal(1); + }); + + it('multiple occurrence', async function () { + await expect(this.mock.$countConsecutive('0xa4a4f6f6f6f678', 0, '0xa4')).to.eventually.equal(2); + await expect(this.mock.$countConsecutive('0xa4a4f6f6f6f678', 2, '0xf6')).to.eventually.equal(4); + await expect(this.mock.$countConsecutive('0x78787878787878', 0, '0x78')).to.eventually.equal(7); + await expect(this.mock.$countConsecutive('0x78787878787878', 3, '0x78')).to.eventually.equal(4); + }); + + it('out of bound offset', async function () { + await expect(this.mock.$countConsecutive('0x000000', 3, '0x00')).to.eventually.equal(0); + await expect(this.mock.$countConsecutive('0x000000', 42, '0x00')).to.eventually.equal(0); + }); + }); + + describe('slice & splice', function () { + describe('slice(bytes, uint256) & splice(bytes, uint256)', function () { for (const [descr, start] of Object.entries({ 'start = 0': 0, 'start within bound': 10, @@ -66,11 +95,12 @@ describe('Bytes', function () { it(descr, async function () { const result = ethers.hexlify(lorem.slice(start)); expect(await this.mock.$slice(lorem, start)).to.equal(result); + expect(await this.mock.$splice(lorem, start)).to.equal(result); }); } }); - describe('slice(bytes, uint256, uint256)', function () { + describe('slice(bytes, uint256, uint256) & splice(bytes, uint256, uint256)', function () { for (const [descr, [start, end]] of Object.entries({ 'start = 0': [0, 42], 'start and end within bound': [17, 42], @@ -81,6 +111,7 @@ describe('Bytes', function () { it(descr, async function () { const result = ethers.hexlify(lorem.slice(start, end)); expect(await this.mock.$slice(lorem, start, ethers.Typed.uint256(end))).to.equal(result); + 
expect(await this.mock.$splice(lorem, start, ethers.Typed.uint256(end))).to.equal(result); }); } });