Skip to content

Commit

Permalink
Merge pull request #19 from bitcoin-sv/feature/script_chunk_bug_fixs
Browse files Browse the repository at this point in the history
Fix: Use unsigned integer reading methods for PUSHDATA opcode lengths
  • Loading branch information
ALiberalVoluntarist authored Feb 28, 2025
2 parents 8257b5f + 70efbd4 commit 6dcb56d
Show file tree
Hide file tree
Showing 4 changed files with 197 additions and 4 deletions.
21 changes: 21 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,27 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
### Security
- (Notify of any improvements related to security vulnerabilities or potential risks.)

---
## [1.0.2] - 2025-02-28

### Added
- BIP32_DERIVATION_PATH environment variable for customizing default derivation paths
- Dedicated BIP32 derivation functions with clearer interface
- BIP44-specific functions that build on the BIP32 foundation
- Comprehensive tests for HD wallet key derivation consistency
- Enhanced error messages for hardened key derivation attempts from xpub keys

### Fixed
- PUSHDATA opcode length parsing now uses unsigned integer reading methods (read_uint8, read_uint16_le, read_uint32_le) instead of signed integer methods to prevent incorrect chunk parsing with large data lengths
- Proper handling of edge cases in Script parsing including length 0 and incomplete length specifications
- Serialization/deserialization consistency for various PUSHDATA operations

### Changed
- Refined HD wallet key derivation interface while maintaining backward compatibility
- Improved error messages for invalid derivation attempts
- Marked legacy derivation functions as deprecated while maintaining compatibility


---
## [1.0.1] - 2025-01-09

Expand Down
2 changes: 2 additions & 0 deletions bsv/keys.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,8 @@ def verify_recoverable(
def sign_text(self, text: str) -> Tuple[str, str]:
"""sign arbitrary text with bitcoin private key
:returns: (p2pkh_address, stringified_recoverable_ecdsa_signature)
This function follows Bitcoin Signed Message Format.
For BRC-77, use signed_message.py instead.
"""
message: bytes = text_digest(text)
return self.address(), stringify_ecdsa_recoverable(self.sign_recoverable(message), self.compressed)
Expand Down
14 changes: 10 additions & 4 deletions bsv/script/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,18 @@ def _build_chunks(self):
data = None
if b'\x01' <= op <= b'\x4b':
data = reader.read_bytes(int.from_bytes(op, 'big'))
elif op == OpCode.OP_PUSHDATA1:
data = reader.read_bytes(reader.read_int8())
elif op == OpCode.OP_PUSHDATA1: # 0x4c
length = reader.read_uint8()
if length is not None:
data = reader.read_bytes(length)
elif op == OpCode.OP_PUSHDATA2:
data = reader.read_bytes(reader.read_int16_le())
length = reader.read_uint16_le()
if length is not None:
data = reader.read_bytes(length)
elif op == OpCode.OP_PUSHDATA4:
data = reader.read_bytes(reader.read_int32_le())
length = reader.read_uint32_le()
if length is not None:
data = reader.read_bytes(length)
chunk.data = data
self.chunks.append(chunk)

Expand Down
164 changes: 164 additions & 0 deletions tests/test_script_chunk_oppushdata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
import pytest
from bsv.script.script import Script
from bsv.constants import OpCode


def test_script_build_chunks_pushdata_opcodes():
"""
Test that the Script._build_chunks method correctly handles PUSHDATA opcodes
when changing the reading method from byte-by-int to unit-based reading.
"""

# Test PUSHDATA1 with a length value that would be negative if incorrectly interpreted as signed
# 0xff = 255 bytes of data
pushdata1_high_length = b'\x4c\xff' + b'\x42' * 255
script_pushdata1 = Script(pushdata1_high_length)
assert len(script_pushdata1.chunks) == 1
assert script_pushdata1.chunks[0].op == OpCode.OP_PUSHDATA1
assert script_pushdata1.chunks[0].data == b'\x42' * 255
assert len(script_pushdata1.chunks[0].data) == 255

# Test with smaller data sizes to ensure consistent behavior
pushdata1_75 = b'\x4c\xff' + b'\x42' * 75
script_pushdata1_75 = Script(pushdata1_75)
assert len(script_pushdata1_75.chunks) == 1
assert script_pushdata1_75.chunks[0].op == OpCode.OP_PUSHDATA1
assert script_pushdata1_75.chunks[0].data == b'\x42' * 75

pushdata1_76 = b'\x4c\xff' + b'\x42' * 76
script_pushdata1_76 = Script(pushdata1_76)
assert len(script_pushdata1_76.chunks) == 1
assert script_pushdata1_76.chunks[0].op == OpCode.OP_PUSHDATA1
assert script_pushdata1_76.chunks[0].data == b'\x42' * 76

# Test PUSHDATA2 with a length value that would be negative if incorrectly interpreted as signed
# 0xffff = 65535 bytes of data
pushdata2_high_length = b'\x4d\xff\xff' + b'\x42' * 65535
script_pushdata2 = Script(pushdata2_high_length)
assert len(script_pushdata2.chunks) == 1
assert script_pushdata2.chunks[0].op == OpCode.OP_PUSHDATA2
assert script_pushdata2.chunks[0].data == b'\x42' * 65535
assert len(script_pushdata2.chunks[0].data) == 65535

# Test with smaller data sizes for PUSHDATA2
pushdata2_255 = b'\x4d\xff\xff' + b'\x42' * 255
script_pushdata2_255 = Script(pushdata2_255)
assert len(script_pushdata2_255.chunks) == 1
assert script_pushdata2_255.chunks[0].op == OpCode.OP_PUSHDATA2
assert script_pushdata2_255.chunks[0].data == b'\x42' * 255

pushdata2_256 = b'\x4d\xff\xff' + b'\x42' * 256
script_pushdata2_256 = Script(pushdata2_256)
assert len(script_pushdata2_256.chunks) == 1
assert script_pushdata2_256.chunks[0].op == OpCode.OP_PUSHDATA2
assert script_pushdata2_256.chunks[0].data == b'\x42' * 256

# Test PUSHDATA4 with values that would be negative if interpreted as signed integers
# Test with very large value - 0x80000001 = 2,147,483,649 (would be -2,147,483,647 as signed int32)
# Note: This test may require significant memory
pushdata4_large_value = b'\x4e\x01\x00\x00\x80' + b'\x42' * 2147483649
script_pushdata4_large = Script(pushdata4_large_value)
assert len(script_pushdata4_large.chunks) == 1
assert script_pushdata4_large.chunks[0].op == OpCode.OP_PUSHDATA4
assert len(script_pushdata4_large.chunks[0].data) == 2147483649

# Test with smaller data sizes for PUSHDATA4
pushdata4_upper_half = b'\x4e\x00\x00\x00\xC0' + b'\x43' * 65535
script_pushdata4_upper_half = Script(pushdata4_upper_half)
assert len(script_pushdata4_upper_half.chunks) == 1
assert script_pushdata4_upper_half.chunks[0].op == OpCode.OP_PUSHDATA4
assert len(script_pushdata4_upper_half.chunks[0].data) == 65535

# Test with slightly larger data size
pushdata4_upper_half_2 = b'\x4e\x00\x00\x00\xC0' + b'\x43' * 65536
script_pushdata4_upper_half_2 = Script(pushdata4_upper_half_2)
assert len(script_pushdata4_upper_half_2.chunks) == 1
assert script_pushdata4_upper_half_2.chunks[0].op == OpCode.OP_PUSHDATA4
assert len(script_pushdata4_upper_half_2.chunks[0].data) == 65536

# Test boundary cases where the length is exactly at important thresholds
# PUSHDATA1 with length 0
pushdata1_zero = b'\x4c\x00'
script_pushdata1_zero = Script(pushdata1_zero)
assert len(script_pushdata1_zero.chunks) == 1
assert script_pushdata1_zero.chunks[0].op == OpCode.OP_PUSHDATA1
assert script_pushdata1_zero.chunks[0].data == b''
assert len(script_pushdata1_zero.chunks[0].data) == 0

# Edge case: PUSHDATA with incomplete length specification
incomplete_pushdata1 = b'\x4c' # PUSHDATA1 without length byte
script_incomplete1 = Script(incomplete_pushdata1)
assert len(script_incomplete1.chunks) == 1
assert script_incomplete1.chunks[0].op == OpCode.OP_PUSHDATA1
assert script_incomplete1.chunks[0].data is None

incomplete_pushdata2 = b'\x4d\xff' # PUSHDATA2 with incomplete length (only one byte)
script_incomplete2 = Script(incomplete_pushdata2)
assert len(script_incomplete2.chunks) == 1
assert script_incomplete2.chunks[0].op == OpCode.OP_PUSHDATA2
assert script_incomplete2.chunks[0].data == b''

# Edge case: PUSHDATA with specified length but insufficient data
insufficient_data1 = b'\x4c\x0A\x01\x02\x03' # PUSHDATA1 expecting 10 bytes but only 3 are provided
script_insufficient1 = Script(insufficient_data1)
assert len(script_insufficient1.chunks) == 1
assert script_insufficient1.chunks[0].op == OpCode.OP_PUSHDATA1
assert script_insufficient1.chunks[0].data == b'\x01\x02\x03' # Should get the available data

# Multiple PUSHDATA opcodes in sequence to test parsing continuity
mixed_pushdata = (
b'\x4c\x03\x01\x02\x03' # PUSHDATA1 with 3 bytes
b'\x4d\x04\x00\x04\x05\x06\x07' # PUSHDATA2 with 4 bytes
b'\x02\x08\x09' # Direct push of 2 bytes
)
script_mixed = Script(mixed_pushdata)
assert len(script_mixed.chunks) == 3
assert script_mixed.chunks[0].op == OpCode.OP_PUSHDATA1
assert script_mixed.chunks[0].data == b'\x01\x02\x03'
assert script_mixed.chunks[1].op == OpCode.OP_PUSHDATA2
assert script_mixed.chunks[1].data == b'\x04\x05\x06\x07'
assert script_mixed.chunks[2].op == b'\x02'
assert script_mixed.chunks[2].data == b'\x08\x09'


def test_script_serialization_with_pushdata():
"""
Test that serialization and deserialization of scripts with PUSHDATA opcodes work correctly.
This test verifies that scripts containing PUSHDATA opcodes can be:
1. Serialized back to their original binary form
2. Deserialized from binary to produce identical Script objects with properly parsed chunks
This ensures the round-trip integrity of Script objects with various PUSHDATA operations.
"""
# Create a script with various PUSHDATA opcodes and direct push data
original_script = (
b'\x4c\x03\x01\x02\x03' # PUSHDATA1 with 3 bytes
b'\x4d\x04\x00\x04\x05\x06\x07' # PUSHDATA2 with 4 bytes
b'\x02\x08\x09' # Direct push of 2 bytes
)

script = Script(original_script)

# Serialize and deserialize the script
serialized = script.serialize()
deserialized = Script(serialized)

# Verify the scripts are equivalent
assert serialized == original_script
assert deserialized.serialize() == original_script

# Check that the chunks are correctly parsed
assert len(deserialized.chunks) == 3
assert deserialized.chunks[0].op == OpCode.OP_PUSHDATA1
assert deserialized.chunks[0].data == b'\x01\x02\x03'
assert deserialized.chunks[1].op == OpCode.OP_PUSHDATA2
assert deserialized.chunks[1].data == b'\x04\x05\x06\x07'
assert deserialized.chunks[2].op == b'\x02'
assert deserialized.chunks[2].data == b'\x08\x09'


if __name__ == "__main__":
test_script_build_chunks_pushdata_opcodes()
test_script_serialization_with_pushdata()
print("All tests passed!")

0 comments on commit 6dcb56d

Please sign in to comment.