Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix: Use unsigned integer reading methods for PUSHDATA opcode lengths #19

Merged
merged 2 commits into from
Feb 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,27 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
### Security
- (Notify of any improvements related to security vulnerabilities or potential risks.)

---
## [1.0.2] - 2025-02-28

### Added
- BIP32_DERIVATION_PATH environment variable for customizing default derivation paths
- Dedicated BIP32 derivation functions with clearer interface
- BIP44-specific functions that build on the BIP32 foundation
- Comprehensive tests for HD wallet key derivation consistency
- Enhanced error messages for hardened key derivation attempts from xpub keys

### Fixed
- PUSHDATA opcode length parsing now uses unsigned integer reading methods (read_uint8, read_uint16_le, read_uint32_le) instead of signed integer methods to prevent incorrect chunk parsing with large data lengths
- Proper handling of edge cases in Script parsing including length 0 and incomplete length specifications
- Serialization/deserialization consistency for various PUSHDATA operations

### Changed
- Refined HD wallet key derivation interface while maintaining backward compatibility
- Improved error messages for invalid derivation attempts
- Marked legacy derivation functions as deprecated while maintaining compatibility


---
## [1.0.1] - 2025-01-09

Expand Down
2 changes: 2 additions & 0 deletions bsv/keys.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,8 @@ def verify_recoverable(
def sign_text(self, text: str) -> Tuple[str, str]:
"""sign arbitrary text with bitcoin private key
:returns: (p2pkh_address, stringified_recoverable_ecdsa_signature)
This function follows Bitcoin Signed Message Format.
For BRC-77, use signed_message.py instead.
"""
message: bytes = text_digest(text)
return self.address(), stringify_ecdsa_recoverable(self.sign_recoverable(message), self.compressed)
Expand Down
14 changes: 10 additions & 4 deletions bsv/script/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,18 @@ def _build_chunks(self):
data = None
if b'\x01' <= op <= b'\x4b':
data = reader.read_bytes(int.from_bytes(op, 'big'))
elif op == OpCode.OP_PUSHDATA1:
data = reader.read_bytes(reader.read_int8())
elif op == OpCode.OP_PUSHDATA1: # 0x4c
length = reader.read_uint8()
if length is not None:
data = reader.read_bytes(length)
elif op == OpCode.OP_PUSHDATA2:
data = reader.read_bytes(reader.read_int16_le())
length = reader.read_uint16_le()
if length is not None:
data = reader.read_bytes(length)
elif op == OpCode.OP_PUSHDATA4:
data = reader.read_bytes(reader.read_int32_le())
length = reader.read_uint32_le()
if length is not None:
data = reader.read_bytes(length)
chunk.data = data
self.chunks.append(chunk)

Expand Down
164 changes: 164 additions & 0 deletions tests/test_script_chunk_oppushdata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
import pytest
from bsv.script.script import Script
from bsv.constants import OpCode


def test_script_build_chunks_pushdata_opcodes():
"""
Test that the Script._build_chunks method correctly handles PUSHDATA opcodes
when changing the reading method from byte-by-int to unit-based reading.
"""

# Test PUSHDATA1 with a length value that would be negative if incorrectly interpreted as signed
# 0xff = 255 bytes of data
pushdata1_high_length = b'\x4c\xff' + b'\x42' * 255
script_pushdata1 = Script(pushdata1_high_length)
assert len(script_pushdata1.chunks) == 1
assert script_pushdata1.chunks[0].op == OpCode.OP_PUSHDATA1
assert script_pushdata1.chunks[0].data == b'\x42' * 255
assert len(script_pushdata1.chunks[0].data) == 255

# Test with smaller data sizes to ensure consistent behavior
pushdata1_75 = b'\x4c\xff' + b'\x42' * 75
script_pushdata1_75 = Script(pushdata1_75)
assert len(script_pushdata1_75.chunks) == 1
assert script_pushdata1_75.chunks[0].op == OpCode.OP_PUSHDATA1
assert script_pushdata1_75.chunks[0].data == b'\x42' * 75

pushdata1_76 = b'\x4c\xff' + b'\x42' * 76
script_pushdata1_76 = Script(pushdata1_76)
assert len(script_pushdata1_76.chunks) == 1
assert script_pushdata1_76.chunks[0].op == OpCode.OP_PUSHDATA1
assert script_pushdata1_76.chunks[0].data == b'\x42' * 76

# Test PUSHDATA2 with a length value that would be negative if incorrectly interpreted as signed
# 0xffff = 65535 bytes of data
pushdata2_high_length = b'\x4d\xff\xff' + b'\x42' * 65535
script_pushdata2 = Script(pushdata2_high_length)
assert len(script_pushdata2.chunks) == 1
assert script_pushdata2.chunks[0].op == OpCode.OP_PUSHDATA2
assert script_pushdata2.chunks[0].data == b'\x42' * 65535
assert len(script_pushdata2.chunks[0].data) == 65535

# Test with smaller data sizes for PUSHDATA2
pushdata2_255 = b'\x4d\xff\xff' + b'\x42' * 255
script_pushdata2_255 = Script(pushdata2_255)
assert len(script_pushdata2_255.chunks) == 1
assert script_pushdata2_255.chunks[0].op == OpCode.OP_PUSHDATA2
assert script_pushdata2_255.chunks[0].data == b'\x42' * 255

pushdata2_256 = b'\x4d\xff\xff' + b'\x42' * 256
script_pushdata2_256 = Script(pushdata2_256)
assert len(script_pushdata2_256.chunks) == 1
assert script_pushdata2_256.chunks[0].op == OpCode.OP_PUSHDATA2
assert script_pushdata2_256.chunks[0].data == b'\x42' * 256

# Test PUSHDATA4 with values that would be negative if interpreted as signed integers
# Test with very large value - 0x80000001 = 2,147,483,649 (would be -2,147,483,647 as signed int32)
# Note: This test may require significant memory
pushdata4_large_value = b'\x4e\x01\x00\x00\x80' + b'\x42' * 2147483649
script_pushdata4_large = Script(pushdata4_large_value)
assert len(script_pushdata4_large.chunks) == 1
assert script_pushdata4_large.chunks[0].op == OpCode.OP_PUSHDATA4
assert len(script_pushdata4_large.chunks[0].data) == 2147483649

# Test with smaller data sizes for PUSHDATA4
pushdata4_upper_half = b'\x4e\x00\x00\x00\xC0' + b'\x43' * 65535
script_pushdata4_upper_half = Script(pushdata4_upper_half)
assert len(script_pushdata4_upper_half.chunks) == 1
assert script_pushdata4_upper_half.chunks[0].op == OpCode.OP_PUSHDATA4
assert len(script_pushdata4_upper_half.chunks[0].data) == 65535

# Test with slightly larger data size
pushdata4_upper_half_2 = b'\x4e\x00\x00\x00\xC0' + b'\x43' * 65536
script_pushdata4_upper_half_2 = Script(pushdata4_upper_half_2)
assert len(script_pushdata4_upper_half_2.chunks) == 1
assert script_pushdata4_upper_half_2.chunks[0].op == OpCode.OP_PUSHDATA4
assert len(script_pushdata4_upper_half_2.chunks[0].data) == 65536

# Test boundary cases where the length is exactly at important thresholds
# PUSHDATA1 with length 0
pushdata1_zero = b'\x4c\x00'
script_pushdata1_zero = Script(pushdata1_zero)
assert len(script_pushdata1_zero.chunks) == 1
assert script_pushdata1_zero.chunks[0].op == OpCode.OP_PUSHDATA1
assert script_pushdata1_zero.chunks[0].data == b''
assert len(script_pushdata1_zero.chunks[0].data) == 0

# Edge case: PUSHDATA with incomplete length specification
incomplete_pushdata1 = b'\x4c' # PUSHDATA1 without length byte
script_incomplete1 = Script(incomplete_pushdata1)
assert len(script_incomplete1.chunks) == 1
assert script_incomplete1.chunks[0].op == OpCode.OP_PUSHDATA1
assert script_incomplete1.chunks[0].data is None

incomplete_pushdata2 = b'\x4d\xff' # PUSHDATA2 with incomplete length (only one byte)
script_incomplete2 = Script(incomplete_pushdata2)
assert len(script_incomplete2.chunks) == 1
assert script_incomplete2.chunks[0].op == OpCode.OP_PUSHDATA2
assert script_incomplete2.chunks[0].data == b''

# Edge case: PUSHDATA with specified length but insufficient data
insufficient_data1 = b'\x4c\x0A\x01\x02\x03' # PUSHDATA1 expecting 10 bytes but only 3 are provided
script_insufficient1 = Script(insufficient_data1)
assert len(script_insufficient1.chunks) == 1
assert script_insufficient1.chunks[0].op == OpCode.OP_PUSHDATA1
assert script_insufficient1.chunks[0].data == b'\x01\x02\x03' # Should get the available data

# Multiple PUSHDATA opcodes in sequence to test parsing continuity
mixed_pushdata = (
b'\x4c\x03\x01\x02\x03' # PUSHDATA1 with 3 bytes
b'\x4d\x04\x00\x04\x05\x06\x07' # PUSHDATA2 with 4 bytes
b'\x02\x08\x09' # Direct push of 2 bytes
)
script_mixed = Script(mixed_pushdata)
assert len(script_mixed.chunks) == 3
assert script_mixed.chunks[0].op == OpCode.OP_PUSHDATA1
assert script_mixed.chunks[0].data == b'\x01\x02\x03'
assert script_mixed.chunks[1].op == OpCode.OP_PUSHDATA2
assert script_mixed.chunks[1].data == b'\x04\x05\x06\x07'
assert script_mixed.chunks[2].op == b'\x02'
assert script_mixed.chunks[2].data == b'\x08\x09'


def test_script_serialization_with_pushdata():
"""
Test that serialization and deserialization of scripts with PUSHDATA opcodes work correctly.

This test verifies that scripts containing PUSHDATA opcodes can be:
1. Serialized back to their original binary form
2. Deserialized from binary to produce identical Script objects with properly parsed chunks

This ensures the round-trip integrity of Script objects with various PUSHDATA operations.
"""
# Create a script with various PUSHDATA opcodes and direct push data
original_script = (
b'\x4c\x03\x01\x02\x03' # PUSHDATA1 with 3 bytes
b'\x4d\x04\x00\x04\x05\x06\x07' # PUSHDATA2 with 4 bytes
b'\x02\x08\x09' # Direct push of 2 bytes
)

script = Script(original_script)

# Serialize and deserialize the script
serialized = script.serialize()
deserialized = Script(serialized)

# Verify the scripts are equivalent
assert serialized == original_script
assert deserialized.serialize() == original_script

# Check that the chunks are correctly parsed
assert len(deserialized.chunks) == 3
assert deserialized.chunks[0].op == OpCode.OP_PUSHDATA1
assert deserialized.chunks[0].data == b'\x01\x02\x03'
assert deserialized.chunks[1].op == OpCode.OP_PUSHDATA2
assert deserialized.chunks[1].data == b'\x04\x05\x06\x07'
assert deserialized.chunks[2].op == b'\x02'
assert deserialized.chunks[2].data == b'\x08\x09'


if __name__ == "__main__":
test_script_build_chunks_pushdata_opcodes()
test_script_serialization_with_pushdata()
print("All tests passed!")