Skip to content

Commit 542876b

Browse files
committed
feat: extend OutputStream, AvroOutputFile and ManifestWriter with __len__ method
1 parent 19809d0 commit 542876b

File tree

4 files changed

+21
-3
lines changed

4 files changed

+21
-3
lines changed

pyiceberg/avro/file.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,7 @@ class AvroOutputFile(Generic[D]):
228228
encoder: BinaryEncoder
229229
sync_bytes: bytes
230230
writer: Writer
231+
closed: bool
231232

232233
def __init__(
233234
self,
@@ -247,6 +248,7 @@ def __init__(
247248
else resolve_writer(record_schema=record_schema, file_schema=self.file_schema)
248249
)
249250
self.metadata = metadata
251+
self.closed = False
250252

251253
def __enter__(self) -> AvroOutputFile[D]:
252254
"""
@@ -267,6 +269,7 @@ def __exit__(
267269
) -> None:
268270
"""Perform cleanup when exiting the scope of a 'with' statement."""
269271
self.output_stream.close()
272+
self.closed = True
270273

271274
def _write_header(self) -> None:
272275
json_schema = json.dumps(AvroSchemaConversion().iceberg_to_avro(self.file_schema, schema_name=self.schema_name))
@@ -285,3 +288,9 @@ def write_block(self, objects: List[D]) -> None:
285288
self.encoder.write_int(len(block_content))
286289
self.encoder.write(block_content)
287290
self.encoder.write(self.sync_bytes)
291+
292+
def __len__(self) -> int:
293+
"""Returns the total number number of bytes written."""
294+
if self.closed:
295+
return len(self.output_file)
296+
return len(self.output_stream)

pyiceberg/io/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,10 @@ def __exit__(
124124
) -> None:
125125
"""Perform cleanup when exiting the scope of a 'with' statement."""
126126

127+
@abstractmethod
128+
def __len__(self) -> int:
129+
"""Returns the total number number of bytes written to the stream."""
130+
127131

128132
class InputFile(ABC):
129133
"""A base class for InputFile implementations.

pyiceberg/manifest.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -777,6 +777,11 @@ def add_entry(self, entry: ManifestEntry) -> ManifestWriter:
777777
self._writer.write_block([self.prepare_entry(entry)])
778778
return self
779779

780+
def __len__(self) -> int:
781+
"""Returns the total number number of bytes written."""
782+
return len(self._writer)
783+
784+
780785

781786
class RollingManifestWriter:
782787
closed: bool
@@ -829,7 +834,7 @@ def _should_roll_to_new_file(self) -> bool:
829834
return False
830835
return (
831836
self._current_file_rows >= self._target_number_of_rows
832-
or len(self._current_writer._output_file) >= self._target_file_size_in_bytes
837+
or len(self._current_writer) >= self._target_file_size_in_bytes
833838
)
834839

835840
def _close_current_writer(self) -> None:

tests/utils/test_manifest.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -484,8 +484,8 @@ def test_write_manifest(
484484
[
485485
(19514, 388873, 1), # should not roll over
486486
(19513, 388873, 2), # should roll over due to target_rows
487-
(19514, 388872, 2), # should roll over due target_bytes
488-
(19513, 388872, 2), # should roll over due to target_rows and target_bytes
487+
(4000, 388872, 2), # should roll over due target_bytes
488+
(4000, 388872, 2), # should roll over due to target_rows and target_bytes
489489
],
490490
)
491491
def test_rolling_manifest_writer(

0 commit comments

Comments
 (0)