11import asyncio
2- import copy
32import io
43import wave
54from enum import Enum
@@ -168,6 +167,61 @@ def __init__(
168167 self .time_base : Optional [float ] = time_base
169168 self .channels : int = channels
170169
def __repr__(self) -> str:
    """
    Return the representation used by ``repr()`` and interactive display.

    The representation is deliberately the same text that ``__str__``
    produces, so both views of the object read identically.

    Returns:
        str: The same description returned by ``__str__``
    """
    # Delegate instead of duplicating the formatting logic.
    description = self.__str__()
    return description
178+
def __str__(self) -> str:
    """
    Return a user-friendly, one-line description of the PcmData object.

    The description lists, in order: the channel layout, the sample rate,
    the sample format, the number of samples per channel and the duration
    (seconds when at least one second long, milliseconds otherwise).

    Returns:
        str: Human-readable description of the audio data
    """
    # Samples per channel. This mirrors the shape handling of ``duration``
    # so the sample count and the duration printed next to it always agree:
    # accept (channels, samples) and (samples, channels), and fall back to
    # the larger dimension when neither axis matches the channel count.
    if self.samples.ndim == 2:
        ch = self.channels if self.channels else 1
        rows, cols = self.samples.shape[0], self.samples.shape[1]
        if rows == ch:
            sample_count = cols
        elif cols == ch:
            sample_count = rows
        else:
            sample_count = max(rows, cols)
    else:
        sample_count = len(self.samples)

    # Channel layout
    if self.channels == 1:
        channel_desc = "Mono"
    elif self.channels == 2:
        channel_desc = "Stereo"
    else:
        channel_desc = f"{self.channels} -channel"

    # Duration: seconds for long clips, milliseconds for short ones
    duration_s = self.duration
    if duration_s >= 1.0:
        duration_str = f"{duration_s:.2f} s"
    else:
        duration_str = f"{self.duration_ms:.1f} ms"

    # Sample format. Accept the same spellings the conversion methods
    # accept ("s16"/"int16", "f32"/"float32", any letter case, or an enum
    # member carrying one of those values); anything else is shown as-is.
    raw_format = self.format
    format_key = str(getattr(raw_format, "value", raw_format) or "").lower()
    if format_key in ("s16", "int16"):
        format_desc = "16-bit PCM"
    elif format_key in ("f32", "float32"):
        format_desc = "32-bit float"
    else:
        format_desc = raw_format

    return (
        f"{channel_desc} audio: {self.sample_rate} Hz, {format_desc} , "
        f"{sample_count} samples, {duration_str} "
    )
224+
@property
def stereo(self) -> bool:
    """Whether the audio has exactly two channels."""
    has_two_channels = self.channels == 2
    return has_two_channels
@@ -180,53 +234,27 @@ def duration(self) -> float:
180234 Returns:
181235 float: Duration in seconds.
182236 """
183- # The samples field contains a numpy array of audio samples
237+ # The samples field is always a numpy array of audio samples
184238 # For s16 format, each element in the array is one sample (int16)
185239 # For f32 format, each element in the array is one sample (float32)
186240
187- if isinstance (self .samples , np .ndarray ):
188- # If array has shape (channels, samples) or (samples, channels), duration uses the samples dimension
189- if self .samples .ndim == 2 :
190- # Determine which dimension is samples vs channels
191- # Standard format is (channels, samples), but we need to handle both
192- ch = self .channels if self .channels else 1
193- if self .samples .shape [0 ] == ch :
194- # Shape is (channels, samples) - correct format
195- num_samples = self .samples .shape [1 ]
196- elif self .samples .shape [1 ] == ch :
197- # Shape is (samples, channels) - transposed format
198- num_samples = self .samples .shape [0 ]
199- else :
200- # Ambiguous or unknown - assume (channels, samples) and pick larger dimension
201- # This handles edge cases like (2, 2) arrays
202- num_samples = max (self .samples .shape [0 ], self .samples .shape [1 ])
203- else :
204- num_samples = len (self .samples )
205- elif isinstance (self .samples , bytes ):
206- # If samples is bytes, calculate based on format
207- if self .format == "s16" :
208- # For s16 format, each sample is 2 bytes (16 bits)
209- # For multi-channel, divide by channels to get sample count
210- num_samples = len (self .samples ) // (
211- 2 * (self .channels if self .channels else 1 )
212- )
213- elif self .format == "f32" :
214- # For f32 format, each sample is 4 bytes (32 bits)
215- num_samples = len (self .samples ) // (
216- 4 * (self .channels if self .channels else 1 )
217- )
241+ # If array has shape (channels, samples) or (samples, channels), duration uses the samples dimension
242+ if self .samples .ndim == 2 :
243+ # Determine which dimension is samples vs channels
244+ # Standard format is (channels, samples), but we need to handle both
245+ ch = self .channels if self .channels else 1
246+ if self .samples .shape [0 ] == ch :
247+ # Shape is (channels, samples) - correct format
248+ num_samples = self .samples .shape [1 ]
249+ elif self .samples .shape [1 ] == ch :
250+ # Shape is (samples, channels) - transposed format
251+ num_samples = self .samples .shape [0 ]
218252 else :
219- # Default assumption for other formats (treat as raw bytes)
220- num_samples = len (self .samples )
253+ # Ambiguous or unknown - assume (channels, samples) and pick larger dimension
254+ # This handles edge cases like (2, 2) arrays
255+ num_samples = max (self .samples .shape [0 ], self .samples .shape [1 ])
221256 else :
222- # Fallback: try to get length
223- try :
224- num_samples = len (self .samples )
225- except TypeError :
226- logger .warning (
227- f"Cannot determine sample count for type { type (self .samples )} "
228- )
229- return 0.0
257+ num_samples = len (self .samples )
230258
231259 # Calculate duration based on sample rate
232260 return num_samples / self .sample_rate
@@ -607,38 +635,14 @@ def to_float32(self) -> "PcmData":
607635 # If already f32 format, return self without modification
608636 if self .format in (AudioFormat .F32 , "f32" , "float32" ):
609637 # Additional check: verify the samples are actually float32
610- if (
611- isinstance (self .samples , np .ndarray )
612- and self .samples .dtype == np .float32
613- ):
638+ if self .samples .dtype == np .float32 :
614639 return self
615640
616641 arr = self .samples
617642
618- # Normalize to a numpy array for conversion
619- if not isinstance (arr , np .ndarray ):
620- try :
621- # Round-trip through bytes to reconstruct canonical ndarray shape
622- arr = PcmData .from_bytes (
623- self .to_bytes (),
624- sample_rate = self .sample_rate ,
625- format = self .format ,
626- channels = self .channels ,
627- ).samples
628- except Exception :
629- # Fallback to from_data for robustness
630- arr = PcmData .from_data (
631- self .samples ,
632- sample_rate = self .sample_rate ,
633- format = self .format ,
634- channels = self .channels ,
635- ).samples
636-
637643 # Convert to float32 and scale if needed
638644 fmt = (self .format or "" ).lower ()
639- if fmt in ("s16" , "int16" ) or (
640- isinstance (arr , np .ndarray ) and arr .dtype == np .int16
641- ):
645+ if fmt in ("s16" , "int16" ) or arr .dtype == np .int16 :
642646 arr_f32 = arr .astype (np .float32 ) / 32768.0
643647 else :
644648 # Ensure dtype float32; values assumed already in [-1, 1]
@@ -672,35 +676,14 @@ def to_int16(self) -> "PcmData":
672676 # If already s16 format, return self without modification
673677 if self .format in (AudioFormat .S16 , "s16" , "int16" ):
674678 # Additional check: verify the samples are actually int16
675- if isinstance ( self . samples , np . ndarray ) and self .samples .dtype == np .int16 :
679+ if self .samples .dtype == np .int16 :
676680 return self
677681
678682 arr = self .samples
679683
680- # Normalize to a numpy array for conversion
681- if not isinstance (arr , np .ndarray ):
682- try :
683- # Round-trip through bytes to reconstruct canonical ndarray shape
684- arr = PcmData .from_bytes (
685- self .to_bytes (),
686- sample_rate = self .sample_rate ,
687- format = self .format ,
688- channels = self .channels ,
689- ).samples
690- except Exception :
691- # Fallback to from_data for robustness
692- arr = PcmData .from_data (
693- self .samples ,
694- sample_rate = self .sample_rate ,
695- format = self .format ,
696- channels = self .channels ,
697- ).samples
698-
699684 # Convert to int16 and scale if needed
700685 fmt = (self .format or "" ).lower ()
701- if fmt in ("f32" , "float32" ) or (
702- isinstance (arr , np .ndarray ) and arr .dtype == np .float32
703- ):
686+ if fmt in ("f32" , "float32" ) or arr .dtype == np .float32 :
704687 # Convert float32 in [-1, 1] to int16
705688 arr_s16 = (np .clip (arr , - 1.0 , 1.0 ) * 32767.0 ).astype (np .int16 )
706689 else :
@@ -738,16 +721,9 @@ def _is_empty(arr: Any) -> bool:
738721 except Exception :
739722 return False
740723
741- # Normalize numpy arrays from bytes-like if needed
724+ # Samples are always numpy arrays
742725 def _ensure_ndarray (pcm : "PcmData" ) -> np .ndarray :
743- if isinstance (pcm .samples , np .ndarray ):
744- return pcm .samples
745- return PcmData .from_bytes (
746- pcm .to_bytes (),
747- sample_rate = pcm .sample_rate ,
748- format = pcm .format ,
749- channels = pcm .channels ,
750- ).samples
726+ return pcm .samples
751727
752728 # Adjust other to match sample rate and channels first
753729 other_adj = other
@@ -868,9 +844,7 @@ def copy(self) -> "PcmData":
868844 return PcmData (
869845 sample_rate = self .sample_rate ,
870846 format = self .format ,
871- samples = self .samples .copy ()
872- if isinstance (self .samples , np .ndarray )
873- else copy .deepcopy (self .samples ),
847+ samples = self .samples .copy (),
874848 pts = self .pts ,
875849 dts = self .dts ,
876850 time_base = self .time_base ,
@@ -1055,29 +1029,19 @@ def chunks(
10551029 >>> len(chunks) # [0:4], [2:6], [4:8], [6:10], [8:10]
10561030 5
10571031 """
1058- # Ensure we have a 1D array for simpler chunking
1059- if isinstance (self .samples , np .ndarray ):
1060- if self .samples .ndim == 2 and self .channels == 1 :
1061- samples = self .samples .flatten ()
1062- elif self .samples .ndim == 2 :
1063- # For multi-channel, work with channel-major format
1064- samples = self .samples
1065- else :
1066- samples = self .samples
1032+ # Normalize sample array shape
1033+ if self .samples .ndim == 2 and self .channels == 1 :
1034+ samples = self .samples .flatten ()
1035+ elif self .samples .ndim == 2 :
1036+ # For multi-channel, work with channel-major format
1037+ samples = self .samples
10671038 else :
1068- # Convert bytes/other to ndarray first
1069- temp = PcmData .from_bytes (
1070- self .to_bytes (),
1071- sample_rate = self .sample_rate ,
1072- format = self .format ,
1073- channels = self .channels ,
1074- )
1075- samples = temp .samples
1039+ samples = self .samples
10761040
10771041 # Handle overlap
10781042 step = max (1 , chunk_size - overlap )
10791043
1080- if self .channels > 1 and isinstance ( samples , np . ndarray ) and samples .ndim == 2 :
1044+ if self .channels > 1 and samples .ndim == 2 :
10811045 # Multi-channel case: chunk along the samples axis
10821046 num_samples = samples .shape [1 ]
10831047 for i in range (0 , num_samples , step ):
@@ -1116,9 +1080,7 @@ def chunks(
11161080 )
11171081 else :
11181082 # Mono or 1D case
1119- samples_1d = (
1120- samples .flatten () if isinstance (samples , np .ndarray ) else samples
1121- )
1083+ samples_1d = samples .flatten () if samples .ndim > 1 else samples
11221084 total_samples = len (samples_1d )
11231085
11241086 for i in range (0 , total_samples , step ):
@@ -1152,9 +1114,7 @@ def chunks(
11521114 pts = chunk_pts ,
11531115 dts = self .dts ,
11541116 time_base = self .time_base ,
1155- channels = 1
1156- if isinstance (chunk_samples , np .ndarray ) and chunk_samples .ndim == 1
1157- else self .channels ,
1117+ channels = 1 if chunk_samples .ndim == 1 else self .channels ,
11581118 )
11591119
11601120 def sliding_window (
@@ -1220,16 +1180,8 @@ def tail(
12201180 if pad_at not in ("start" , "end" ):
12211181 raise ValueError (f"pad_at must be 'start' or 'end', got { pad_at !r} " )
12221182
1223- # Get samples array
1183+ # Get samples array (always ndarray)
12241184 samples = self .samples
1225- if not isinstance (samples , np .ndarray ):
1226- # Convert to ndarray first
1227- samples = PcmData .from_bytes (
1228- self .to_bytes (),
1229- sample_rate = self .sample_rate ,
1230- format = self .format ,
1231- channels = self .channels ,
1232- ).samples
12331185
12341186 # Handle multi-channel audio
12351187 if samples .ndim == 2 and self .channels > 1 :
@@ -1328,16 +1280,8 @@ def head(
13281280 if pad_at not in ("start" , "end" ):
13291281 raise ValueError (f"pad_at must be 'start' or 'end', got { pad_at !r} " )
13301282
1331- # Get samples array
1283+ # Get samples array (always ndarray)
13321284 samples = self .samples
1333- if not isinstance (samples , np .ndarray ):
1334- # Convert to ndarray first
1335- samples = PcmData .from_bytes (
1336- self .to_bytes (),
1337- sample_rate = self .sample_rate ,
1338- format = self .format ,
1339- channels = self .channels ,
1340- ).samples
13411285
13421286 # Handle multi-channel audio
13431287 if samples .ndim == 2 and self .channels > 1 :
0 commit comments