Skip to content

Commit

Permalink
compression/lowpass augmentation: better docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
racoiaws committed Feb 5, 2025
1 parent 7aeda0a commit dd8f8e0
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 12 deletions.
8 changes: 8 additions & 0 deletions lhotse/augmentation/codec.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,14 @@

@dataclass
class Compress(AudioTransform):
"""
Modifies audio by running it through a lossy codec.
:param codec: The lossy audio codec to use. One of ``"opus"``, ``"mp3"``, or ``"vorbis"``.
:param compression_level: The level of compression to apply. 0.0 is the lowest amount of compression, 1.0 is the highest.
:return: The modified audio samples.
"""

codec: Literal["opus", "mp3", "vorbis"]
compression_level: float

Expand Down
6 changes: 6 additions & 0 deletions lhotse/augmentation/lowpass.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@

@dataclass
class Lowpass(AudioTransform):
"""
Apply a low-pass filter to the signal.
:param frequency: The cutoff frequency of the low-pass filter.
"""

frequency: float

def __call__(
Expand Down
17 changes: 14 additions & 3 deletions lhotse/cut/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -1059,10 +1059,13 @@ def compress(
compress_custom_fields: bool = False,
) -> "DataCut":
"""
Return a new ``DataCut`` that will lazily compress and decode audio with a lossy codec.
Return a copy of this Cut that has its Recordings processed by a lossy audio encoder.
:param codec: The new sampling rate.
:return: a modified copy of the current ``DataCut``.
:param codec: The codec to use for compression. Supported codecs are Opus, MP3, Vorbis.
:param compression_level: The level of compression (from 0.0 to 1.0, higher values correspond to higher compression).
:param compress_custom_fields: Whether to also compress any custom recording fields in the Cut.
:return: A modified :class:`~lhotse.DataCut` containing audio processed by a codec
"""
assert self.has_recording, "Cannot compress a DataCut without a Recording."

Expand All @@ -1085,6 +1088,14 @@ def compress(
)

def lowpass(self, frequency: float) -> "DataCut":
"""
Return a copy of this Cut that has its Recordings lowpassed.
:param frequency: Corner frequency for the lowpass filter.
:return: A modified :class:`~lhotse.DataCut` containing lowpassed audio
"""
assert self.has_recording, "Cannot lowpass a DataCut without a Recording."
return fastcopy(
self,
recording=self.recording.lowpass(frequency),
Expand Down
19 changes: 18 additions & 1 deletion lhotse/cut/mixed.py
Original file line number Diff line number Diff line change
Expand Up @@ -692,7 +692,16 @@ def compress(
compression_level: float = 0.99,
compress_custom_fields: bool = False,
):
assert self.has_recording, "Cannot compress a DataCut without a Recording."
"""
Return a copy of this Cut that has Recordings in its sub-Cuts processed by a lossy encoding.
:param codec: The codec to use for compression.
:param compression_level: The level of compression (from 0.0 to 1.0, higher values correspond to higher compression).
:param compress_custom_fields: Whether to also compress any custom recording fields in sub-Cuts.
:return: A modified :class:`~lhotse.MixedCut` containing audio processed by a codec
"""
assert self.has_recording, "Cannot compress a MixedCut without a Recording."

return MixedCut(
id=self.id,
Expand All @@ -708,6 +717,14 @@ def compress(
)

def lowpass(self, frequency: float) -> "MixedCut":
"""
Return a copy of this Cut that has its sub-Cuts lowpassed.
:param frequency: Corner frequency for the lowpass filter.
:return: A modified :class:`~lhotse.MixedCut` containing lowpassed audio in its sub-Cuts
"""
assert self.has_recording, "Cannot lowpass a MixedCut without a Recording."
return MixedCut(
tracks=[
fastcopy(
Expand Down
18 changes: 13 additions & 5 deletions lhotse/dataset/cut_transforms/compress.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,19 @@
@dataclass
class Compress:
"""
For every Cut,
1) randomly choose a codec from the codec list,
2) take compression level (uniformly sampled from interval if specified),
3) then apply the codec with chosen compression level with probability :attr:`p`
4) decode back to raw waveform
Applies a lossy compression algorithm to each Cut in a CutSet. The audio is then decompressed back to a raw waveform.
The compression is applied with a probability of ``p``. The codec is
randomly selected from the list of provided codecs,
with optional weights controlling the selection.
If compression level is provided as an interval,
then the actual value is sampled uniformly from the provided interval.
:param codecs: A list of codecs (supported: opus, mp3, vorbis)
:param compression_level: A single value or an interval. 0.0 = lowest compression (highest bitrate), 1.0 = highest compression (lowest bitrate)
:param codec_weights: Optional weights for each codec (default: equal weights).
:param p: The probability of applying the compression (default: 0.5).
:param randgen: An optional random number generator (default: a new instance).
"""

codecs: List[Literal["opus", "mp3", "vorbis"]]
Expand Down
13 changes: 10 additions & 3 deletions lhotse/dataset/cut_transforms/lowpass.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,16 @@
@dataclass
class Lowpass:
"""
For every Cut,
1) randomly choose a corner frequency from provided list
2) apply the lowpass filter with chosen frequency with probability p
Applies a low-pass filter to each Cut in a CutSet.
The filter is applied with a probability of ``p``. When applied, the filter
randomly selects a cutoff frequency from the list of provided frequencies,
with optional weights controlling the selection.
:param frequencies: A list of cutoff frequencies.
:param weights: Optional weights for each frequency (default: equal weights).
:param p: The probability of applying the low-pass filter (default: 0.5).
:param randgen: An optional random number generator (default: a new instance).
"""

frequencies: List[float]
Expand Down

0 comments on commit dd8f8e0

Please sign in to comment.