Skip to content

Commit

Permalink
fixed reading files into energy
Browse files Browse the repository at this point in the history
  • Loading branch information
drfeinberg committed Aug 11, 2022
1 parent 3606e0a commit 56ff188
Show file tree
Hide file tree
Showing 2 changed files with 499 additions and 1,353 deletions.
308 changes: 154 additions & 154 deletions Voicelab/toolkits/Voicelab/MeasureEnergyNode.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ def process(self):
frameshift: int = 1 # variables.frameshift
time_praat: pd.DataFrame
f0_praat: pd.DataFrame
time_praat, f0_praat = get_raw_pitch(audio_file_path)
f0: np.array = refine_pitch_voice_sauce(time_praat, f0_praat)
time_praat, f0_praat = self.get_raw_pitch(audio_file_path)
f0: np.array = self.refine_pitch_voice_sauce(time_praat, f0_praat)
self.f0: np.array = f0
signal, sampling_rate = self.args['voice']
sound: parselmouth.Sound = parselmouth.Sound(signal, sampling_rate)
Expand All @@ -96,7 +96,7 @@ def process(self):
# Calculate Energy

try:
energy: Union[np.array, str, list] = get_energy_voice_sauce(audio_file_path)
energy: Union[np.array, str, list] = self.get_energy_voice_sauce(audio_file_path)
except Exception as e:
energy = str(e)

Expand Down Expand Up @@ -129,159 +129,159 @@ def process(self):
}


def get_energy_voice_sauce(audio_file_path: str) -> Union[np.array, str]:
"""Get energy from Voice Sauce formula
def get_energy_voice_sauce(self, audio_file_path: str) -> Union[np.array, str]:
"""Get energy from Voice Sauce formula
:param audio_file_path: path to audio file
:type audio_file_path: str
:return: energy: Energy values or error message
:rtype: Union[np.array, str]
"""
:param audio_file_path: path to audio file
:type audio_file_path: str
:return: energy: Energy values or error message
:rtype: Union[np.array, str]
"""

# Get the number of periods in the signal
n_periods: int = 5 # Nperiods_EC
frameshift: int = 1 # variables.frameshift
time_praat, f0_praat = get_raw_pitch(audio_file_path)
f0 = refine_pitch_voice_sauce(time_praat, f0_praat)
signal, sampling_rate = self.args['voice']
sound: parselmouth.Sound = parselmouth.Sound(signal, sampling_rate)
sound.resample(16000)
y = sound.values.T
fs = sound.sampling_frequency
sampleshift: float = (fs / 1000 * frameshift)

# Calculate Energy
energy: np.array = np.full(len(f0), np.nan)
for k, f0_curr in enumerate(f0):
ks: Union[float, int] = round_half_away_from_zero(k * sampleshift)
if ks <= 0:
continue
if ks >= len(y):
continue

f0_curr: Union[float, int] = f0[k]
if np.isnan(f0_curr):
continue
if f0_curr == 0:
continue
n0_curr: Union[float, int] = fs / f0_curr
ystart: int = int(round_half_away_from_zero(ks - n_periods / 2 * n0_curr))
yend: int = int(round_half_away_from_zero(ks + n_periods / 2 * n0_curr) - 1)

if ystart <= 0:
continue

if yend > len(y):
continue

yseg: np.array = y[ystart:yend]
energy[k] = np.sum(yseg ** 2)
return energy


def get_raw_pitch(audio_file_path: str) -> tuple[np.ndarray, np.ndarray]:
    """Get raw pitch from Praat. This is used to set the window length for the energy calculation.

    The audio is loaded from ``audio_file_path``, resampled to 16 kHz and
    tracked with Praat's cross-correlation pitch method at a 1 ms time step
    (pitch floor 40, pitch ceiling 500). The resulting Pitch object is
    converted to a PitchTier and exported through Praat's headerless
    spreadsheet format to obtain plain arrays.

    :param audio_file_path: path to the audio file
    :type audio_file_path: str
    :return: time, f0 -- the time and frequency columns of the PitchTier
    :rtype: tuple[np.ndarray, np.ndarray]
    """
    import os
    import tempfile

    # This is a module-level function: there is no ``self`` to read the voice
    # from, so the sound is loaded from the path the caller passed in.
    sound: parselmouth.Sound = parselmouth.Sound(audio_file_path)
    # Sound.resample returns a new Sound instead of modifying in place, so the
    # result has to be kept or the pitch is tracked at the original rate.
    sound = sound.resample(16000)
    pitch: parselmouth.Pitch = sound.to_pitch_cc(
        time_step=0.001,
        pitch_floor=40,
        pitch_ceiling=500,
    )
    pitch_tier: parselmouth.Data = call(pitch, "Down to PitchTier")

    # Export into a private scratch directory: a fixed file name in the
    # working directory can be clobbered by a concurrent run and is left
    # behind afterwards.
    with tempfile.TemporaryDirectory() as scratch_dir:
        tier_path = os.path.join(scratch_dir, "parselmouth_cc.txt")
        call(pitch_tier, "Write to headerless spreadsheet file", tier_path)
        df: pd.DataFrame = pd.read_csv(tier_path, sep='\t', header=None)

    df.columns = ['Time', 'Frequency']
    return df.Time.values, df.Frequency.values


def refine_pitch_voice_sauce(times: pd.DataFrame, frequencies: pd.DataFrame) -> np.array:
"""Refine praat Pitch to remove undefined values, and interpolate values to match our time step.
:argument: times: np.array
:type: times: np.array
:argument: frequencies: np.array
:type: frequencies: np.array
:return: f0: refined fundamental frequency values
:rtype: np.array
# Get the number of periods in the signal
n_periods: int = 5 # Nperiods_EC
frameshift: int = 1 # variables.frameshift
time_praat, f0_praat = self.get_raw_pitch(audio_file_path)
f0 = self.refine_pitch_voice_sauce(time_praat, f0_praat)
signal, sampling_rate = self.args['voice']
sound: parselmouth.Sound = parselmouth.Sound(signal, sampling_rate)
sound.resample(16000)
y = sound.values.T
fs = sound.sampling_frequency
sampleshift: float = (fs / 1000 * frameshift)

# Calculate Energy
energy: np.array = np.full(len(f0), np.nan)
for k, f0_curr in enumerate(f0):
ks: Union[float, int] = self.round_half_away_from_zero(k * sampleshift)
if ks <= 0:
continue
if ks >= len(y):
continue

f0_curr: Union[float, int] = f0[k]
if np.isnan(f0_curr):
continue
if f0_curr == 0:
continue
n0_curr: Union[float, int] = fs / f0_curr
ystart: int = int(self.round_half_away_from_zero(ks - n_periods / 2 * n0_curr))
yend: int = int(self.round_half_away_from_zero(ks + n_periods / 2 * n0_curr) - 1)

if ystart <= 0:
continue

if yend > len(y):
continue

yseg: np.array = y[ystart:yend]
energy[k] = np.sum(yseg ** 2)
return energy


def get_raw_pitch(self, audio_file_path: str) -> tuple[np.ndarray, np.ndarray]:
    """Get raw pitch from Praat. This is used to set the window length for the energy calculation.

    The voice is taken from ``self.args['voice']`` (unpacked as a
    ``(signal, sampling_rate)`` pair), resampled to 16 kHz and tracked with
    Praat's cross-correlation pitch method at a 1 ms time step (pitch floor
    40, pitch ceiling 500). The resulting Pitch object is converted to a
    PitchTier and exported through Praat's headerless spreadsheet format to
    obtain plain arrays.

    :param audio_file_path: path to the audio file; kept for interface
        compatibility -- the samples themselves come from ``self.args['voice']``
    :type audio_file_path: str
    :return: time, f0 -- the time and frequency columns of the PitchTier
    :rtype: tuple[np.ndarray, np.ndarray]
    """
    import os
    import tempfile

    signal, sampling_rate = self.args['voice']
    sound: parselmouth.Sound = parselmouth.Sound(signal, sampling_rate)
    # Sound.resample returns a new Sound instead of modifying in place, so the
    # result has to be kept or the pitch is tracked at the original rate.
    sound = sound.resample(16000)
    pitch: parselmouth.Pitch = sound.to_pitch_cc(
        time_step=0.001,
        pitch_floor=40,
        pitch_ceiling=500,
    )
    pitch_tier: parselmouth.Data = call(pitch, "Down to PitchTier")

    # Export into a private scratch directory: a fixed file name in the
    # working directory can be clobbered by a concurrent run and is left
    # behind afterwards.
    with tempfile.TemporaryDirectory() as scratch_dir:
        tier_path = os.path.join(scratch_dir, "parselmouth_cc.txt")
        call(pitch_tier, "Write to headerless spreadsheet file", tier_path)
        df: pd.DataFrame = pd.read_csv(tier_path, sep='\t', header=None)

    df.columns = ['Time', 'Frequency']
    return df.Time.values, df.Frequency.values


def refine_pitch_voice_sauce(self, times: pd.DataFrame, frequencies: pd.DataFrame) -> np.array:
"""Refine praat Pitch to remove undefined values, and interpolate values to match our time step.
:argument: times: np.array
:type: times: np.array
:argument: frequencies: np.array
:type: frequencies: np.array
:return: f0: refined fundamental frequency values
:rtype: np.array
"""
"""

# Licensed under Apache v2 (see LICENSE)
# Based on VoiceSauce files func_PraatPitch.m (authored by Yen-Liang Shue
# and Kristine Yu) and func_PraatFormants.m (authored by Yen-Liang Shue and
# Kristine Yu)


# Praat will sometimes set numerical values to the string '--undefined--'
# But NumPy can't have a string in a float array, so we convert the
# '--undefined--' values to NaN
# Python 3 reads the undefined strings as byte literals, so we also have to
# check for the byte literal b'--undefined--'
# undef = lambda x: np.nan if x == '--undefined--' or x == b'--undefined--' else x ### this function is not used
frame_shift: Union[float, int] = 1
frame_precision: Union[float, int] = 1
# Gather raw Praat f0 estimates
t_raw: np.array
f0_raw: np.array
t_raw, f0_raw = np.array(times), np.array(frequencies)
data_len: int = len(t_raw)
# Initialize f0 measurement vector with zeros (the energy loop skips frames where f0 == 0)
f0: np.array = np.full(data_len, 0, dtype=float)
# Convert time from seconds to nearest whole millisecond
t_raw_ms: np.int_ = np.int_(round_half_away_from_zero(t_raw * 1000))

# Raw Praat estimates are at time points that don't completely match
# the time points in our measurement vectors, so we need to interpolate.
# We use a crude interpolation method, that has precision set by
# frame_precision.

# Determine start and stop times
start: int = 0
if t_raw_ms[-1] % frame_shift == 0:
stop: Union[float, int] = t_raw_ms[-1] + frame_shift
else:
stop = t_raw_ms[-1]
# Iterate through timepoints corresponding to each frame in time range
for idx_f, t_f in enumerate(range(start, stop, frame_shift)):
# Find closest time point among calculated Praat values
min_idx: np.ndarray[int] = np.argmin(np.abs(t_raw_ms - t_f))

# If closest time point is too far away, skip
if np.abs(t_raw_ms[min_idx] - t_f) > frame_precision * frame_shift:
continue

# If index is in range, set value of f0
if (idx_f >= 0) and (idx_f < data_len): # pragma: no branch
f0[idx_f] = f0_raw[min_idx]
return f0


def round_half_away_from_zero(x) -> np.int_:
"""Rounds a number according to round half away from zero method
:argument x: number to round
:type x: Union[float, int]
:return: rounded number
:rtype: np.int_
For example:
- round_half_away_from_zero(3.5) = 4
- round_half_away_from_zero(3.2) = 3
- round_half_away_from_zero(-2.7) = -3
- round_half_away_from_zero(-4.3) = -4
The reason for writing our own rounding function is that NumPy uses the round-half-to-even method. There is a Python round() function, but it doesn't work on NumPy vectors. So we wrote our own round-half-away-from-zero method here.
"""
q: np.int_ = np.int_(np.sign(x) * np.floor(np.abs(x) + 0.5))
# Licensed under Apache v2 (see LICENSE)
# Based on VoiceSauce files func_PraatPitch.m (authored by Yen-Liang Shue
# and Kristine Yu) and func_PraatFormants.m (authored by Yen-Liang Shue and
# Kristine Yu)


# Praat will sometimes set numerical values to the string '--undefined--'
# But NumPy can't have a string in a float array, so we convert the
# '--undefined--' values to NaN
# Python 3 reads the undefined strings as byte literals, so we also have to
# check for the byte literal b'--undefined--'
# undef = lambda x: np.nan if x == '--undefined--' or x == b'--undefined--' else x ### this function is not used
frame_shift: Union[float, int] = 1
frame_precision: Union[float, int] = 1
# Gather raw Praat f0 estimates
t_raw: np.array
f0_raw: np.array
t_raw, f0_raw = np.array(times), np.array(frequencies)
data_len: int = len(t_raw)
# Initialize f0 measurement vector with zeros (the energy loop skips frames where f0 == 0)
f0: np.array = np.full(data_len, 0, dtype=float)
# Convert time from seconds to nearest whole millisecond
t_raw_ms: np.int_ = np.int_(self.round_half_away_from_zero(t_raw * 1000))

# Raw Praat estimates are at time points that don't completely match
# the time points in our measurement vectors, so we need to interpolate.
# We use a crude interpolation method, that has precision set by
# frame_precision.

# Determine start and stop times
start: int = 0
if t_raw_ms[-1] % frame_shift == 0:
stop: Union[float, int] = t_raw_ms[-1] + frame_shift
else:
stop = t_raw_ms[-1]
# Iterate through timepoints corresponding to each frame in time range
for idx_f, t_f in enumerate(range(start, stop, frame_shift)):
# Find closest time point among calculated Praat values
min_idx: np.ndarray[int] = np.argmin(np.abs(t_raw_ms - t_f))

# If closest time point is too far away, skip
if np.abs(t_raw_ms[min_idx] - t_f) > frame_precision * frame_shift:
continue

# If index is in range, set value of f0
if (idx_f >= 0) and (idx_f < data_len): # pragma: no branch
f0[idx_f] = f0_raw[min_idx]
return f0


def round_half_away_from_zero(self, x) -> np.int_:
    """Round a number (or every element of an array) half away from zero.

    :argument x: number or array-like of numbers to round
    :type x: Union[float, int, np.ndarray]
    :return: rounded value as ``np.int_``; array-like input gives an integer
        array of the same shape
    :rtype: np.int_

    For example:
    - round_half_away_from_zero(3.5) = 4
    - round_half_away_from_zero(3.2) = 3
    - round_half_away_from_zero(-2.7) = -3
    - round_half_away_from_zero(-4.3) = -4

    NumPy's own rounding uses the round-half-to-even method, and Python's
    built-in round() does not operate element-wise on NumPy vectors, so the
    rounding is spelled out here.
    """
    # Round the magnitude with floor(|x| + 0.5), then restore the sign so that
    # negative halves move away from zero (-2.5 -> -3) rather than towards it.
    return np.int_(np.sign(x) * np.floor(np.abs(x) + 0.5))
Loading

0 comments on commit 56ff188

Please sign in to comment.