Skip to content

Commit 4b56be8

Browse files
committed
vfp: support pitch and audio track generation
1 parent 5f53137 commit 4b56be8

File tree

4 files changed, with 110 additions (+110) and 18 deletions (-18).

libresvip/plugins/ustx/ustx_parser.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -168,7 +168,7 @@ def parse_notes(self, notes: list[UNote], tick_prefix: int) -> list[Note]:
168168
note.pronunciation = to_romaji(ustx_note.lyric)
169169
elif (chinese_char := CHINESE_RE.search(ustx_note.lyric)) is not None:
170170
note.pronunciation = " ".join(pypinyin.lazy_pinyin(chinese_char.group()))
171-
else:
171+
elif not ustx_note.lyric.startswith("+"):
172172
note.pronunciation = ustx_note.lyric.removeprefix("?")
173173
if prev_ustx_note is not None:
174174
if prev_ustx_note.end > ustx_note.position:

libresvip/plugins/vfp/model.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -42,8 +42,8 @@ class VOXFactoryClipBase(BaseModel):
4242
audio_data_key: Optional[str] = Field(None, alias="audioDataKey")
4343
audio_data_order: list[str] = Field(default_factory=list, alias="audioDataOrder")
4444
audio_data_quarter: float = Field(0, alias="audioDataQuarter")
45-
note_bank: dict[str, VOXFactoryNote] = Field(alias="noteBank")
46-
note_order: list[str] = Field(alias="noteOrder")
45+
note_bank: dict[str, VOXFactoryNote] = Field(default_factory=dict, alias="noteBank")
46+
note_order: list[str] = Field(default_factory=list, alias="noteOrder")
4747
next_note_index: int = Field(0, alias="nextNoteIndex")
4848
pinned_audio_data_order: list[str] = Field(default_factory=list, alias="pinnedAudioDataOrder")
4949
metadata: Optional[VOXFactoryMetadata] = None

libresvip/plugins/vfp/vox_factory_generator.py

Lines changed: 106 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,32 @@
11
import dataclasses
2+
import functools
23
import math
34
import pathlib
45
import secrets
56

6-
from libresvip.core.constants import DEFAULT_BPM, TICKS_IN_BEAT
7+
import more_itertools
8+
import portion
9+
10+
from libresvip.core.constants import DEFAULT_BPM, DEFAULT_PHONEME, TICKS_IN_BEAT
11+
from libresvip.core.time_interval import PiecewiseIntervalDict
712
from libresvip.core.time_sync import TimeSynchronizer
8-
from libresvip.model.base import Note, Project, SingingTrack, SongTempo, TimeSignature, Track
13+
from libresvip.model.base import (
14+
InstrumentalTrack,
15+
Note,
16+
ParamCurve,
17+
Params,
18+
Project,
19+
SongTempo,
20+
TimeSignature,
21+
Track,
22+
)
23+
from libresvip.utils.audio import audio_track_info
24+
from libresvip.utils.music_math import linear_interpolation
925

1026
from .model import (
27+
VOXFactoryAudioClip,
28+
VOXFactoryAudioData,
29+
VOXFactoryAudioTrack,
1130
VOXFactoryNote,
1231
VOXFactoryProject,
1332
VOXFactoryTrack,
@@ -23,17 +42,20 @@ class VOXFactoryGenerator:
2342
prefix: str = dataclasses.field(init=False)
2443
audio_paths: dict[str, pathlib.Path] = dataclasses.field(default_factory=dict)
2544
synchronizer: TimeSynchronizer = dataclasses.field(init=False)
45+
first_bar_length: int = dataclasses.field(init=False)
2646

2747
def generate_project(self, project: Project) -> VOXFactoryProject:
2848
self.prefix = secrets.token_hex(5)
49+
self.first_bar_length = int(project.time_signature_list[0].bar_length())
2950
self.synchronizer = TimeSynchronizer(project.song_tempo_list)
30-
vox_project = VOXFactoryProject(
51+
track_bank, audio_data_bank = self.generate_tracks(project.track_list)
52+
return VOXFactoryProject(
3153
tempo=self.generate_tempo(project.song_tempo_list),
3254
time_signature=self.generate_time_signature(project.time_signature_list),
33-
track_bank=self.generate_tracks(project.track_list),
55+
track_bank=track_bank,
56+
track_order=sorted(track_bank.keys()),
57+
audio_data_bank=audio_data_bank,
3458
)
35-
vox_project.track_order = sorted(vox_project.track_bank.keys())
36-
return vox_project
3759

3860
def generate_tempo(self, tempos: list[SongTempo]) -> float:
3961
return tempos[0].bpm if tempos else DEFAULT_BPM
@@ -44,25 +66,60 @@ def generate_time_signature(self, time_signatures: list[TimeSignature]) -> list[
4466
else:
4567
return [4, 4]
4668

47-
def generate_tracks(self, tracks: list[Track]) -> dict[str, VOXFactoryTrack]:
69+
def generate_tracks(
70+
self, tracks: list[Track]
71+
) -> tuple[dict[str, VOXFactoryTrack], dict[str, VOXFactoryAudioData]]:
4872
track_bank = {}
73+
audio_data_bank = {}
4974
for i, track in enumerate(tracks):
50-
if isinstance(track, SingingTrack):
51-
clip_bank = self.generate_notes(track.note_list)
75+
if isinstance(track, InstrumentalTrack):
76+
audio_path = pathlib.Path(track.audio_file_path)
77+
if (track_info := audio_track_info(track.audio_file_path)) is not None:
78+
source_audio_data_key = f"{self.prefix}-au{i}{audio_path.suffix}"
79+
self.audio_paths[source_audio_data_key] = audio_path
80+
audio_data_bank[source_audio_data_key] = VOXFactoryAudioData(
81+
sample_rate=track_info.sampling_rate,
82+
sample_length=int(track_info.duration * track_info.sampling_rate / 1000),
83+
number_of_channels=track_info.channel_s,
84+
)
85+
clip_bank = {
86+
f"{self.prefix}-cl0": VOXFactoryAudioClip(
87+
name=audio_path.stem,
88+
offset_quarter=0,
89+
start_quarter=track.offset / TICKS_IN_BEAT,
90+
length=track_info.duration / 1000,
91+
source_audio_data_key=source_audio_data_key,
92+
)
93+
}
94+
clip_order = [f"{self.prefix}-cl0"]
95+
track_bank[f"{self.prefix}-tr{i}"] = VOXFactoryAudioTrack(
96+
clip_bank=clip_bank,
97+
clip_order=clip_order,
98+
name=track.title,
99+
mute=track.mute,
100+
solo=track.solo,
101+
pan=track.pan,
102+
)
103+
else:
104+
clip_bank = self.generate_notes(track.note_list, track.edited_params)
52105
clip_order = sorted(clip_bank.keys())
53106
track_bank[f"{self.prefix}-tr{i}"] = VOXFactoryVocalTrack(
54107
clip_bank=clip_bank,
55108
clip_order=clip_order,
109+
name=track.title,
110+
mute=track.mute,
111+
solo=track.solo,
112+
pan=track.pan,
56113
)
57-
return track_bank
114+
return track_bank, audio_data_bank
58115

59-
def generate_notes(self, notes: list[Note]) -> dict[str, VOXFactoryVocalClip]:
116+
def generate_notes(self, notes: list[Note], params: Params) -> dict[str, VOXFactoryVocalClip]:
60117
note_bank = {}
61118
note_order = []
62119
max_ticks = notes[-1].end_pos if notes else 0
63120
max_quarter = max_ticks / TICKS_IN_BEAT
64121
for i, note in enumerate(notes):
65-
note_bank[f"{self.prefix}-no{i}"] = self.generate_note(note)
122+
note_bank[f"{self.prefix}-no{i}"] = self.generate_note(note, params)
66123
note_order.append(f"{self.prefix}-no{i}")
67124
clip_count = math.ceil(max_quarter / 32)
68125
clip_bank = {}
@@ -76,13 +133,48 @@ def generate_notes(self, notes: list[Note]) -> dict[str, VOXFactoryVocalClip]:
76133
)
77134
return clip_bank
78135

79-
def generate_note(self, note: Note) -> VOXFactoryNote:
136+
def generate_note(self, note: Note, params: Params) -> VOXFactoryNote:
80137
note_start_time = self.synchronizer.get_actual_secs_from_ticks(note.start_pos)
81138
return VOXFactoryNote(
82139
time=note_start_time,
83140
ticks=note.start_pos,
84141
duration_ticks=note.length,
85142
midi=note.key_number,
86143
name=note.lyric,
87-
syllable=note.pronunciation,
144+
syllable=note.pronunciation or DEFAULT_PHONEME,
145+
pitch_bends=self.generate_note_pitch(note, params.pitch),
88146
)
147+
148+
def generate_note_pitch(self, note: Note, pitch: ParamCurve) -> list[float]:
149+
note_start_time = self.synchronizer.get_actual_secs_from_ticks(note.start_pos)
150+
note_end_time = self.synchronizer.get_actual_secs_from_ticks(note.end_pos)
151+
key_interval_dict = PiecewiseIntervalDict()
152+
secs_step = 1024 / 44100
153+
prev_secs = None
154+
prev_key: float = -1
155+
for point in pitch.points.root:
156+
if point.x - self.first_bar_length < note.start_pos:
157+
continue
158+
elif point.x - self.first_bar_length > note.end_pos:
159+
break
160+
if point.y == -100:
161+
prev_secs = None
162+
prev_key = 0
163+
else:
164+
secs = self.synchronizer.get_actual_secs_from_ticks(point.x - self.first_bar_length)
165+
key = point.y / 100
166+
if prev_secs is not None:
167+
key_interval_dict[portion.openclosed(prev_secs, secs)] = functools.partial(
168+
linear_interpolation,
169+
start=(prev_secs, prev_key - note.key_number),
170+
end=(secs, key - note.key_number),
171+
)
172+
else:
173+
key_interval_dict[portion.singleton(secs)] = key - note.key_number
174+
prev_secs = secs
175+
prev_key = key
176+
pitch_bends = [
177+
key_interval_dict.get(secs, 0)
178+
for secs in more_itertools.numeric_range(note_start_time, note_end_time, secs_step)
179+
]
180+
return pitch_bends if any(pitch_bends) else []

libresvip/plugins/vpr/vpr_generator.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -164,7 +164,7 @@ def generate_tracks(self, track_list: list[Track]) -> list[VocaloidTracks]:
164164
is_solo_mode=track.solo,
165165
)
166166
)
167-
elif isinstance(track, SingingTrack):
167+
else:
168168
singing_track_found = True
169169
notes = [
170170
VocaloidNotes(

0 commit comments

Comments
 (0)