# @package __global__
# Classifier-free guidance settings: one value used for training, one for
# inference.
# NOTE(review): exact semantics are defined by the consuming code, not shown
# here -- confirm before tuning.
classifier_free_guidance:
  training_dropout: 0.1
  inference_coef: 3.0
# Dropout value per conditioning attribute, grouped by attribute type.
# The attribute names used here (`description`, `self_wav`) are the same names
# declared under `conditioners` in this file.
attribute_dropout:
  args:
    active_on_eval: false
  text:
    description: 0.4
  wav:
    self_wav: 0.4
# Assignment of conditioners to fusing methods. Both conditioners declared in
# this file (`self_wav`, `description`) are listed under `prepend`; the other
# method lists are left empty.
fuser:
  cross_attention_pos_emb: false
  cross_attention_pos_emb_scale: 1
  sum: []
  prepend: [self_wav, description]
  cross: []
  input_interpolate: []
# Conditioner definitions. Each entry names a `model` and carries a sibling
# mapping, keyed by that model name, holding the model's parameters.
conditioners:
  # Conditioner selecting the `style` model.
  self_wav:
    model: style
    style:
      model_name: mert
      transformer_scale: default
      # Resolved by interpolation from the top-level `sample_rate` key, which
      # is defined outside this file.
      sample_rate: ${sample_rate}
      encodec_checkpoint: '//pretrained/facebook/encodec_32khz'
      encodec_n_q: 3
      length: 3.0
      # MERT features are at 75 Hz, so downsampling by 15 yields a 5 Hz
      # representation (75 / 15 = 5).
      ds_factor: 15
      n_q_out: 6
      eval_q: 3
      q_dropout: true
      bins: 1024
      # NOTE(review): presumably a [min, max] range in the same unit as
      # `length` -- confirm against the consumer.
      varying_lengths: [1.5, 4.5]
      batch_norm: true
      compute_mask: true
      # Resolved by interpolation from `transformer_lm.n_q`, defined outside
      # this file.
      num_codebooks_lm: ${transformer_lm.n_q}
      ds_rate_compression: 640
      use_middle_of_segment: false
      rvq_threshold_ema_dead_code: 0.1
  # Conditioner selecting the `t5` model.
  description:
    model: t5
    t5:
      name: t5-base
      finetune: false
      word_dropout: 0.2
      normalize_text: false
# Overrides for the training split of the dataset.
dataset:
  train:
    merge_text_p: 0.25
    drop_desc_p: 0.5
    drop_other_p: 0.5
    # NOTE(review): the nesting level of `shuffle` could not be recovered from
    # the flattened copy; it is kept next to its preceding sibling under
    # `train`. Confirm it was not meant to sit directly under `dataset`.
    shuffle: true