Skip to content

Commit 515a0da

Browse files
committed
wip
1 parent b7b04fb commit 515a0da

File tree

15 files changed

+531
-520
lines changed

15 files changed

+531
-520
lines changed

test/tts/components-rust/test-tts/src/lib.rs

Lines changed: 243 additions & 179 deletions
Large diffs are not rendered by default.

tts/deepgram/src/bindings.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22
// Options used:
33
// * runtime_path: "wit_bindgen_rt"
44
// * with "golem:tts/[email protected]" = "golem_tts::golem::tts::advanced"
5-
// * with "golem:tts/[email protected]" = "golem_tts::golem::tts::voices"
65
// * with "golem:tts/[email protected]" = "golem_tts::golem::tts::synthesis"
76
// * with "golem:tts/[email protected]" = "golem_tts::golem::tts::types"
7+
// * with "golem:tts/[email protected]" = "golem_tts::golem::tts::voices"
88
// * generate_unused_types
99
use golem_tts::golem::tts::types as __with_name0;
1010
use golem_tts::golem::tts::voices as __with_name1;

tts/deepgram/src/deepgram.rs

Lines changed: 5 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ use serde::{Deserialize, Serialize};
1818
use crate::{
1919
error::{from_http_error, unsupported},
2020
resources::{
21-
DeepgramLongFormOperation, DeepgramPronunciationLexicon, VoiceResponse,
21+
DeepgramLongFormOperation, DeepgramPronunciationLexicon,
22+
VoiceResponse,
2223
},
2324
utils::estimate_duration,
2425
};
@@ -27,32 +28,7 @@ pub struct Deepgram {
2728
client: ApiClient,
2829
}
2930

30-
impl Deepgram {
31-
pub fn create_lexicon_unwrapped(
32-
&self,
33-
name: String,
34-
language: LanguageCode,
35-
entries: Option<Vec<PronunciationEntry>>,
36-
) -> Result<DeepgramPronunciationLexicon, TtsError> {
37-
let _ = (name, language, entries);
38-
unsupported("Deepgram does not support pronunciation lexicon")
39-
}
40-
41-
pub fn synthesize_long_form_unwrapped(
42-
&self,
43-
content: String,
44-
voice: String,
45-
output_location: String,
46-
chapter_breaks: Option<Vec<u32>>,
47-
) -> Result<DeepgramLongFormOperation, TtsError> {
48-
let _ = (content, voice, output_location, chapter_breaks);
49-
unsupported("Deepgram does not supported Async synthesis.")
50-
}
51-
}
52-
5331
impl TtsClient for Deepgram {
54-
type ClientSynthesisStream = ();
55-
type ClientVoiceConversionStream = ();
5632
type ClientLongFormOperation = DeepgramLongFormOperation;
5733
type ClientPronunciationLexicon = DeepgramPronunciationLexicon;
5834

@@ -351,9 +327,8 @@ impl TtsClient for Deepgram {
351327
_name: String,
352328
_language: LanguageCode,
353329
_entries: Option<Vec<PronunciationEntry>>,
354-
) -> Result<PronunciationLexicon, TtsError> {
355-
self.create_lexicon_unwrapped(_name, _language, _entries)
356-
.map(PronunciationLexicon::new)
330+
) -> Result<Self::ClientPronunciationLexicon, TtsError> {
331+
unsupported("Deepgram does not support pronunciation lexicon")
357332
}
358333

359334
fn synthesize_long_form(
@@ -363,12 +338,7 @@ impl TtsClient for Deepgram {
363338
_output_location: String,
364339
_chapter_breaks: Option<Vec<u32>>,
365340
) -> Result<Self::ClientLongFormOperation, TtsError> {
366-
self.synthesize_long_form_unwrapped(
367-
_content,
368-
_voice,
369-
_output_location,
370-
_chapter_breaks,
371-
)
341+
unsupported("Deepgram does not supported Async synthesis.")
372342
}
373343
}
374344

tts/deepgram/src/lib.rs

Lines changed: 27 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,7 @@ use golem_tts::{
66
AudioSample, Guest as AdvancedGuest, LanguageCode, LongFormOperation,
77
PronunciationEntry, PronunciationLexicon, TtsError, Voice, VoiceDesignParams,
88
},
9-
synthesis::{
10-
Guest as SynthesisGuest, SynthesisOptions, SynthesisResult, TextInput, TimingInfo,
11-
ValidationResult,
12-
},
9+
synthesis::{Guest as SynthesisGuest, SynthesisOptions, SynthesisResult, TextInput, TimingInfo, ValidationResult},
1310
voices::{Guest as VoicesGuest, LanguageInfo, VoiceFilter},
1411
},
1512
};
@@ -115,7 +112,8 @@ impl AdvancedGuest for DeepgramComponent {
115112
entries: Option<Vec<PronunciationEntry>>,
116113
) -> Result<PronunciationLexicon, TtsError> {
117114
let deepgram = Deepgram::new()?;
118-
deepgram.create_lexicon(name, language, entries)
115+
let lexicon = deepgram.create_lexicon(name, language, entries)?;
116+
Ok(PronunciationLexicon::new(lexicon))
119117
}
120118

121119
#[doc = " Long-form content synthesis with optimization (removed async)"]
@@ -133,28 +131,6 @@ impl AdvancedGuest for DeepgramComponent {
133131
}
134132
}
135133

136-
impl ExtendedAdvancedTrait for DeepgramComponent {
137-
fn unwrappered_created_lexicon(
138-
name: String,
139-
language: LanguageCode,
140-
entries: Option<Vec<PronunciationEntry>>,
141-
) -> Result<Self::PronunciationLexicon, TtsError> {
142-
let deepgram = Deepgram::new()?;
143-
deepgram.create_lexicon_unwrapped(name, language, entries)
144-
}
145-
146-
fn unwrappered_synthesize_long_form(
147-
content: String,
148-
voice: Voice,
149-
output_location: String,
150-
chapter_breaks: Option<Vec<u32>>,
151-
) -> Result<Self::LongFormOperation, TtsError> {
152-
let deepgram = Deepgram::new()?;
153-
let voice_id = voice.id.clone();
154-
deepgram.synthesize_long_form_unwrapped(content, voice_id, output_location, chapter_breaks)
155-
}
156-
}
157-
158134
impl VoicesGuest for DeepgramComponent {
159135
#[doc = " List available voices with filtering and pagination"]
160136
fn list_voices(filter: Option<VoiceFilter>) -> Result<Vec<Voice>, TtsError> {
@@ -175,10 +151,31 @@ impl VoicesGuest for DeepgramComponent {
175151
}
176152
}
177153

178-
#[cfg(not(feature = "durability"))]
179-
type DurableDeepgramComponent = DurableTTS<Deepgram>;
154+
impl ExtendedAdvancedTrait for DeepgramComponent {
155+
fn unwrappered_created_lexicon(
156+
name: String,
157+
language: LanguageCode,
158+
entries: Option<Vec<PronunciationEntry>>,
159+
) -> Result<Self::PronunciationLexicon, golem_tts::golem::tts::types::TtsError> {
160+
let client = Deepgram::new()?;
161+
client.create_lexicon(name, language, entries)
162+
}
163+
164+
fn unwrappered_synthesize_long_form(
165+
content: String,
166+
voice: Voice,
167+
output_location: String,
168+
chapter_breaks: Option<Vec<u32>>,
169+
) -> Result<Self::LongFormOperation, golem_tts::golem::tts::types::TtsError> {
170+
let client = Deepgram::new()?;
171+
let voice_id = voice.id.clone();
172+
client.synthesize_long_form(content, voice_id, output_location, chapter_breaks)
173+
}
174+
}
175+
176+
177+
180178

181-
#[cfg(feature = "durability")]
182179
type DurableDeepgramComponent = DurableTTS<DeepgramComponent>;
183180

184181
golem_tts::export_tts!(DurableDeepgramComponent with_types_in golem_tts);

tts/elevenlabs/src/bindings.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
// Generated by `wit-bindgen` 0.41.0. DO NOT EDIT!
22
// Options used:
33
// * runtime_path: "wit_bindgen_rt"
4-
// * with "golem:tts/[email protected]" = "golem_tts::golem::tts::synthesis"
54
// * with "golem:tts/[email protected]" = "golem_tts::golem::tts::types"
65
// * with "golem:tts/[email protected]" = "golem_tts::golem::tts::voices"
6+
// * with "golem:tts/[email protected]" = "golem_tts::golem::tts::synthesis"
77
// * with "golem:tts/[email protected]" = "golem_tts::golem::tts::advanced"
88
// * generate_unused_types
99
use golem_tts::golem::tts::types as __with_name0;

tts/elevenlabs/src/elevenlabs.rs

Lines changed: 75 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,7 @@ pub struct Elevenlabs {
3838

3939

4040
impl TtsClient for Elevenlabs {
41-
type ClientSynthesisStream = ();
42-
type ClientVoiceConversionStream = ();
41+
4342
type ClientLongFormOperation = ElLongFormSynthesis;
4443
type ClientPronunciationLexicon = ElPronunciationLexicon;
4544

@@ -772,18 +771,84 @@ impl TtsClient for Elevenlabs {
772771
name: String,
773772
language: LanguageCode,
774773
entries: Option<Vec<PronunciationEntry>>,
775-
) -> Result<PronunciationLexicon, TtsError> {
776-
let lexicon = self.create_lexicon_unwrapped(name, language, entries)?;
777-
Ok(PronunciationLexicon::new(lexicon))
774+
) -> Result<Self::ClientPronunciationLexicon, TtsError> {
775+
let description = Some(format!(
776+
"Pronunciation dictionary for {} language",
777+
match language.as_str() {
778+
"en" => "English",
779+
"es" => "Spanish",
780+
"fr" => "French",
781+
"de" => "German",
782+
"hi" => "Hindi",
783+
_ => "multilingual",
784+
}
785+
));
786+
787+
let rules = match entries {
788+
Some(entries) => entries
789+
.into_iter()
790+
.map(|entry| {
791+
// Check if pronunciation looks like IPA (contains special characters)
792+
if entry
793+
.pronunciation
794+
.chars()
795+
.any(|c| "əɪɛɔʊʌɑɒæɜɪʏøœɯɤɐɞɘɵɨɵʉɪʊ".contains(c))
796+
{
797+
PronunciationRule {
798+
string_to_replace: entry.word,
799+
rule_type: "phoneme".to_string(),
800+
alias: None,
801+
phoneme: Some(entry.pronunciation),
802+
alphabet: Some("ipa".to_string()),
803+
}
804+
} else {
805+
// Treat as alias if no IPA characters detected
806+
PronunciationRule {
807+
string_to_replace: entry.word,
808+
rule_type: "alias".to_string(),
809+
alias: Some(entry.pronunciation),
810+
phoneme: None,
811+
alphabet: None,
812+
}
813+
}
814+
})
815+
.collect(),
816+
None => vec![],
817+
};
818+
let request = CreateLexiconFromRulesRequest {
819+
rules,
820+
name: name.clone(),
821+
description: description
822+
.or_else(|| Some(format!("Pronunciation dictionary for {}", name))),
823+
workspace_access: Some("admin".to_string()),
824+
};
825+
826+
let response = self
827+
.client
828+
.retry_request::<CreateLexiconResponse, CreateLexiconFromRulesRequest, (), _>(
829+
Method::POST,
830+
"/v1/pronunciation-dictionaries/add-from-rules",
831+
request,
832+
None,
833+
from_http_error,
834+
)?;
835+
836+
Ok(ElPronunciationLexicon {
837+
id: response.id,
838+
name: response.name,
839+
language,
840+
version_id: RefCell::new(response.version_id),
841+
rules_count: RefCell::new(response.version_rules_num),
842+
})
778843
}
779844

780845
fn synthesize_long_form(
781846
&self,
782-
content: String,
783-
voice: String,
784-
output_location: String,
785-
chapter_breaks: Option<Vec<u32>>,
847+
_content: String,
848+
_voice: String,
849+
_output_location: String,
850+
_chapter_breaks: Option<Vec<u32>>,
786851
) -> Result<ElLongFormSynthesis, TtsError> {
787-
self.synthesize_long_form_unwrapped(content, voice, output_location, chapter_breaks)
852+
unsupported("Long-form synthesis not yet implemented for ElevenLabs TTS")
788853
}
789854
}

tts/elevenlabs/src/lib.rs

Lines changed: 22 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,8 @@ impl AdvancedGuest for ElevenLabsTtsComponent {
115115
entries: Option<Vec<PronunciationEntry>>,
116116
) -> Result<PronunciationLexicon, TtsError> {
117117
let client = Elevenlabs::new()?;
118-
client.create_lexicon(name, language, entries)
118+
let lexicon = client.create_lexicon(name, language, entries)?;
119+
Ok(PronunciationLexicon::new(lexicon))
119120
}
120121

121122
#[doc = " Long-form content synthesis with optimization (removed async)"]
@@ -129,27 +130,6 @@ impl AdvancedGuest for ElevenLabsTtsComponent {
129130
}
130131
}
131132

132-
impl ExtendedAdvancedTrait for ElevenLabsTtsComponent {
133-
fn unwrappered_created_lexicon(
134-
name: String,
135-
language: LanguageCode,
136-
entries: Option<Vec<PronunciationEntry>>,
137-
) -> Result<Self::PronunciationLexicon, TtsError> {
138-
let client = Elevenlabs::new()?;
139-
client.create_lexicon(name, language, entries)
140-
}
141-
142-
fn unwrappered_synthesize_long_form(
143-
content: String,
144-
voice: Voice,
145-
output_location: String,
146-
chapter_breaks: Option<Vec<u32>>,
147-
) -> Result<Self::LongFormOperation, TtsError> {
148-
let client = Elevenlabs::new()?;
149-
let voice_name = voice.id.clone();
150-
client.synthesize_long_form(content, voice_name, output_location, chapter_breaks)
151-
}
152-
}
153133

154134
impl VoicesGuest for ElevenLabsTtsComponent {
155135
#[doc = " List available voices with filtering and pagination"]
@@ -170,11 +150,28 @@ impl VoicesGuest for ElevenLabsTtsComponent {
170150
client.list_languages()
171151
}
172152
}
153+
impl ExtendedAdvancedTrait for ElevenLabsTtsComponent {
154+
fn unwrappered_created_lexicon(
155+
name: String,
156+
language: LanguageCode,
157+
entries: Option<Vec<PronunciationEntry>>,
158+
) -> Result<Self::PronunciationLexicon,TtsError> {
159+
let client = Elevenlabs::new()?;
160+
client.create_lexicon(name, language, entries)
161+
}
173162

174-
#[cfg(not(feature = "durability"))]
175-
type DurableElevenLabsTtsComponent = DurableTTS<Elevenlabs>;
163+
fn unwrappered_synthesize_long_form(
164+
content: String,
165+
voice: Voice,
166+
output_location: String,
167+
chapter_breaks: Option<Vec<u32>>,
168+
) -> Result<Self::LongFormOperation, TtsError> {
169+
let client = Elevenlabs::new()?;
170+
let voice_id = voice.id.clone();
171+
client.synthesize_long_form(content, voice_id, output_location, chapter_breaks)
172+
}
173+
}
176174

177-
#[cfg(feature = "durability")]
178175
type DurableElevenLabsTtsComponent = DurableTTS<ElevenLabsTtsComponent>;
179176

180177
golem_tts::export_tts!(DurableElevenLabsTtsComponent with_types_in golem_tts);

tts/google/src/bindings.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
// Generated by `wit-bindgen` 0.41.0. DO NOT EDIT!
22
// Options used:
33
// * runtime_path: "wit_bindgen_rt"
4-
// * with "golem:tts/[email protected]" = "golem_tts::golem::tts::advanced"
54
// * with "golem:tts/[email protected]" = "golem_tts::golem::tts::synthesis"
65
// * with "golem:tts/[email protected]" = "golem_tts::golem::tts::voices"
76
// * with "golem:tts/[email protected]" = "golem_tts::golem::tts::types"
7+
// * with "golem:tts/[email protected]" = "golem_tts::golem::tts::advanced"
88
// * generate_unused_types
99
use golem_tts::golem::tts::types as __with_name0;
1010
use golem_tts::golem::tts::voices as __with_name1;

0 commit comments

Comments
 (0)