Skip to content

Commit

Permalink
<change> api json format
Browse files Browse the repository at this point in the history
  • Loading branch information
kirianguiller committed Nov 13, 2020
1 parent 7ab88eb commit 28c8ce6
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 15 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ z_sandbox/

# databases
app/*.sqlite
app/klang/data_dev
app/klang/data_test

.idea/
*.pyc
Expand Down
3 changes: 2 additions & 1 deletion app/klang/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ def get(self) :
class ConllNameServiceResource(Resource):
"ConllService"

def get(self, conll_name) :
def get(self, conll_name):

conll_string = ConllService.get_by_name(conll_name)
sentences_string = ConllService.seperate_conll_sentences(conll_string)
sentences_audio_token = []
Expand Down
12 changes: 4 additions & 8 deletions app/klang/data_test/John_Doe/John_Doe.intervals.conll
Original file line number Diff line number Diff line change
@@ -1,9 +1,5 @@
# sent_id = John_Doe.intervals.conll__1
# text = it is a test conll
# sound_url = Veronique_Jezewski.wav
1 it it _ _ _ _ _ _ AlignBegin=162330|AlignEnd=162590
2 is is _ _ _ _ _ _ AlignBegin=162590|AlignEnd=162849
3 a a _ _ _ _ _ _ AlignBegin=162849|AlignEnd=163109
4 test test _ _ _ _ _ _ AlignBegin=163368|AlignEnd=163628
5 que que _ _ _ _ _ _ AlignBegin=163628|AlignEnd=163887
6 conll conll _ _ _ _ _ _ AlignBegin=163887|AlignEnd=164147
# text = it is
# sound_url = John_Doe.wav
1 it it _ _ _ _ _ _ AlignBegin=100|AlignEnd=500
2 is is _ _ _ _ _ _ AlignBegin=600|AlignEnd=1000
23 changes: 20 additions & 3 deletions app/klang/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
r"^\d+\t(.+?)\t.*AlignBegin=(\d+).*AlignEnd=(\d+)"
)


class ConllService:
@staticmethod
def get_path_data():
Expand Down Expand Up @@ -45,11 +46,27 @@ def seperate_conll_sentences(conll_string: str) -> List[str]:

@staticmethod
def sentence_to_audio_tokens(sentence_string: str) -> dict:
    """Map each aligned token of one CoNLL sentence to its audio alignment.

    Blank lines and comment lines (starting with ``#``) are ignored. Token
    lines that ``align_begin_and_end_regex`` does not match (for example a
    token without ``AlignBegin``/``AlignEnd`` values) are skipped instead of
    raising ``AttributeError`` on a ``None`` match.

    Args:
        sentence_string: The newline-separated lines of a single sentence.

    Returns:
        A dict keyed by the integer token id (first tab-separated column).
        Each value has the form
        ``{"token": str, "alignBegin": int, "alignEnd": int}``.
    """
    audio_tokens = {}
    for line in sentence_string.split("\n"):
        if not line or line.startswith("#"):
            continue

        match = align_begin_and_end_regex.search(line)
        if match is None:
            # No alignment information on this line; nothing to report.
            continue

        # The pattern starts with a run of digits followed by a tab, so the
        # first column is safe to convert once the line has matched.
        token_id = int(line.split("\t", 1)[0])
        audio_tokens[token_id] = {
            "token": match.group(1),
            "alignBegin": int(match.group(2)),
            "alignEnd": int(match.group(3)),
        }

    return audio_tokens

@staticmethod
def process_sentences_audio_token(conll_name: str):
    """Return the audio-token mapping of every sentence in a named CoNLL.

    The CoNLL content is fetched with ``ConllService.get_by_name``, split
    with ``ConllService.seperate_conll_sentences``, and each sentence is
    converted with ``ConllService.sentence_to_audio_tokens``.

    Args:
        conll_name: Name passed to ``ConllService.get_by_name``.

    Returns:
        A list with one ``sentence_to_audio_tokens`` result per sentence,
        in the order the sentences were split.
    """
    raw_conll = ConllService.get_by_name(conll_name)
    sentence_blocks = ConllService.seperate_conll_sentences(raw_conll)
    return [
        ConllService.sentence_to_audio_tokens(block)
        for block in sentence_blocks
    ]
20 changes: 17 additions & 3 deletions app/klang/service_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,32 +13,46 @@ def test_get_path_data():
print("KK path_data", path_data)
assert os.path.isdir(path_data)


def test_get_path_conll():
    """The path resolved for the fixture name must be an existing file."""
    assert os.path.isfile(ConllService.get_path_conll(file_name))


def test_read_conll():
    """Reading the fixture CoNLL file must yield a string."""
    path_conll = ConllService.get_path_conll(file_name)
    conll_string = ConllService.read_conll(path_conll)
    # isinstance is the idiomatic type check and also accepts str subclasses.
    assert isinstance(conll_string, str)


def test_get_all():
    """``get_all`` must list exactly the single ``John_Doe`` entry."""
    expected_names = ["John_Doe"]
    assert ConllService.get_all() == expected_names


def test_get_by_name():
    """``get_by_name`` must return the same text as reading the file by path."""
    read_from_path = ConllService.read_conll(
        ConllService.get_path_conll(file_name)
    )
    fetched_by_name = ConllService.get_by_name(file_name)
    assert read_from_path == fetched_by_name


def test_seperate_conll_sentences():
    """A string holding two sentence blocks must be split into two items."""
    two_sentence_conll = "# sent_id = test_sentence_1\n1\ttest_token\ntest_lemma\ntest_upos\n\n# sent_id = test_sentence_2\n1\ttest_token\ntest_lemma\ntest_upos\n\n"
    assert len(ConllService.seperate_conll_sentences(two_sentence_conll)) == 2


def test_sentence_to_audio_tokens():
    """A single aligned token must be returned under its integer token id.

    ``sentence_to_audio_tokens`` returns a dict keyed by token id, so the
    entry is looked up with key ``1`` and read by field name; the former
    tuple-style ``[0][n]`` lookups no longer apply to that return shape.
    """
    sentence = "# sent_id = test_sentence_1\n1\tdonc\tdonc\t_\t_\t_\t_\t_\t_\tAlignBegin=0|AlignEnd=454"
    audio_tokens = ConllService.sentence_to_audio_tokens(sentence)
    assert audio_tokens[1]["token"] == "donc"
    assert audio_tokens[1]["alignBegin"] == 0
    assert audio_tokens[1]["alignEnd"] == 454


def test_process_sentences_audio_token():
    """The fixture CoNLL must produce one sentence with two aligned tokens."""
    expected = [
        {
            1: {"token": "it", "alignBegin": 100, "alignEnd": 500},
            2: {"token": "is", "alignBegin": 600, "alignEnd": 1000},
        }
    ]
    assert ConllService.process_sentences_audio_token(file_name) == expected

0 comments on commit 28c8ce6

Please sign in to comment.