diff --git a/app/lexicon/controller.py b/app/lexicon/controller.py index a81de9c..2ba7ac7 100644 --- a/app/lexicon/controller.py +++ b/app/lexicon/controller.py @@ -4,7 +4,7 @@ from app.projects.service import ProjectService from app.user.service import UserService -from app.utils.grew_utils import GrewService, grew_request +from app.utils.grew_utils import GrewService, SampleExportService, grew_request from flask import Response, abort, current_app, request from flask_login import current_user from flask_restx import Namespace, Resource, reqparse @@ -13,7 +13,6 @@ from app.samples.service import ( SampleEvaluationService, SampleExerciseLevelService, - SampleExportService, SampleRoleService, SampleUploadService, ) @@ -39,7 +38,6 @@ def post(self, project_name: str): "getLexicon", data={"project_id": project_name, "sample_ids": json.dumps(sample_names)}, ) - print("KK reply", reply) for i in reply["data"]: x = {"key": i["form"] + i["lemma"] + i["POS"] + i["features"] + i["gloss"]} i.update(x) diff --git a/app/samples/controller.py b/app/samples/controller.py index 53fafe5..b39bef0 100644 --- a/app/samples/controller.py +++ b/app/samples/controller.py @@ -8,7 +8,7 @@ from app.projects.service import ProjectService from app.user.service import UserService -from app.utils.grew_utils import GrewService, grew_request +from app.utils.grew_utils import GrewService, SampleExportService, grew_request from app.config import Config from flask import Response, abort, current_app, request, send_from_directory from flask_restx import Namespace, Resource, reqparse @@ -18,7 +18,6 @@ from .service import ( SampleEvaluationService, SampleExerciseLevelService, - SampleExportService, SampleRoleService, SampleUploadService, add_or_keep_timestamps, @@ -169,38 +168,11 @@ def post(self, project_name: str): data = request.get_json(force=True) sample_names = data["samples"] print("requested zip", sample_names, project_name) - sampletrees = list() - samplecontentfiles = list() - - for sample_name in sample_names: - reply = grew_request( - "getConll", - data={"project_id": project_name, "sample_id": sample_name}, - ) - if reply.get("status") == "OK": - - # {"sent_id_1":{"conlls":{"user_1":"conllstring"}}} - sample_tree = SampleExportService.servSampleTrees(reply.get("data", {})) - sample_tree_nots_noui = SampleExportService.servSampleTrees(reply.get("data", {}), timestamps=False, user_ids=False) - print("sample_tree", sample_tree) - sample_content = SampleExportService.sampletree2contentfile(sample_tree_nots_noui) - for sent_id in sample_tree: - last = SampleExportService.get_last_user( - sample_tree[sent_id]["conlls"] - ) - sample_content["last"] = sample_content.get("last", []) + [ - sample_tree_nots_noui[sent_id]["conlls"][last] - ] - - # gluing back the trees - sample_content["last"] = "\n".join(sample_content.get("last", "")) - samplecontentfiles.append(sample_content) - - else: - print("Error: {}".format(reply.get("message"))) + + sample_names, samples_with_string_content = GrewService.get_samples_with_string_contents(project_name, sample_names) memory_file = SampleExportService.contentfiles2zip( - sample_names, samplecontentfiles + sample_names, samples_with_string_content ) resp = Response( @@ -235,39 +207,6 @@ def get(self, project_name: str): # reply = requests.get("http://127.0.0.1:8001/testBoot/") return reply.text - def __getSamples(self, sample_names, project_name): - samplecontentfiles = list() - - for sample_name in sample_names: - reply = grew_request( - "getConll", - data={"project_id": project_name, "sample_id": sample_name}, - ) - if reply.get("status") == "OK": - # {"sent_id_1":{"conlls":{"user_1":"conllstring"}}} - sample_tree = SampleExportService.servSampleTrees(reply.get("data", {})) - sample_tree_nots_noui = SampleExportService.servSampleTrees(reply.get("data", {}), timestamps=False, user_ids=False) - # print("sample_tree", sample_tree) - sample_content = SampleExportService.sampletree2contentfile(sample_tree_nots_noui) - for sent_id in sample_tree: - last = SampleExportService.get_last_user( - sample_tree[sent_id]["conlls"] - ) - # print(sample_content.keys()) - sample_content["last"] = sample_content.get("last", []) + [ - sample_tree_nots_noui[sent_id]["conlls"][last] - ] - # print(sample_content["last"]) - - # gluing back the trees - sample_content["last"] = "\n".join(sample_content["last"]) - samplecontentfiles.append(sample_content["last"]) - - else: - print("Error: {}".format(reply.get("message"))) - - return [sample_names, samplecontentfiles] - def post(self, project_name: str): param = request.get_json(force=True) @@ -278,12 +217,11 @@ def post(self, project_name: str): default_to_parse = [ n for n in all_sample_names if n not in train_samp_names ] if param['to_parse'] == 'ALL' else param['to_parse'] #get samples - train_name, train_set = self.__getSamples(train_samp_names, project_name) + train_name, train_set = GrewService.get_samples_with_string_contents(project_name, train_samp_names) #TODO assure parse_name not empty - parse_name, to_parse = self.__getSamples(default_to_parse, project_name) + parse_name, to_parse = GrewService.get_samples_with_string_contents(project_name, default_to_parse) # return to_parse - data = { 'project_name': project_name, 'train_name': train_name, diff --git a/app/samples/service.py b/app/samples/service.py index f98c325..adcf723 100644 --- a/app/samples/service.py +++ b/app/samples/service.py @@ -1,20 +1,15 @@ from typing import List from app.utils.conllup import ConllProcessor -import io -import json import os import re -import time -import zipfile + from datetime import datetime from app import db from app.config import MAX_TOKENS, Config from app.user.model import User -from app.utils.conll3 import conllFile2trees, trees2conllFile -from app.utils.grew_utils import GrewService, grew_request -from flask import abort, current_app -from sqlalchemy.sql.operators import startswith_op +from app.utils.grew_utils import GrewService +from flask import abort from werkzeug.utils import secure_filename from werkzeug.datastructures import FileStorage from .model import SampleExerciseLevel, SampleRole @@ -50,71 +45,6 @@ def upload( GrewService.save_sample(project_name, sample_name, file_to_save) -# TODO : refactor this -class SampleExportService: - @staticmethod - def servSampleTrees(samples, timestamps=True, user_ids=True): - """ get samples in form of json trees """ - trees = {} - for sentId, users in samples.items(): - for user_id, conll in users.items(): - # tree = conll3.ConllProcessor.sentence_conll_to_sentence_json(conll) - if sentId not in trees: - trees[sentId] = {"conlls": {}} - - # Adapt user_id or timestamps lines depending on options - if not user_ids: conll = re.sub("# user_id = .+\n", '', conll) - if not timestamps: conll = re.sub("# timestamp = .+\n", '', conll) - - trees[sentId]["conlls"][user_id] = conll - return trees - - @staticmethod - def sampletree2contentfile(tree): - if isinstance(tree, str): - tree = json.loads(tree) - usertrees = dict() - for sentId in tree.keys(): - for user, conll in tree[sentId]["conlls"].items(): - if user not in usertrees: - usertrees[user] = list() - usertrees[user].append(conll) - for user, content in usertrees.items(): - usertrees[user] = "\n".join(usertrees[user]) - return usertrees - - @staticmethod - def get_last_user(tree): - timestamps = [(user, get_timestamp(conll)) for (user, conll) in tree.items()] - if len(timestamps) == 1: - last = timestamps[0][0] - else: - # print(timestamps) - last = sorted(timestamps, key=lambda x: x[1])[-1][0] - # print(last) - return last - - @staticmethod - def contentfiles2zip(sample_names, sampletrees): - memory_file = io.BytesIO() - with zipfile.ZipFile(memory_file, "w") as zf: - for sample_name, sample in zip(sample_names, sampletrees): - for fuser, filecontent in sample.items(): - data = zipfile.ZipInfo("{}.{}.conllu".format(sample_name, fuser)) - data.date_time = time.localtime(time.time())[:6] - data.compress_type = zipfile.ZIP_DEFLATED - zf.writestr(data, filecontent) - memory_file.seek(0) - return memory_file - - -# TODO : refactor this -def get_timestamp(conll): - t = re.search("# timestamp = (\d+(?:\.\d+)?)\n", conll).groups() - if t: - return t[0] - else: - return False class SampleRoleService: diff --git a/app/utils/grew_utils.py b/app/utils/grew_utils.py index 4105a71..69009b2 100644 --- a/app/utils/grew_utils.py +++ b/app/utils/grew_utils.py @@ -4,9 +4,11 @@ import requests from flask import abort, current_app -from werkzeug.utils import secure_filename from app import grew_config - +import re +import io +import time +import zipfile def grew_request(fct_name, data={}, files={}): try: @@ -186,4 +188,102 @@ def try_package(project_id: str, package: str, sample_ids: List[str] ,passed_use "sample_ids": json.dumps(sample_ids) } reply = grew_request("tryPackage", data=data) - return reply \ No newline at end of file + return reply + + @staticmethod + def get_samples_with_string_contents(project_name: str, sample_names: List[str]): + samplecontentfiles = list() + for sample_name in sample_names: + reply = grew_request( + "getConll", + data={"project_id": project_name, "sample_id": sample_name}, + ) + if reply.get("status") == "OK": + + # {"sent_id_1":{"conlls":{"user_1":"conllstring"}}} + sample_tree = SampleExportService.servSampleTrees(reply.get("data", {})) + sample_tree_nots_noui = SampleExportService.servSampleTrees(reply.get("data", {}), timestamps=False, user_ids=False) + sample_content = SampleExportService.sampletree2contentfile(sample_tree_nots_noui) + for sent_id in sample_tree: + last = SampleExportService.get_last_user( + sample_tree[sent_id]["conlls"] + ) + sample_content["last"] = sample_content.get("last", []) + [ + sample_tree_nots_noui[sent_id]["conlls"][last] + ] + + # gluing back the trees + sample_content["last"] = "\n".join(sample_content.get("last", "")) + samplecontentfiles.append(sample_content) + + else: + print("Error: {}".format(reply.get("message"))) + return sample_names, samplecontentfiles + + + +# TODO : refactor this +def get_timestamp(conll): + t = re.search("# timestamp = (\d+(?:\.\d+)?)\n", conll).groups() + if t: + return t[0] + else: + return False + +# TODO : refactor this + +class SampleExportService: + @staticmethod + def servSampleTrees(samples, timestamps=True, user_ids=True): + """ get samples in form of json trees """ + trees = {} + for sentId, users in samples.items(): + for user_id, conll in users.items(): + # tree = conll3.ConllProcessor.sentence_conll_to_sentence_json(conll) + if sentId not in trees: + trees[sentId] = {"conlls": {}} + + # Adapt user_id or timestamps lines depending on options + if not user_ids: conll = re.sub("# user_id = .+\n", '', conll) + if not timestamps: conll = re.sub("# timestamp = .+\n", '', conll) + + trees[sentId]["conlls"][user_id] = conll + return trees + + @staticmethod + def sampletree2contentfile(tree): + if isinstance(tree, str): + tree = json.loads(tree) + usertrees = dict() + for sentId in tree.keys(): + for user, conll in tree[sentId]["conlls"].items(): + if user not in usertrees: + usertrees[user] = list() + usertrees[user].append(conll) + for user, content in usertrees.items(): + usertrees[user] = "\n".join(usertrees[user]) + return usertrees + + @staticmethod + def get_last_user(tree): + timestamps = [(user, get_timestamp(conll)) for (user, conll) in tree.items()] + if len(timestamps) == 1: + last = timestamps[0][0] + else: + # print(timestamps) + last = sorted(timestamps, key=lambda x: x[1])[-1][0] + # print(last) + return last + + @staticmethod + def contentfiles2zip(sample_names, sampletrees): + memory_file = io.BytesIO() + with zipfile.ZipFile(memory_file, "w") as zf: + for sample_name, sample in zip(sample_names, sampletrees): + for fuser, filecontent in sample.items(): + data = zipfile.ZipInfo("{}.{}.conllu".format(sample_name, fuser)) + data.date_time = time.localtime(time.time())[:6] + data.compress_type = zipfile.ZIP_DEFLATED + zf.writestr(data, filecontent) + memory_file.seek(0) + return memory_file \ No newline at end of file