Skip to content

Commit

Permalink
refactor: GrewService.get_samples_with_string_contents()
Browse files Browse the repository at this point in the history
  • Loading branch information
kirianguiller committed Oct 18, 2022
1 parent 0bfa13d commit 125b530
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 147 deletions.
4 changes: 1 addition & 3 deletions app/lexicon/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from app.projects.service import ProjectService
from app.user.service import UserService
from app.utils.grew_utils import GrewService, grew_request
from app.utils.grew_utils import GrewService, SampleExportService, grew_request
from flask import Response, abort, current_app, request
from flask_login import current_user
from flask_restx import Namespace, Resource, reqparse
Expand All @@ -13,7 +13,6 @@
from app.samples.service import (
SampleEvaluationService,
SampleExerciseLevelService,
SampleExportService,
SampleRoleService,
SampleUploadService,
)
Expand All @@ -39,7 +38,6 @@ def post(self, project_name: str):
"getLexicon",
data={"project_id": project_name, "sample_ids": json.dumps(sample_names)},
)
print("KK reply", reply)
for i in reply["data"]:
x = {"key": i["form"] + i["lemma"] + i["POS"] + i["features"] + i["gloss"]}
i.update(x)
Expand Down
74 changes: 6 additions & 68 deletions app/samples/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from app.projects.service import ProjectService
from app.user.service import UserService
from app.utils.grew_utils import GrewService, grew_request
from app.utils.grew_utils import GrewService, SampleExportService, grew_request
from app.config import Config
from flask import Response, abort, current_app, request, send_from_directory
from flask_restx import Namespace, Resource, reqparse
Expand All @@ -18,7 +18,6 @@
from .service import (
SampleEvaluationService,
SampleExerciseLevelService,
SampleExportService,
SampleRoleService,
SampleUploadService,
add_or_keep_timestamps,
Expand Down Expand Up @@ -169,38 +168,11 @@ def post(self, project_name: str):
data = request.get_json(force=True)
sample_names = data["samples"]
print("requested zip", sample_names, project_name)
sampletrees = list()
samplecontentfiles = list()

for sample_name in sample_names:
reply = grew_request(
"getConll",
data={"project_id": project_name, "sample_id": sample_name},
)
if reply.get("status") == "OK":

# {"sent_id_1":{"conlls":{"user_1":"conllstring"}}}
sample_tree = SampleExportService.servSampleTrees(reply.get("data", {}))
sample_tree_nots_noui = SampleExportService.servSampleTrees(reply.get("data", {}), timestamps=False, user_ids=False)
print("sample_tree", sample_tree)
sample_content = SampleExportService.sampletree2contentfile(sample_tree_nots_noui)
for sent_id in sample_tree:
last = SampleExportService.get_last_user(
sample_tree[sent_id]["conlls"]
)
sample_content["last"] = sample_content.get("last", []) + [
sample_tree_nots_noui[sent_id]["conlls"][last]
]

# gluing back the trees
sample_content["last"] = "\n".join(sample_content.get("last", ""))
samplecontentfiles.append(sample_content)

else:
print("Error: {}".format(reply.get("message")))

sample_names, samples_with_string_content = GrewService.get_samples_with_string_contents(project_name, sample_names)

memory_file = SampleExportService.contentfiles2zip(
sample_names, samplecontentfiles
sample_names, samples_with_string_content
)

resp = Response(
Expand Down Expand Up @@ -235,39 +207,6 @@ def get(self, project_name: str):
# reply = requests.get("http://127.0.0.1:8001/testBoot/")
return reply.text

def __getSamples(self, sample_names, project_name):
    """Fetch samples from the grew server and keep the latest tree of each sentence.

    For every name in ``sample_names`` a ``getConll`` request is sent for
    ``project_name``. For each sentence of the reply, the tree of the user
    chosen by ``SampleExportService.get_last_user`` is kept, with its
    ``# user_id`` and ``# timestamp`` lines removed; the kept trees of a
    sample are joined with a newline into one string.

    Returns:
        ``[names, contents]`` where ``contents[i]`` is the joined string of
        sample ``names[i]``. Samples whose request did not answer with
        status "OK" are reported with ``print`` and left out of BOTH lists,
        so the two lists always stay aligned.
    """
    fetched_names = []
    samplecontentfiles = []

    for sample_name in sample_names:
        reply = grew_request(
            "getConll",
            data={"project_id": project_name, "sample_id": sample_name},
        )
        if reply.get("status") != "OK":
            print("Error: {}".format(reply.get("message")))
            continue

        # reply data shape: {"sent_id_1": {"user_1": "conllstring"}}
        raw_conlls = reply.get("data", {})
        # The timestamped version is needed to find the last user, the
        # stripped version is what gets exported.
        sample_tree = SampleExportService.servSampleTrees(raw_conlls)
        sample_tree_nots_noui = SampleExportService.servSampleTrees(
            raw_conlls, timestamps=False, user_ids=False
        )

        last_trees = []
        for sent_id, sentence in sample_tree.items():
            last = SampleExportService.get_last_user(sentence["conlls"])
            last_trees.append(sample_tree_nots_noui[sent_id]["conlls"][last])

        # gluing back the trees (a sample without sentences gives "")
        fetched_names.append(sample_name)
        samplecontentfiles.append("\n".join(last_trees))

    return [fetched_names, samplecontentfiles]

def post(self, project_name: str):
param = request.get_json(force=True)

Expand All @@ -278,12 +217,11 @@ def post(self, project_name: str):
default_to_parse = [ n for n in all_sample_names if n not in train_samp_names ] if param['to_parse'] == 'ALL' else param['to_parse']

#get samples
train_name, train_set = self.__getSamples(train_samp_names, project_name)
train_name, train_set = GrewService.get_samples_with_string_contents(project_name, train_samp_names)
#TODO assure parse_name not empty
parse_name, to_parse = self.__getSamples(default_to_parse, project_name)
parse_name, to_parse = GrewService.get_samples_with_string_contents(project_name, default_to_parse)

# return to_parse

data = {
'project_name': project_name,
'train_name': train_name,
Expand Down
76 changes: 3 additions & 73 deletions app/samples/service.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,15 @@
from typing import List
from app.utils.conllup import ConllProcessor
import io
import json
import os
import re
import time
import zipfile

from datetime import datetime

from app import db
from app.config import MAX_TOKENS, Config
from app.user.model import User
from app.utils.conll3 import conllFile2trees, trees2conllFile
from app.utils.grew_utils import GrewService, grew_request
from flask import abort, current_app
from sqlalchemy.sql.operators import startswith_op
from app.utils.grew_utils import GrewService
from flask import abort
from werkzeug.utils import secure_filename
from werkzeug.datastructures import FileStorage
from .model import SampleExerciseLevel, SampleRole
Expand Down Expand Up @@ -50,71 +45,6 @@ def upload(
GrewService.save_sample(project_name, sample_name, file_to_save)


# TODO : refactor this
class SampleExportService:
    """Helpers turning the per-sentence, per-user CoNLL strings of a sample into export files."""

    @staticmethod
    def servSampleTrees(samples, timestamps=True, user_ids=True):
        """Regroup ``{sent_id: {user_id: conll}}`` as ``{sent_id: {"conlls": {user_id: conll}}}``.

        When ``user_ids`` is false the ``# user_id = ...`` lines are removed
        from every CoNLL string; when ``timestamps`` is false the
        ``# timestamp = ...`` lines are removed. A sentence without any user
        entry does not appear in the result.
        """
        trees = {}
        for sent_id, users in samples.items():
            for user_id, conll in users.items():
                # Adapt user_id or timestamps lines depending on options
                if not user_ids:
                    conll = re.sub("# user_id = .+\n", "", conll)
                if not timestamps:
                    conll = re.sub("# timestamp = .+\n", "", conll)
                trees.setdefault(sent_id, {"conlls": {}})["conlls"][user_id] = conll
        return trees

    @staticmethod
    def sampletree2contentfile(tree):
        """Build one file content per user from a sample tree.

        ``tree`` is a mapping shaped like the result of ``servSampleTrees``,
        or its JSON serialisation. Returns ``{user: content}`` where
        ``content`` is the newline-joined CoNLL strings of that user, in the
        iteration order of the sentences.
        """
        if isinstance(tree, str):
            tree = json.loads(tree)
        conlls_by_user = {}
        for sentence in tree.values():
            for user, conll in sentence["conlls"].items():
                conlls_by_user.setdefault(user, []).append(conll)
        return {user: "\n".join(conlls) for user, conlls in conlls_by_user.items()}

    @staticmethod
    def get_last_user(tree):
        """Return the user whose CoNLL string carries the most recent timestamp.

        ``tree`` maps user -> CoNLL string. With a single user that user is
        returned without reading any timestamp. An empty ``tree`` raises
        ``IndexError``.
        """
        if len(tree) == 1:
            return next(iter(tree))
        timestamps = [(user, get_timestamp(conll)) for user, conll in tree.items()]
        # get_timestamp gives the timestamp as text; compare it numerically,
        # otherwise "9" would sort after "10". A falsy result counts as 0.
        return sorted(timestamps, key=lambda item: float(item[1] or 0))[-1][0]

    @staticmethod
    def contentfiles2zip(sample_names, sampletrees):
        """Pack the per-user contents of several samples into an in-memory zip.

        ``sampletrees[i]`` is a ``{user: content}`` mapping belonging to
        ``sample_names[i]``; each entry is written as
        ``<sample_name>.<user>.conllu``. Returns a ``BytesIO`` positioned at
        its start.
        """
        memory_file = io.BytesIO()
        with zipfile.ZipFile(memory_file, "w") as zf:
            for sample_name, sample in zip(sample_names, sampletrees):
                for fuser, filecontent in sample.items():
                    data = zipfile.ZipInfo("{}.{}.conllu".format(sample_name, fuser))
                    data.date_time = time.localtime(time.time())[:6]
                    data.compress_type = zipfile.ZIP_DEFLATED
                    zf.writestr(data, filecontent)
        memory_file.seek(0)
        return memory_file


# TODO : refactor this
def get_timestamp(conll):
    """Return the value of the ``# timestamp = ...`` line of a CoNLL string.

    The value is returned as text (digits with an optional decimal part),
    exactly as written in the string. ``False`` is returned when no such
    line is found.
    """
    # re.search gives None when nothing matches, so test the match object
    # itself before reading the captured group.
    match = re.search(r"# timestamp = (\d+(?:\.\d+)?)\n", conll)
    if match is None:
        return False
    return match.group(1)


class SampleRoleService:
Expand Down
106 changes: 103 additions & 3 deletions app/utils/grew_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@

import requests
from flask import abort, current_app
from werkzeug.utils import secure_filename
from app import grew_config

import re
import io
import time
import zipfile

def grew_request(fct_name, data={}, files={}):
try:
Expand Down Expand Up @@ -186,4 +188,102 @@ def try_package(project_id: str, package: str, sample_ids: List[str] ,passed_use
"sample_ids": json.dumps(sample_ids)
}
reply = grew_request("tryPackage", data=data)
return reply
return reply

@staticmethod
def get_samples_with_string_contents(project_name: str, sample_names: List[str]):
    """Fetch samples from the grew server as exportable CoNLL strings.

    For every name in ``sample_names`` a ``getConll`` request is sent for
    ``project_name``. Each successful reply becomes one dict mapping every
    user to the newline-joined CoNLL strings of that user (``# user_id``
    and ``# timestamp`` lines removed), plus a ``"last"`` entry holding,
    for each sentence, the tree of the user chosen by
    ``SampleExportService.get_last_user``.

    Returns:
        ``(names, contents)`` where ``contents[i]`` is the dict of sample
        ``names[i]``. Samples whose request did not answer with status "OK"
        are reported with ``print`` and left out of BOTH sequences, so
        callers pairing names with contents never get shifted pairs.
    """
    fetched_names = []
    samplecontentfiles = []
    for sample_name in sample_names:
        reply = grew_request(
            "getConll",
            data={"project_id": project_name, "sample_id": sample_name},
        )
        if reply.get("status") != "OK":
            print("Error: {}".format(reply.get("message")))
            continue

        # reply data shape: {"sent_id_1": {"user_1": "conllstring"}}
        raw_conlls = reply.get("data", {})
        # The timestamped version is needed to find the last user, the
        # stripped version is what gets exported.
        sample_tree = SampleExportService.servSampleTrees(raw_conlls)
        sample_tree_nots_noui = SampleExportService.servSampleTrees(
            raw_conlls, timestamps=False, user_ids=False
        )
        sample_content = SampleExportService.sampletree2contentfile(sample_tree_nots_noui)

        last_trees = []
        for sent_id, sentence in sample_tree.items():
            last = SampleExportService.get_last_user(sentence["conlls"])
            last_trees.append(sample_tree_nots_noui[sent_id]["conlls"][last])

        # gluing back the trees (a sample without sentences gives "")
        sample_content["last"] = "\n".join(last_trees)
        fetched_names.append(sample_name)
        samplecontentfiles.append(sample_content)

    return fetched_names, samplecontentfiles



# TODO : refactor this
def get_timestamp(conll):
    """Return the value of the ``# timestamp = ...`` line of a CoNLL string.

    The value is returned as text (digits with an optional decimal part),
    exactly as written in the string. ``False`` is returned when no such
    line is found.
    """
    # re.search gives None when nothing matches, so test the match object
    # itself before reading the captured group.
    match = re.search(r"# timestamp = (\d+(?:\.\d+)?)\n", conll)
    if match is None:
        return False
    return match.group(1)

# TODO : refactor this

class SampleExportService:
    """Helpers turning the per-sentence, per-user CoNLL strings of a sample into export files."""

    @staticmethod
    def servSampleTrees(samples, timestamps=True, user_ids=True):
        """Regroup ``{sent_id: {user_id: conll}}`` as ``{sent_id: {"conlls": {user_id: conll}}}``.

        When ``user_ids`` is false the ``# user_id = ...`` lines are removed
        from every CoNLL string; when ``timestamps`` is false the
        ``# timestamp = ...`` lines are removed. A sentence without any user
        entry does not appear in the result.
        """
        trees = {}
        for sent_id, users in samples.items():
            for user_id, conll in users.items():
                # Adapt user_id or timestamps lines depending on options
                if not user_ids:
                    conll = re.sub("# user_id = .+\n", "", conll)
                if not timestamps:
                    conll = re.sub("# timestamp = .+\n", "", conll)
                trees.setdefault(sent_id, {"conlls": {}})["conlls"][user_id] = conll
        return trees

    @staticmethod
    def sampletree2contentfile(tree):
        """Build one file content per user from a sample tree.

        ``tree`` is a mapping shaped like the result of ``servSampleTrees``,
        or its JSON serialisation. Returns ``{user: content}`` where
        ``content`` is the newline-joined CoNLL strings of that user, in the
        iteration order of the sentences.
        """
        if isinstance(tree, str):
            tree = json.loads(tree)
        conlls_by_user = {}
        for sentence in tree.values():
            for user, conll in sentence["conlls"].items():
                conlls_by_user.setdefault(user, []).append(conll)
        return {user: "\n".join(conlls) for user, conlls in conlls_by_user.items()}

    @staticmethod
    def get_last_user(tree):
        """Return the user whose CoNLL string carries the most recent timestamp.

        ``tree`` maps user -> CoNLL string. With a single user that user is
        returned without reading any timestamp. An empty ``tree`` raises
        ``IndexError``.
        """
        if len(tree) == 1:
            return next(iter(tree))
        timestamps = [(user, get_timestamp(conll)) for user, conll in tree.items()]
        # get_timestamp gives the timestamp as text; compare it numerically,
        # otherwise "9" would sort after "10". A falsy result counts as 0.
        return sorted(timestamps, key=lambda item: float(item[1] or 0))[-1][0]

    @staticmethod
    def contentfiles2zip(sample_names, sampletrees):
        """Pack the per-user contents of several samples into an in-memory zip.

        ``sampletrees[i]`` is a ``{user: content}`` mapping belonging to
        ``sample_names[i]``; each entry is written as
        ``<sample_name>.<user>.conllu``. Returns a ``BytesIO`` positioned at
        its start.
        """
        memory_file = io.BytesIO()
        with zipfile.ZipFile(memory_file, "w") as zf:
            for sample_name, sample in zip(sample_names, sampletrees):
                for fuser, filecontent in sample.items():
                    data = zipfile.ZipInfo("{}.{}.conllu".format(sample_name, fuser))
                    data.date_time = time.localtime(time.time())[:6]
                    data.compress_type = zipfile.ZIP_DEFLATED
                    zf.writestr(data, filecontent)
        memory_file.seek(0)
        return memory_file

0 comments on commit 125b530

Please sign in to comment.