From 63fee2a192ec089209958bcb41354101791e8647 Mon Sep 17 00:00:00 2001 From: khansadaoudi Date: Tue, 17 Dec 2024 17:11:59 +0100 Subject: [PATCH] add docstring for the most of services in the backend --- app/constructicon/interface.py | 1 + app/constructicon/model.py | 1 + app/constructicon/schema.py | 1 + app/constructicon/service.py | 34 +++ app/github/controller.py | 34 ++- app/github/model.py | 8 +- app/github/schema.py | 1 + app/github/service.py | 369 +++++++++++++++++++++++++--- app/grew/controller.py | 56 ++++- app/history/controller.py | 9 +- app/history/interface.py | 1 + app/history/model.py | 1 + app/history/schema.py | 2 +- app/history/service.py | 43 ++++ app/lexicon/controller.py | 18 +- app/parser/controller.py | 11 + app/shared/service.py | 8 + app/shared/service_test.py | 9 - app/stats/controller.py | 1 + app/stats/interface.py | 4 + app/tags/controller.py | 10 +- app/tags/model.py | 1 + app/tags/service.py | 9 +- app/trees/controller.py | 58 ++++- app/trees/service.py | 81 +++++- app/utils/arborator_parser_utils.py | 56 +++++ app/utils/grew_utils.py | 271 ++++++++++++++++++-- 27 files changed, 993 insertions(+), 105 deletions(-) delete mode 100644 app/shared/service_test.py diff --git a/app/constructicon/interface.py b/app/constructicon/interface.py index 324b783..78f5229 100644 --- a/app/constructicon/interface.py +++ b/app/constructicon/interface.py @@ -2,6 +2,7 @@ from mypy_extensions import TypedDict class ConstructiconInterfce(TypedDict, total=False): + """Typed constructicon interface""" id: str title: str description: str diff --git a/app/constructicon/model.py b/app/constructicon/model.py index facd180..13ccdde 100644 --- a/app/constructicon/model.py +++ b/app/constructicon/model.py @@ -6,6 +6,7 @@ from app.shared.model import BaseM class Constructicon(db.Model, BaseM): + """Constructicon representation in the db""" __tablename__ = 'constructicon' id = Column(String(256), primary_key=True) diff --git a/app/constructicon/schema.py 
b/app/constructicon/schema.py index 9e81d94..70d7826 100644 --- a/app/constructicon/schema.py +++ b/app/constructicon/schema.py @@ -2,6 +2,7 @@ class ConstructiconSchema(Schema): + """Constructicon schema shared with the db""" id = fields.String(attribute="id") title = fields.String(attribute="title") description = fields.String(attribute="description") diff --git a/app/constructicon/service.py b/app/constructicon/service.py index e0cdc6f..ec410fe 100644 --- a/app/constructicon/service.py +++ b/app/constructicon/service.py @@ -6,19 +6,48 @@ class ConstructiconService: @staticmethod def get_by_id(entry_id: str) -> Constructicon: + """Get constructicon entity by id + + Args: + entry_id (str) + + Returns: + Constructicon + """ return Constructicon.query.get(entry_id) @staticmethod def delete_by_id(entry_id: str): + """Delete constructicon by id + + Args: + entry_id (str) + """ Constructicon.query.filter_by(id=entry_id).delete() db.session.commit() @staticmethod def get_all_by_project_id(project_id): + """Get all constructicon entities related to an object + + Args: + project_id (int) + + Returns: + constructicon_list(List[Constructicon]) + """ return Constructicon.query.filter_by(project_id=project_id).all() @staticmethod def create(new_attrs) -> Constructicon: + """Create new constructicon entity + + Args: + new_attrs (dict) + + Returns: + Constructicon + """ new_constructicon_entry = Constructicon(**new_attrs) db.session.add(new_constructicon_entry) db.session.commit() @@ -26,6 +55,11 @@ def create(new_attrs) -> Constructicon: @staticmethod def create_or_update(new_attrs): + """Create or update constructicon entity + + Args: + new_attrs (dict) + """ entry_if_exists = ConstructiconService.get_by_id(new_attrs["id"]) if entry_if_exists: print("KK updating values in db") diff --git a/app/github/controller.py b/app/github/controller.py index cd38fdd..3ae721f 100644 --- a/app/github/controller.py +++ b/app/github/controller.py @@ -13,14 +13,23 @@ 
@api.route("//synchronize") class GithubSynchronizationResource(Resource): - + """Class contains endpoints that deals with the synchronization""" @responds(schema=GithubRepositorySchema, api=api) def get(self, project_name): + """Get the synchronized repository""" project = ProjectService.get_by_name(project_name) ProjectService.check_if_project_exist(project) return GithubRepositoryService.get_by_project_id(project.id) def post(self, project_name): + """Create synchronization + + Args: + project_name (str) + full_name(str): the name of the repository to be synchronized + branch_import(str): branch used for the import + branch_sync(str): branch to be used for the synchronization + """ data = request.get_json() full_name = data.get("fullName") branch_import = data.get("branchImport") @@ -35,20 +44,22 @@ def post(self, project_name): GithubRepositoryService.create(data) def delete(self, project_name): + """Delete synchronization""" project = ProjectService.get_by_name(project_name) GithubRepositoryService.delete_by_project_id(project.id) return { "status": "ok" } @api.route("/github") class UserGithubRepositories(Resource): - + """Class contains the endpoint to get user repositories""" def get(self): + """List user github repos""" github_access_token = UserService.get_by_id(current_user.id).github_access_token return GithubService.get_repositories(github_access_token) @api.route("/github/branch") class GithubRepositoryBranch(Resource): - + """class contains the endpoint to get the branch of specific repo""" def get(self): data = request.args full_name = data.get("full_name") @@ -57,12 +68,14 @@ def get(self): @api.route("//synchronize/commit") class GithubCommitResource(Resource): - + """Class contains endpoints related to commit""" def get(self, project_name): + """Get the number of changes to be committed""" project = ProjectService.get_by_name(project_name) return GithubCommitStatusService.get_changes_number(project.id) def post(self, project_name): + """Create 
and push a commit""" data = request.get_json() commit_message = data.get("commitMessage") project = ProjectService.get_by_name(project_name) @@ -75,20 +88,29 @@ def post(self, project_name): @api.route("//synchronize/pull") class GithubPullResource(Resource): - + """Class contains methods deals with the pulls""" def get(self, project_name): + """Check if there is changes to pull""" github_access_token = UserService.get_by_id(current_user.id).github_access_token return GithubWorkflowService.check_pull(github_access_token, project_name) def post(self, project_name): + """Pull changes""" GithubWorkflowService.pull_changes(project_name) LastAccessService.update_last_access_per_user_and_project(current_user.id, project_name, "write") return { "status": "ok" } @api.route("//synchronize/pull-request") class GithubPullRequestResource(Resource): - + """Class deals with pull requests""" def post(self,project_name): + """_summary_ + + Args: + project_name (str) + branch (str) + title (str) + """ data = request.get_json() branch = data.get("branch") title = data.get("title") diff --git a/app/github/model.py b/app/github/model.py index bba9fd6..f1c3374 100644 --- a/app/github/model.py +++ b/app/github/model.py @@ -3,12 +3,13 @@ class GithubRepository(db.Model): + """Object represents the synchronized github repository""" __tablename__ = "github_repositories" id = Column(Integer, primary_key=True) project_id = Column(Integer, db.ForeignKey("projects.id")) - user_id = Column(String(256), db.ForeignKey("users.id")) - repository_name = Column(String(256)) - branch = Column(String(256)) + user_id = Column(String(256), db.ForeignKey("users.id")) + repository_name = Column(String(256)) + branch = Column(String(256)) # github branch synchronized with AG project base_sha = Column(String(256)) # hash of the last commit of the synchronized branch def update(self, changes): @@ -18,6 +19,7 @@ def update(self, changes): class GithubCommitStatus(db.Model): + """entity represents the number of 
changes that needs to be committed by sample""" __tablename__ = "commit_status" id = Column(Integer, primary_key=True) sample_name = Column(String(256), nullable=False) diff --git a/app/github/schema.py b/app/github/schema.py index 73ff486..542ac26 100644 --- a/app/github/schema.py +++ b/app/github/schema.py @@ -2,6 +2,7 @@ class GithubRepositorySchema(Schema): + """Schema of the synchronized github that will be shared with the frontend""" id = fields.Integer(attribute="id") projectId = fields.Integer(attribute="project_id") userId = fields.String(attribute="user_id") diff --git a/app/github/service.py b/app/github/service.py index da47b0a..fc169c0 100644 --- a/app/github/service.py +++ b/app/github/service.py @@ -1,6 +1,5 @@ import os import requests -import base64 import json import re import shutil @@ -25,19 +24,35 @@ USERNAME = 'validated' CONLL = '.conllu' class GithubRepositoryService: - + """Class contains the methods that deal with GithubRepository entity """ @staticmethod def get_by_project_id(project_id): + """Get GithubRepository entity by project_id + + Args: + project_id (int) + + Returns: + GithubRepository + """ return GithubRepository.query.filter(GithubRepository.project_id == project_id).first() @staticmethod def create(new_attrs): + """Create new GithubRepository entity""" github_repository = GithubRepository(**new_attrs) db.session.add(github_repository) db.session.commit() @staticmethod def update_sha(project_id, sha): + """ + Every commit or pull the value of sha (last commit hash) is changed + and for that we need to update the sha of the synchronized repo + Args: + - project_id(int) + - sha(str) + """ github_repository = GithubRepository.query.filter_by(project_id=project_id).first() if github_repository: github_repository.base_sha = sha @@ -45,14 +60,28 @@ def update_sha(project_id, sha): @staticmethod def delete_by_project_id(project_id): + """Delete synchronized github repository by the project id + + Args: + project_id (int) + """ 
github_repository = GithubRepository.query.filter_by(project_id=project_id).first() db.session.delete(github_repository) db.session.commit() class GithubCommitStatusService: - + """Class that deals with commit status (changes number by sample)""" @staticmethod def create(project_id, sample_name): + """Create new CommitStatus entity + + Args: + project_id (int) + sample_name (str) + + Returns: + new GithubCommitStatus + """ new_attrs = { "project_id": project_id, "sample_name": sample_name, @@ -65,6 +94,12 @@ def create(project_id, sample_name): @staticmethod def update_changes(project_id, sample_name): + """Every time the data changed the number of changes in the GithubCommitStatus is incremented + + Args: + project_id (int) + sample_name (str) + """ github_commit_status = GithubCommitStatus.query.filter_by(project_id=project_id, sample_name=sample_name).first() if github_commit_status: github_commit_status.update({"changes_number": github_commit_status.changes_number + 1}) @@ -72,17 +107,41 @@ def update_changes(project_id, sample_name): @staticmethod def get_modified_samples(project_id) -> List[str]: + """ + In order to commit only the modified samples we use this method + to get samples that have changes_number > 0 + + Args: + project_id (id) + + Returns: + List[str]: list of sample_names + """ modified_samples = GithubCommitStatus.query.filter(GithubCommitStatus.project_id == project_id).filter(GithubCommitStatus.changes_number > 0) return [modified_sample.sample_name for modified_sample in modified_samples] @staticmethod def get_changes_number(project_id): + """Get the total number of changes done in the project + + Args: + project_id (int) + + Returns: + changes_number(int) + """ modified_samples = GithubCommitStatus.query.filter(GithubCommitStatus.project_id == project_id).filter(GithubCommitStatus.changes_number > 0) return sum(modified_sample.changes_number for modified_sample in modified_samples) @staticmethod def reset_samples(project_id, 
modified_samples): + """ + After a commit all the changes number in the modified samples will be reset to 0 + Args: + project_id(int) + modified_samples(List[str]) + """ for sample_name in modified_samples: github_commit_status = GithubCommitStatus.query.filter_by(project_id=project_id, sample_name=sample_name).first() github_commit_status.changes_number = 0 @@ -91,27 +150,45 @@ def reset_samples(project_id, modified_samples): @staticmethod def delete(project_id, sample_name): + """ + Delete commit status for specific sample + Args: + project_id(int) + sample_name(str) + """ github_commit_status = GithubCommitStatus.query.filter_by(project_id=project_id, sample_name=sample_name).first() if github_commit_status: db.session.delete(github_commit_status) db.session.commit() class GithubService: - + """ + This class concerns all methods that deal with github API + Here is the link of the documentation of the endpoints used in the following methods + https://docs.github.com/en/rest/git + """ @staticmethod def base_header(access_token): + """Base header is key-value pair that is used to send requests to github api + + Args: + access_token (str): access token generated after loggin with github + + Returns: + authorization dict(str, str) + """ return {"Authorization": "bearer " + access_token} - @staticmethod - def get_user_information(access_token): - url = "https://api.github.com/user" - headers = GithubService.base_header(access_token) - response = requests.get(url, headers=headers) - data = response.json() - return data - @staticmethod def get_user_email(access_token) -> str: + """Afte + + Args: + access_token (_type_): _description_ + + Returns: + str: _description_ + """ url = "https://api.github.com/user/emails" headers = GithubService.base_header(access_token) response = requests.get(url, headers=headers) @@ -120,6 +197,15 @@ def get_user_email(access_token) -> str: @staticmethod def get_repositories(access_token): + """ + List user repositories, the repositories 
are paginated + 100 repos per page + + Args: + access_token (str) + Returns: + list_repos: list of {"name": str, "owner_name": str, "owner_avatar": str} + """ repositories = [] data = [] url = "https://api.github.com/user/repos?per_page=100" @@ -143,6 +229,15 @@ def get_repositories(access_token): @staticmethod def list_repository_branches(access_token, full_name) -> List[str]: + """List of repository branches, without dependbot branches + + Args: + access_token (str) + full_name (str): full_name is "github_username/repository_name" + + Returns: + List[str]: list of branches + """ url = "https://api.github.com/repos/{}/branches".format(full_name) headers = GithubService.base_header(access_token) response = requests.get(url, headers=headers ) @@ -151,6 +246,16 @@ def list_repository_branches(access_token, full_name) -> List[str]: @staticmethod def get_repository_files_of_branch(access_token, full_name, branch): + """Get list of files of specific repo in specific branch + + Args: + access_token (str) + full_name (str) + branch (str) + + Returns: + list_files(List[files]) + """ url = "https://api.github.com/repos/{}/contents/?ref={}".format(full_name, branch) headers = GithubService.base_header(access_token) response = requests.get(url , headers=headers) @@ -159,22 +264,35 @@ def get_repository_files_of_branch(access_token, full_name, branch): @staticmethod def get_file_sha(access_token, full_name, file_path, branch): + """Get file sha hash of the last of commit of specific file + + Args: + access_token (str) + full_name (str) + file_path (str) + branch (_type_) + + Returns: + sha(str) + """ url = "https://api.github.com/repos/{}/contents/{}?ref={}".format(full_name, file_path, branch) headers = GithubService.base_header(access_token) response = requests.get(url, headers=headers) data = response.json() return data.get("sha") - @staticmethod - def get_default_branch(access_token, full_name): - url = "https://api.github.com/repos/{}".format(full_name) - headers = 
GithubService.base_header(access_token) - response = requests.get(url, headers=headers ) - data = response.json() - return data.get("default_branch") - @staticmethod def get_sha_base_tree(access_token, full_name, branch): + """Get the hash of the tree object of the github repo + + Args: + access_token (str) + full_name (str) + branch (str) + + Returns: + sha(str) + """ url = "https://api.github.com/repos/{}/git/refs/heads/{}".format(full_name, branch) headers = GithubService.base_header(access_token) response = requests.get(url, headers=headers) @@ -186,6 +304,16 @@ def get_sha_base_tree(access_token, full_name, branch): @staticmethod def create_blob_for_updated_file(access_token, full_name, content): + """Git blob is the object used to store the content of each file in a repository + + Args: + access_token (str) + full_name (str) + content (str) + + Returns: + blob_sha(str) + """ data = {"content": content, "encoding": "utf-8"} url = "https://api.github.com/repos/{}/git/blobs".format(full_name) headers = GithubService.base_header(access_token) @@ -195,6 +323,16 @@ def create_blob_for_updated_file(access_token, full_name, content): @staticmethod def download_github_repository(access_token, full_name, branch): + """Download a github repository in tmp.zip file + + Args: + access_token (str) + full_name (str) + branch (str) + + Returns: + path_file(str) + """ url = 'https://api.github.com/repos/{}/zipball/{}'.format(full_name, branch) headers = GithubService.base_header(access_token) response = requests.get(url, headers=headers, stream=True) @@ -205,15 +343,19 @@ def download_github_repository(access_token, full_name, branch): file.write(response.content) return path_file - @staticmethod - def delete_branch(access_token, full_name, base): - url = "https://api.github.com/repos/{}/git/refs/heads/{}".format(full_name, base) - headers = GithubService.base_header(access_token) - response = requests.delete(url, headers=headers) - return response - @staticmethod def 
create_new_branch_arborator(access_token, full_name, branch_to_create, default_branch): + """Create new branch to be synchronized with AG + + Args: + access_token (str) + full_name (str) + branch_to_create (str) + default_branch (str) + + Returns: + response + """ url = "https://api.github.com/repos/{}/git/refs".format(full_name) headers = GithubService.base_header(access_token) sha = GithubService.get_sha_base_tree(access_token, full_name, default_branch) @@ -226,6 +368,12 @@ def create_new_branch_arborator(access_token, full_name, branch_to_create, defau @staticmethod def extract_repository(file_path): + """ + extract the zip folder that compress github repository we extract files with specefic size + + Args: + file_path (str) + """ with zipfile.ZipFile(file_path, 'r') as zip_file: for file in zip_file.namelist(): filename = os.path.basename(file) @@ -241,12 +389,31 @@ def extract_repository(file_path): @staticmethod def check_large_file(file_path): + """check if file is large if it's bigger then 13MB + + Args: + file_path (str) + """ file_size = (os.stat(file_path).st_size)/(1024*1024) if file_size > 13: abort(413, "it contains a large file") @staticmethod def create_tree(access_token, full_name, updated_samples, project_name, base_tree): + """ + In order to commit changes we need to create a tree which that contains + blobs of modified files + + Args: + access_token (str) + full_name (str) + updated_samples (str) list of modified samples + project_name (str) + base_tree (str): the sha of an existing tree object which will be used as the base for the new tree + + Returns: + new_base_sha(str) + """ tree = [] sample_names, sample_content_files = GrewService.get_samples_with_string_contents(project_name, updated_samples) for sample_name, sample in zip(sample_names,sample_content_files): @@ -265,6 +432,18 @@ def create_tree(access_token, full_name, updated_samples, project_name, base_tre @staticmethod def create_commit(access_token, tree, parent, message, 
full_name): + """create a commit + + Args: + access_token (str) + tree (str) + parent (str): base_tree sha + message (str): commit message + full_name (str) + + Returns: + tree_sha(str): sha of the new tree + """ url = "https://api.github.com/repos/{}/git/commits".format(full_name) headers = GithubService.base_header(access_token) data = {"tree": tree, "parents": [parent], "message": message} @@ -274,6 +453,14 @@ def create_commit(access_token, tree, parent, message, full_name): @staticmethod def update_sha(access_token, full_name, branch, sha): + """_summary_ + + Args: + access_token (str) + full_name (str) + branch (str) + sha (str): new sha + """ url = "https://api.github.com/repos/{}/git/refs/heads/{}".format(full_name, branch) headers = GithubService.base_header(access_token) data = {"sha": sha} @@ -283,6 +470,17 @@ def update_sha(access_token, full_name, branch, sha): @staticmethod def compare_two_commits(access_token, full_name, previous_commit, new_commit): + """Compare between commits in order to get the modified to use it later for the pull + + Args: + access_token (str) + full_name (str) + previous_commit (str): sha of actual base tree in AG + new_commit (str): the new base tree sha + + Returns: + list_files: list of updated files {"filename": str, "status": 'modified' | 'added' | 'removed' } + """ url = 'https://api.github.com/repos/{}/compare/{}...{}'.format(full_name, previous_commit, new_commit) headers = GithubService.base_header(access_token) response = requests.get(url, headers=headers) @@ -292,6 +490,14 @@ def compare_two_commits(access_token, full_name, previous_commit, new_commit): @staticmethod def get_file_content_by_commit_sha(access_token, full_name, file_path, sha): + """Get content of a file in repo based on the commit sha + + Args: + access_token (str) + full_name (str) + file_path (str) + sha (str) + """ url = "https://api.github.com/repos/{}/contents/{}?ref={}".format(full_name, file_path, sha) headers = 
GithubService.base_header(access_token) @@ -301,6 +507,16 @@ def get_file_content_by_commit_sha(access_token, full_name, file_path, sha): @staticmethod def create_pull_request(access_token, full_name, username, arborator_branch, branch, title): + """ Create a pull request + + Args: + access_token (str) + full_name (str) + username (str) + arborator_branch (str) + branch (str) + title (str): the title of the pull request + """ url = "https://api.github.com/repos/{}/pulls".format(full_name) headers = GithubService.base_header(access_token) head = username + ":" + arborator_branch @@ -312,6 +528,14 @@ def create_pull_request(access_token, full_name, username, arborator_branch, bra @staticmethod def delete_file(access_token, full_name, file_path, branch): + """ Delete file + + Args: + access_token (str) + full_name (str) + file_path (str) + branch (str) + """ url = "https://api.github.com/repos/{}/contents/{}".format(full_name, file_path) headers = GithubService.base_header(access_token) sha = GithubService.get_file_sha(access_token, full_name, file_path, branch) @@ -325,7 +549,19 @@ class GithubWorkflowService: @staticmethod def import_files_from_github(full_name, project_name, branch, branch_syn): - + """Import files from github: + - Get repository files names of specific branch + - For non existing samples we create commit status for every new file + - In order to not download file by file we import directly repo in zip file + - We extract the zip file and create new samples from the extracted file + - Create new branch if user choosed to use branch dedicated for the sync + + Args: + full_name (str) + project_name (str) + branch (str): branch used for the import + branch_syn (str): branch used for the synchronization + """ project = ProjectService.get_by_name(project_name) access_token = UserService.get_by_id(current_user.id).github_access_token repository_files = GithubService.get_repository_files_of_branch(access_token, full_name, branch) @@ -347,6 +583,14 @@ def 
import_files_from_github(full_name, project_name, branch, branch_syn): @staticmethod def clone_github_repository(files, project_name): + """ + Clone github repository means create new samples from the files + of sync repo and create commit status for each sample + + Args: + files (List[str]) + project_name (str) + """ for file in files: path_file = os.path.join(Config.UPLOAD_FOLDER, file) sample_name = file.split(CONLL)[0] @@ -357,7 +601,13 @@ def clone_github_repository(files, project_name): @staticmethod def create_sample(sample_name, path_file, project_name): + """Create new sample + Args: + sample_name (str) + path_file (str) + project_name (str) + """ if not SampleService.check_sentences_without_sent_ids(path_file): SampleService.add_new_sent_ids(path_file, sample_name) @@ -375,6 +625,15 @@ def create_sample(sample_name, path_file, project_name): @staticmethod def commit_changes(updated_samples, project_name, message): + """Commit changes + Args: + updated_samples (List[str]):modified samples + project_name (int) + message (str): commit message + + Returns: + sha(str): new sha after the commit + """ access_token = UserService.get_by_id(current_user.id).github_access_token project = ProjectService.get_by_name(project_name) sync_repository = GithubRepositoryService.get_by_project_id(project.id) @@ -388,6 +647,15 @@ def commit_changes(updated_samples, project_name, message): @staticmethod def check_pull(access_token, project_name): + """Check if there is changes to pull, if the base_tree sha of AG is different from base_tree in Github + + Args: + access_token (str) + project_name (str) + + Returns: + boolean + """ project = ProjectService.get_by_name(project_name) sync_repository = GithubRepositoryService.get_by_project_id(project.id) base_tree = GithubService.get_sha_base_tree(access_token, sync_repository.repository_name, sync_repository.branch) @@ -395,7 +663,17 @@ def check_pull(access_token, project_name): @staticmethod def pull_changes(project_name): - + 
"""Pull changes: + - compare between two commits + - get the modified files + - from every status: + - added: create new sample from new added file + - modified: pull changes + - removed: deleted the sample of the removed file from AG project + + Args: + project_name (str) + """ project = ProjectService.get_by_name(project_name) sync_repository = GithubRepositoryService.get_by_project_id(project.id) github_access_token = UserService.get_by_id(current_user.id).github_access_token @@ -416,7 +694,14 @@ def pull_changes(project_name): GithubRepositoryService.update_sha(project.id, base_tree) @staticmethod - def create_sample_from_github_file(file, download_url,project_name): + def create_sample_from_github_file(file, download_url, project_name): + """Create new sample after a pull using the download url + + Args: + file (str): sample name + download_url (str) + project_name (str) + """ project = ProjectService.get_by_name(project_name) sample_name, path_file = GithubWorkflowService.download_github_file_content(file, download_url) GithubWorkflowService.create_sample(sample_name, path_file, project_name) @@ -425,6 +710,15 @@ def create_sample_from_github_file(file, download_url,project_name): @staticmethod def download_github_file_content(file_name, download_url): + """Download modified file and save it in AG + + Args: + file_name (str) + download_url (str) + + Returns: + sample_name, path_file(Tuple(str, str)) + """ sample_name = file_name.split(CONLL)[0] raw_content = requests.get(download_url) path_file = os.path.join(Config.UPLOAD_FOLDER, file_name) @@ -434,7 +728,13 @@ def download_github_file_content(file_name, download_url): @staticmethod def pull_change_existing_sample(project_name, sample_name, download_url): - + """pull changes of an existing file + + Args: + project_name (str) + sample_name (str) + download_url (str) + """ content = requests.get(download_url).text file_name = sample_name + "_modified.conllu" path_file = os.path.join(Config.UPLOAD_FOLDER, 
file_name) @@ -464,6 +764,13 @@ def pull_change_existing_sample(project_name, sample_name, download_url): @staticmethod def delete_files_from_github(access_token, project_name, sample_names): + """delete files from github + + Args: + access_token (str) + project_name (str) + sample_names (List[str]) + """ for sample_name in sample_names: file_path = sample_name + CONLL project_id = ProjectService.get_by_name(project_name).id diff --git a/app/grew/controller.py b/app/grew/controller.py index 0d3d025..184baaa 100644 --- a/app/grew/controller.py +++ b/app/grew/controller.py @@ -19,7 +19,11 @@ @api.route("//apply-rule") class ApplyRuleResource(Resource): def post(self, project_name: str): + """Apply rule, save selected rewrite results + Args: + project_name (str) + """ project =ProjectService.get_by_name(project_name) ProjectService.check_if_freezed(project) @@ -52,9 +56,20 @@ def post(self, project_name: str): @api.route("//search") class SearchResource(Resource): - "Search" + def post(self, project_name: str): - + """ + Grew search + + Args: + project_name (str) + pattern (str) + user_type (str) + sample_ids (List[str] | []) + otherUser (str | "" ) + Returns: + trees + """ args = request.get_json() pattern = args.get("pattern") trees_type = args.get("userType") @@ -75,7 +90,18 @@ def post(self, project_name: str): class TryPackageResource(Resource): "rewrite" def post(self, project_name: str): - + """ + Grew rewrite + + Args: + project_name (str) + pattern (str) + user_type (str) + sample_ids (List[str] | []) + otherUser (str | "" ) + Returns: + trees + """ args = request.get_json() package = args.get("query") @@ -96,7 +122,17 @@ def post(self, project_name: str): @api.route("//relation-table") class RelationTableResource(Resource): def post(self, project_name): - + """ + Relation table + + Args: + project_name (str) + table_type (str): same as user_type + sample_ids (List[str] | []) + otherUser (str | "" ) + Returns: + trees + """ args = request.get_json() 
sample_ids = args.get("sampleIds") table_type = args.get("tableType") @@ -112,7 +148,17 @@ def post(self, project_name): class ExportGrewResultsResource(Resource): def post(self, project_name: str): - + """ + Export search results + + Args: + project_name (str) + users (List[str]) + search_results (tree type) + + Returns: + rsponse: zip file attachement + """ args = request.get_json() search_results = args.get("searchResults") users = args.get("users") diff --git a/app/history/controller.py b/app/history/controller.py index 7cd266b..da2ba0e 100644 --- a/app/history/controller.py +++ b/app/history/controller.py @@ -16,15 +16,17 @@ @api.route("//history") class HistoryResource(Resource): - + """Class contains endpoints of user search and rewrite history inside a specific project""" @responds(schema=GrewHistorySchema(many=True), api=api) def get(self, project_name): + """Get all user history""" project = ProjectService.get_by_name(project_name) return HistoryService.get_all_user_history(project.id) @accepts(schema=GrewHistorySchema, api=api) @responds(schema=GrewHistorySchema, api=api) def post(self, project_name): + """Create history entry""" project = ProjectService.get_by_name(project_name) data: GrewHistoryInterface = request.parsed_obj data["project_id"] = project.id @@ -33,6 +35,7 @@ def post(self, project_name): return new_history_record def delete(self, project_name): + """delete all history""" project = ProjectService.get_by_name(project_name) HistoryService.delete_all_history(project.id) return { "status": "ok" } @@ -40,9 +43,10 @@ def delete(self, project_name): @api.route("//history/") class HistoryRecordResource(Resource): - + """Class contains endpoints for specific history entry""" @responds(schema=GrewHistorySchema, api=api) def put(self, project_name, history_uuid): + """Update history entry (favorite)""" changes: GrewHistoryInterface = request.get_json() project = ProjectService.get_by_name(project_name) history_record = 
HistoryService.get_by_uuid(project.id, history_uuid) @@ -50,6 +54,7 @@ def put(self, project_name, history_uuid): return updated_record def delete(self, project_name, history_uuid): + """Delete history entry""" project = ProjectService.get_by_name(project_name) history_record = HistoryService.get_by_uuid(project.id, history_uuid) HistoryService.delete_by_id(history_record.id) diff --git a/app/history/interface.py b/app/history/interface.py index f1d2bea..ba6f95e 100644 --- a/app/history/interface.py +++ b/app/history/interface.py @@ -1,6 +1,7 @@ from mypy_extensions import TypedDict class GrewHistoryInterface(TypedDict, total=False): + """Typed interface for grew history""" id: int uuid: str request: str diff --git a/app/history/model.py b/app/history/model.py index 13cf122..8b5e5b5 100644 --- a/app/history/model.py +++ b/app/history/model.py @@ -4,6 +4,7 @@ from app.shared.model import BaseM class History(db.Model, BaseM): + """History representation in the db""" __tablename__ = "history" id = Column(Integer, primary_key=True) diff --git a/app/history/schema.py b/app/history/schema.py index 2a80ee1..b133739 100644 --- a/app/history/schema.py +++ b/app/history/schema.py @@ -1,7 +1,7 @@ from marshmallow import fields, Schema class GrewHistorySchema(Schema): - + """HIstory schema used to communicated with the frontend""" id = fields.Integer(attribute="id") uuid = fields.String(attribute="uuid") request = fields.String(attribute="request") diff --git a/app/history/service.py b/app/history/service.py index 9ed6685..881a4e7 100644 --- a/app/history/service.py +++ b/app/history/service.py @@ -10,14 +10,38 @@ class HistoryService: @staticmethod def get_all_user_history(project_id) -> List[History]: + """Get all user history + + Args: + project_id (int) + + Returns: + List[History] + """ return History.query.filter_by(project_id=project_id, user_id=current_user.id).all() @staticmethod def get_by_uuid(project_id, uuid) -> History: + """Get specific history entry using uuid 
+ + Args: + project_id (int) + uuid (str) + + Returns: + History + """ return History.query.filter_by(project_id=project_id, uuid= uuid).first() @staticmethod def create(new_attrs) -> History: + """Create new history entry + + Args: + new_attrs (dict) + Returns: + History + """ new_history_record = History(**new_attrs) db.session.add(new_history_record) db.session.commit() @@ -25,16 +49,35 @@ def create(new_attrs) -> History: @staticmethod def delete_by_id(record_id): + """Delete history entry by id + + Args: + record_id (int) + """ History.query.filter_by(id=record_id).delete() db.session.commit() @staticmethod def delete_all_history(project_id): + """ + Delete all user history + Args: + project_id (int) + """ History.query.filter_by(project_id=project_id, user_id=current_user.id).delete() db.session.commit() @staticmethod def update(history_record: History, new_attrs) -> History: + """Update history record + + Args: + history_record (History) + new_attrs (dict(HistoryInterface)) + + Returns: + History + """ if history_record: history_record.update(new_attrs) db.session.commit() diff --git a/app/lexicon/controller.py b/app/lexicon/controller.py index 8b7aa6b..99307f1 100644 --- a/app/lexicon/controller.py +++ b/app/lexicon/controller.py @@ -14,9 +14,20 @@ @api.route("//lexicon") class LexiconResource(Resource): "Lexicon" - def post(self, project_name: str): - + """Generate lexicon + + Args: + project_name (str) + sample_names (List[str] | []) + features (List[str]) + lexicon_type (str): Same as user_type in grew search and table_type in relation table + other_user (str) + prune (int): only the subset of depth = prune is reported as ambiguous structures; see grew server doc to understand more + + Returns: + lexicon + """ args = request.get_json() sample_ids = args.get("sampleNames") features = args.get("features") @@ -32,6 +43,7 @@ def post(self, project_name: str): class LexiconExportJson(Resource): def post(self): + """Export lexicon in json format""" args = 
request.get_json() lexicon = args.get("data") for element in lexicon: @@ -44,7 +56,7 @@ def post(self): @api.route("/lexicon/export-tsv") class LexiconExportTsv(Resource): def post(self): - + """Export lexicon as tsv format""" args = request.get_json() lexicon = args.get("data") diff --git a/app/parser/controller.py b/app/parser/controller.py index a7c5490..e5cafd3 100644 --- a/app/parser/controller.py +++ b/app/parser/controller.py @@ -17,6 +17,11 @@ @api.route("/list") class ParserModelsListResource(Resource): def get(self): + """Get list of available models in GPU server + + Returns: + { "status": "ok", "data": list_parser (List[parser]) } + """ print(" list/start request") response = ArboratorParserAPI.list() if response['status'] == 'failure': @@ -41,11 +46,13 @@ def get(self): @api.route("/list//") class ParserModelIdResource(Resource): def delete(self, project_name: str, model_id: str): + """Delete model from GPU server""" print(" list/model_id delete request", project_name, model_id) return ArboratorParserAPI.delete_model(project_name, model_id) class ParserTrainStart_ED(TypedDict): + """Typed interface to start train request""" project_name: str train_samples_names: List[str] train_user: str @@ -55,6 +62,7 @@ class ParserTrainStart_ED(TypedDict): @api.route("/train/start") class ParserTrainStartResource(Resource): def post(self): + """Start train request""" params: ParserTrainStart_ED = request.get_json(force=True) print(" train/start request :", params) project_name = params["project_name"] @@ -74,6 +82,7 @@ def post(self): @api.route("/train/status") class ParserTrainStatusResource(Resource): def post(self): + """Get training status this request is send every 10 seconds after starting the parsing""" params = request.get_json(force=True) print(" parser/info request :", params) @@ -92,6 +101,7 @@ def post(self): @api.route("/parse/start") class ParserParseStartResource(Resource): def post(self): + """Start parsing endpoint""" params = 
request.get_json(force=True) print(" parse/start request :", params) @@ -111,6 +121,7 @@ def post(self): @api.route("/parse/status") class ParserParseStatus(Resource): def post(self): + """Get parsing status, this request is also send every 10s after start parsing""" params = request.get_json(force=True) print(" parse/status request :", params) diff --git a/app/shared/service.py b/app/shared/service.py index fafc1a3..9b3b48c 100644 --- a/app/shared/service.py +++ b/app/shared/service.py @@ -4,6 +4,14 @@ class SharedService: @staticmethod def get_sendable_data(text_content): + """Send data in file format + + Args: + text_content (str) + + Returns: + sendable_data(File) + """ sendable_data = io.BytesIO() sendable_data.write(text_content.encode("utf-8")) sendable_data.seek(0) diff --git a/app/shared/service_test.py b/app/shared/service_test.py deleted file mode 100644 index b8da9ca..0000000 --- a/app/shared/service_test.py +++ /dev/null @@ -1,9 +0,0 @@ -from io import BytesIO -from .service import SharedService - -text_content_test = "this is a test content" - - -def test_get_sendable_data(): - sendable_data = SharedService.get_sendable_data(text_content_test) - assert type(sendable_data) == BytesIO \ No newline at end of file diff --git a/app/stats/controller.py b/app/stats/controller.py index 0f0c4cc..3669981 100644 --- a/app/stats/controller.py +++ b/app/stats/controller.py @@ -20,6 +20,7 @@ class StaticsProjectResource(Resource): @responds(schema=StatProjectSchema, api=api) def get(self, project_name): + """Get project statistics""" project_stats: StatProjectInterface = {} grew_projects = GrewService.get_projects() project = [project for project in grew_projects if project["name"] == project_name][0] diff --git a/app/stats/interface.py b/app/stats/interface.py index d0f3eff..ae1a7d7 100644 --- a/app/stats/interface.py +++ b/app/stats/interface.py @@ -2,19 +2,23 @@ from mypy_extensions import TypedDict class TopUserInterface(TypedDict, total=False): + """typed 
interface for top user info""" username: str trees_number: int user_avatar: str class LastReadAccessInterface(TypedDict, total=False): + """typed interface for last read access""" last_read: int last_read_username: str class LastWriteAccessInterface(TypedDict, total=False): + """typed interface for last write access""" last_write: int last_write_username: str class StatProjectInterface(TypedDict, total=False): + """typed interface for project statistics""" users: List[str] samples_number: int trees_number: int diff --git a/app/tags/controller.py b/app/tags/controller.py index 511a711..f469195 100644 --- a/app/tags/controller.py +++ b/app/tags/controller.py @@ -16,14 +16,14 @@ class TagsResource(Resource): def post(self, project_name, sample_name): - + """add new tags to the tree""" data = request.get_json() tags = data.get("tags") tree = data.get("tree") return TagService.add_new_tags(project_name, sample_name, tags, tree) def put(self, project_name, sample_name): - + """Remove a tag from tree by updating tags metadata""" data = request.get_json() tag = data.get("tag") tree = data.get("tree") @@ -34,7 +34,7 @@ def put(self, project_name, sample_name): class UserTagsResource(Resource): def get(self, project_name, username): - + """Get user tags """ project = ProjectService.get_by_name(project_name) ProjectService.check_if_project_exist(project) user = UserService.get_by_username(username) @@ -43,7 +43,7 @@ def get(self, project_name, username): def post(self, project_name, username): - + """Create or add new user tag""" project = ProjectService.get_by_name(project_name) ProjectService.check_if_project_exist(project) data = request.get_json() @@ -59,7 +59,7 @@ def post(self, project_name, username): class UserTagValueResource(Resource): def delete(self, project_name, username, tag_value): - + """delete user tags""" project = ProjectService.get_by_name(project_name) ProjectService.check_if_project_exist(project) user = UserService.get_by_username(username) diff 
--git a/app/tags/model.py b/app/tags/model.py index 078c56f..84b592c 100644 --- a/app/tags/model.py +++ b/app/tags/model.py @@ -4,6 +4,7 @@ from app.shared.model import BaseM class UserTags(db.Model, BaseM): + """representation of user tags in the db""" __tablename__ = 'user_tags' id = Column(Integer, primary_key=True) user_id = Column(String(256), db.ForeignKey("users.id")) diff --git a/app/tags/service.py b/app/tags/service.py index ecf880b..d09c1b9 100644 --- a/app/tags/service.py +++ b/app/tags/service.py @@ -12,7 +12,7 @@ class TagService: @staticmethod def add_new_tags(project_name, sample_name, tags, conll): - + """Add new tag to the metadata tag # tag and save the graph""" tags_value = '' new_tags = ', '.join(tags) @@ -37,7 +37,7 @@ def add_new_tags(project_name, sample_name, tags, conll): @staticmethod def remove_tag(project_name, sample_name, tag, conll): - + """Remove the tag from the metadata # tags and save the graph""" sentence_json = sentenceConllToJson(conll) user_id = sentence_json["metaJson"]["user_id"] @@ -65,13 +65,15 @@ def remove_tag(project_name, sample_name, tag, conll): class UserTagsService: - + """Class deals with user tags entity""" @staticmethod def get_by_user_id(user_id) -> UserTags: + """Get json list of user tags""" return UserTags.query.filter(UserTags.user_id == user_id).first() @staticmethod def create_or_update(new_attrs) -> UserTags: + """Create or update new user tags entity in the db""" user_tags_entry = UserTagsService.get_by_user_id(new_attrs.get("user_id")) if user_tags_entry: existing_tags = user_tags_entry.tags @@ -86,6 +88,7 @@ def create_or_update(new_attrs) -> UserTags: @staticmethod def delete_tag(user_id, tag): + """Remove tag from tags list in user tags entity and update the entity""" user_tags_entry = UserTagsService.get_by_user_id(user_id) if user_tags_entry.tags: existing_tags = list(user_tags_entry.tags) diff --git a/app/trees/controller.py b/app/trees/controller.py index a0bc33a..fda6f9b 100644 --- 
a/app/trees/controller.py +++ b/app/trees/controller.py @@ -87,7 +87,18 @@ def get(self, project_name: str, sample_name: str): return data def post(self, project_name: str, sample_name: str): - + """ + Entrypoint to save new tree + Args: + project_name (str) + sample_name (str) + user_id (str) + conll (str) + update_commit (bool): if true we update changes number of the sample + sent_id (str) + Returns: + { "status": "success", "new_conll": with new changes to update frontend view} + """ args = request.get_json() user_id = args.get("userId") conll = args.get("conll") @@ -131,6 +142,7 @@ def post(self, project_name: str, sample_name: str): class UserTreesResource(Resource): def delete(self, project_name: str, sample_name: str, username: str): + """Remove trees of specific user """ data = {"project_id": project_name, "sample_id": sample_name, "sent_ids": "[]","user_id": username, } grew_request("eraseGraphs", data) LastAccessService.update_last_access_per_user_and_project(current_user.id, project_name, "write") @@ -139,7 +151,14 @@ def delete(self, project_name: str, sample_name: str, username: str): class ValidateSampleTrees(Resource): def post(self, project_name: str, sample_name: str): - + """ + Validate all sample + Args: + project_name (str) + sample_name (str) + Returns: + { message: {"user_1": {"sent_id": message, ....}}} + """ project = ProjectService.get_by_name(project_name) trees = GrewService.get_samples_with_string_contents(project_name, [sample_name])[1][0] mapped_languages = TreeValidationService.extract_ud_languages() @@ -160,7 +179,13 @@ def post(self, project_name: str, sample_name: str): class ValidateTree(Resource): def post(self, project_name: str): - + """ + Validate a specific tree when saving it + Args: + conll (str) + Returns: + { "message": "validation message", "passed": True | False } + """ args = request.get_json() data = args.get("conll") + '\n\n' @@ -187,6 +212,13 @@ def post(self, project_name: str): class 
SaveAllTreesResource(Resource): def post(self, project_name: str, sample_name: str): + """Save all trees of sample + + Args: + project_name (str) + sample_name (str) + conllGraph (str) + """ data = request.get_json() file_name = sample_name + "_save_all.conllu" @@ -204,14 +236,22 @@ def post(self, project_name: str, sample_name: str): class SplitTreeResource(Resource): def post(self, project_name: str, sample_name: str): - + """Save splitted sentences, insert new sentences and erase the last sentences + + Args: + project_name (str) + sample_name (str) + sent_id (str) + firstSents ({ "user_id1": sentence_json, ....}) + secondSents ({ "user_id1": sentence_json }) + """ project = ProjectService.get_by_name(project_name) data = request.get_json() sent_id = data.get("sentId") inserted_sentences = [] inserted_sentences.append(data.get("firstSents")) inserted_sentences.append(data.get("secondSents")) - + print(inserted_sentences) TreeSegmentationService.insert_new_sentences(project_name, sample_name, sent_id, inserted_sentences) GrewService.erase_sentence(project_name, sample_name, sent_id) @@ -223,7 +263,15 @@ def post(self, project_name: str, sample_name: str): class MergeTreesResource(Resource): def post(self, project_name: str, sample_name: str): + """Save merged sentences, insert new sentence and erase the two merged sentences + Args: + project_name (str) + sample_name (str) + sent_id (str) + firstSents ({ "user_id1": sentence_json, ....}) + secondSents ({ "user_id1": sentence_json }) + """ project = ProjectService.get_by_name(project_name) data = request.get_json() first_sent_id = data.get("firstSentId") diff --git a/app/trees/service.py b/app/trees/service.py index cc4bd03..90fa0e0 100644 --- a/app/trees/service.py +++ b/app/trees/service.py @@ -12,9 +12,17 @@ VALIDATED = "validated" class TreeService: - + """this class contains all methods that deal with trees""" @staticmethod def check_cycle(conll): + """check if there is a cycle in the graph + + Args: + conll 
(str): user tree + + Returns: + cycle_nodes (List[Tuple(node1, node2)]) + """ sentence_json = sentenceConllToJson(conll) nodes_json = sentence_json['treeJson']['nodesJson'] @@ -35,10 +43,17 @@ def check_cycle(conll): return list(set(tuple(sorted(nodes_tuple)) for nodes_tuple in cycle_nodes)) @staticmethod - def samples_to_trees(samples, sample_name): - """ transforms a list of samples into a trees object """ + def samples_to_trees(sample_trees, sample_name): + """ + transforms a list of samples into a trees object + Args: + sample_trees (grew_sample_trees) + sample_name (str) + Returns: + {"sent_id": {"sample_name": "", "sentence": "", "sent_id": "", "conlls": {"user_id1": ""}, "matches": {}}} + """ trees = {} - for sent_id, users in samples.items(): + for sent_id, users in sample_trees.items(): for user_id, conll in users.items(): sentence_json = sentenceConllToJson(conll) if 'text' in sentence_json["metaJson"].keys(): @@ -58,6 +73,13 @@ def samples_to_trees(samples, sample_name): @staticmethod def add_base_tree(trees): + """ + for blind annotation mode we add base tree in trees object + Args: + trees (trees_object): {"sent_id": {"sample_name": "", "sentence": "", "sent_id": "", "conlls": {"user_id1": ""}, "matches": {}}} + Returns: + trees (trees_object) + """ for sent_trees in trees.values(): sent_conlls = sent_trees["conlls"] list_users = list(sent_conlls.keys()) @@ -70,6 +92,15 @@ def add_base_tree(trees): @staticmethod def add_user_tree(trees, username): + """Add user tree; in blind annotation mode all users start with empty base tree + + Args: + trees (trees_object) + username (str) + + Returns: + trees + """ for sent_trees in trees.values(): sent_conlls = sent_trees["conlls"] list_users = list(sent_conlls.keys()) @@ -79,6 +110,15 @@ def add_user_tree(trees, username): @staticmethod def restrict_trees(trees, restricted_users): + """Remove all users trees that are not in restricted_users list + + Args: + trees (trees_object) + restricted_users (List[str]): 
list of username + + Returns: + trees + """ for sent_trees in trees.values(): sent_conlls = sent_trees["conlls"] for user_id in list(sent_conlls.keys()): @@ -88,7 +128,17 @@ def restrict_trees(trees, restricted_users): @staticmethod def update_sentence_trees_with_new_sent_id(project_name, sample_name, old_sent_id, new_sent_id): - + """ + This function is used when we update sent_id of sentence, + since saveGraph uses sent_id so we can't use it in our case + so we use saveConll instead and use change all the trees of a sentence + + Args: + project_name (str) + sample_name (str) + old_sent_id (str) + new_sent_id (str) + """ response = grew_request('getConll', { "project_id": project_name, "sample_id": sample_name, @@ -115,9 +165,17 @@ def update_sentence_trees_with_new_sent_id(project_name, sample_name, old_sent_i os.remove(path_file) class TreeSegmentationService: - + """this class used for tree segmentation feature""" @staticmethod def insert_new_sentences(project_name: str, sample_name, sent_id: str, inserted_sentences): + """Insert new sentences in specific position, this function is used for sentence split and merge + + Args: + project_name (str) + sample_name (str) + sent_id (str) + inserted_sentences (sentenceJson) + """ conll_to_insert = '' for sentences in inserted_sentences: for sentence_json in sentences.values(): @@ -135,9 +193,10 @@ def insert_new_sentences(project_name: str, sample_name, sent_id: str, inserted_ class TreeValidationService: - + """This class deals with trees validation features""" @staticmethod def extract_ud_languages(): + """extract ud languages list""" html_text = requests.get('https://quest.ms.mff.cuni.cz/udvalidator/cgi-bin/unidep/langspec/specify_feature.pl').text soup = BeautifulSoup(html_text, features="lxml") @@ -150,6 +209,14 @@ def extract_ud_languages(): @staticmethod def parse_validation_results(message): + """Parse validation result message + + Args: + message (str) + + Returns: + error_messages ({"sent_id": "message" }) + 
""" error_messages = {} messages = message.split("---") if len(messages) > 1: diff --git a/app/utils/arborator_parser_utils.py b/app/utils/arborator_parser_utils.py index 964968c..f22ed6c 100644 --- a/app/utils/arborator_parser_utils.py +++ b/app/utils/arborator_parser_utils.py @@ -13,6 +13,14 @@ class ModelInfo_t(TypedDict): class ArboratorParserAPI: @staticmethod def send_get_request(url_suffix: str): + """send get request to the parser GPU server + + Args: + url_suffix (str) + + Returns: + response + """ url = f"{parser_config.server}/parser/models{url_suffix}" try: reply = requests.get(url, timeout=10) @@ -30,6 +38,15 @@ def send_get_request(url_suffix: str): @staticmethod def send_post_request(url_suffix: str, data: Dict): + """Send post request + + Args: + url_suffix (str) + data (Dict) + + Returns: + response + """ url = f"{parser_config.server}/parser/models{url_suffix}" try: reply = requests.post(url, json=data, timeout=10) @@ -54,6 +71,13 @@ def send_post_request(url_suffix: str, data: Dict): @staticmethod def send_delete_request(url_suffix: str): + """Send delete request to GPU server + + Args: + url_suffix (str) + Returns: + response + """ url = f"{parser_config.server}/parser/models{url_suffix}" try: reply = requests.delete(url, timeout=10) @@ -67,14 +91,27 @@ def send_delete_request(url_suffix: str): @staticmethod def list(): + """Get the list of the parsers in the GPU server""" return ArboratorParserAPI.send_get_request("/list") @staticmethod def delete_model(project_name: str, model_id: str): + """Delete specific model of specific project""" return ArboratorParserAPI.send_delete_request("/list/{}/{}".format(project_name, model_id)) @staticmethod def train_start(project_name: str, train_samples: Dict[str, str], max_epoch: int, base_model: Union[ModelInfo_t, None]): + """start training + + Args: + project_name (str) + train_samples (Dict[str, str]) + max_epoch (int) + base_model (Union[ModelInfo_t, None]) + + Returns: + post_response + """ data = { 
"project_name": project_name, "train_samples": train_samples, @@ -85,6 +122,15 @@ def train_start(project_name: str, train_samples: Dict[str, str], max_epoch: int @staticmethod def train_status(model_info: ModelInfo_t, train_task_id: str): + """Send train status request + + Args: + model_info (ModelInfo_t): _description_ + train_task_id (str): _description_ + + Returns: + post_response + """ data = { "model_info": model_info, "train_task_id": train_task_id, @@ -93,6 +139,16 @@ def train_status(model_info: ModelInfo_t, train_task_id: str): @staticmethod def parse_start(model_info: ModelInfo_t, to_parse_samples: Dict[str, str], parsing_settings: Dict[str, str]): + """send start parsing request + + Args: + model_info (ModelInfo_t) + to_parse_samples (Dict[str, str]) + parsing_settings (Dict[str, str]) + + Returns: + post_response + """ data = { "model_info": model_info, "to_parse_samples": to_parse_samples, diff --git a/app/utils/grew_utils.py b/app/utils/grew_utils.py index dc86cae..f9ebb20 100644 --- a/app/utils/grew_utils.py +++ b/app/utils/grew_utils.py @@ -18,6 +18,16 @@ def grew_request(fct_name, data={}, files={}): + """Send grew request + + Args: + fct_name (str) + data (dict, optional) + files (dict, optional) + + Returns: + grew_response ({"status": "", "data": ..., "messages": ... 
}) + """ try: response = requests.post("%s/%s" % (grew_config.server, fct_name), files=files, data=data) @@ -59,8 +69,18 @@ class GrewProjectInterface(TypedDict): class GrewService: + """Class for grew request functions """ @staticmethod def get_sample_trees(project_name, sample_name) -> Dict[str, Dict[str, str]]: + """Get sample trees + + Args: + project_name (str) + sample_name (str) + + Returns: + Dict[str, Dict[str, str]] + """ response = grew_request( "getConll", data={"project_id": project_name, "sample_id": sample_name}, @@ -70,12 +90,25 @@ def get_sample_trees(project_name, sample_name) -> Dict[str, Dict[str, str]]: @staticmethod def get_projects(): + """Get list of projects stored in grew server + + Returns: + grew_projects(List(GrewProjectInterface)) + """ reply = grew_request("getProjects") grew_projects: List[GrewProjectInterface] = reply.get("data", []) return grew_projects @staticmethod def get_user_projects(username): + """Get list of user projects (grew projects where user has saved tree under their name) + + Args: + username (str) + + Returns: + user_grew_projects (List[GrewProjectInterface]) + """ response = grew_request("getUserProjects", data={ "user_id": username }) user_grew_projects: List[GrewProjectInterface] = response.get("data") return user_grew_projects @@ -83,6 +116,11 @@ def get_user_projects(username): @staticmethod def create_project(project_id: str): + """create new project in grew server + + Args: + project_id (str) + """ grew_request( "newProject", data={"project_id": project_id}, @@ -90,10 +128,21 @@ def create_project(project_id: str): @staticmethod def delete_project(project_id: str): + """Delete project from grew server + + Args: + project_id (str) + """ grew_request("eraseProject", data={"project_id": project_id}) @staticmethod - def rename_project(project_name: str, new_project_name): + def rename_project(project_name: str, new_project_name: str): + """Rename existing project + + Args: + project_name (str) + new_project_name 
(str) + """ grew_request("renameProject", data= { "project_id": project_name, "new_project_id": new_project_name @@ -101,6 +150,14 @@ def rename_project(project_name: str, new_project_name): @staticmethod def get_conll_schema(project_id: str): + """Get conll config schema from grew server + + Args: + project_id (str) + + Returns: + conll_schema ({"annotationFeatures": json dict}) + """ grew_reply = grew_request("getProjectConfig", data={"project_id": project_id}) data = grew_reply.get("data") @@ -116,6 +173,12 @@ def get_conll_schema(project_id: str): @staticmethod def update_project_config(project_id, dumped_project_config) -> None: + """update project schema + + Args: + project_id (str) + dumped_project_config (dict) + """ grew_request( "updateProjectConfig", data={ @@ -126,6 +189,14 @@ def update_project_config(project_id, dumped_project_config) -> None: @staticmethod def get_samples(project_id : str): + """Get samples from grew server + + Args: + project_id (str) + + Returns: + grew_samples (List[grew_sample]) + """ reply = grew_request( "getSamples", data={"project_id": project_id} ) @@ -135,6 +206,12 @@ def get_samples(project_id : str): @staticmethod def create_samples(project_id: str, sample_ids: List[str]): + """Create samples + + Args: + project_id (str) + sample_ids (List[str]): sample_ids can also contain only one sample_name + """ reply = grew_request( "newSamples", data={"project_id": project_id, "sample_ids": json.dumps(sample_ids)}, @@ -144,6 +221,13 @@ def create_samples(project_id: str, sample_ids: List[str]): @staticmethod def save_sample(project_id: str, sample_id: str, conll_file) -> None: + """Save sample content in grew + + Args: + project_id (str) + sample_id (str) + conll_file (File) + """ grew_request( "saveConll", data={"project_id": project_id, "sample_id": sample_id}, @@ -152,6 +236,12 @@ def save_sample(project_id: str, sample_id: str, conll_file) -> None: @staticmethod def delete_samples(project_id: str, sample_ids: List[str]) -> 
None: + """Delete samples of a specific project + + Args: + project_id (str) + sample_ids (List[str]) + """ grew_request( "eraseSamples", data={"project_id": project_id, "sample_ids": json.dumps(sample_ids)}, @@ -159,7 +249,25 @@ def delete_samples(project_id: str, sample_ids: List[str]) -> None: @staticmethod def search_request_in_graphs(project_id: str, request: str, sample_ids: List[str], user_type: str, other_user: str): - + """Grew search + + Args: + project_id (str) + request (str) + sample_ids (List[str]) + user_type (str) + other_user (str) + + Returns: + grew_search results ("sent_id": { + 'sample_id':…, + 'sent_id':…, + 'conll':…, + 'user_id':…, + 'nodes':…, + 'edges':… + }) + """ user_ids = GrewService.get_user_ids(user_type, other_user) data = { "project_id": project_id, @@ -171,8 +279,17 @@ def search_request_in_graphs(project_id: str, request: str, sample_ids: List[str return reply @staticmethod - def try_package(project_id: str, package: str, sample_ids, user_type, other_user): - + def try_package(project_id: str, package: str, sample_ids: List[str], user_type: str, other_user: str): + """Search rewrite + + Args: + project_id (str) + package (str) + sample_ids (List[str]) + user_type (str) + other_user (str) + + """ user_ids = GrewService.get_user_ids(user_type, other_user) data = { "project_id": project_id, @@ -185,7 +302,22 @@ def try_package(project_id: str, package: str, sample_ids, user_type, other_user @staticmethod def get_relation_table(project_id: str, sample_ids, user_type, other_user): - + """Get relation table + + Args: + project_id (str) + sample_ids (List[str]) + user_type (str) + other_user (str) + + Returns: + relation_table: { + "root": { "_": { "ADJ": 1 } }, + "punct": { "ADP": { "PUNCT": 2 } }, + "mod": { "ADJ": { "ADV": 1 } }, + "comp:obj": { "ADP": { "ADJ": 1 } } + } + """ if not sample_ids: sample_ids = [] user_ids = GrewService.get_user_ids(user_type, other_user) @@ -200,7 +332,16 @@ def get_relation_table(project_id: str, sample_ids, 
user_type, other_user): @staticmethod def get_lexicon(project_name: str, sample_ids, user_type, other_user, prune, features): - + """Get lexicon + + Args: + project_name (str) + sample_ids (List[str]) + user_type (str) + other_user (str) + prune (int) + features (str) + """ user_ids = GrewService.get_user_ids(user_type, other_user) prune = (None, prune) [prune != 0] reply = grew_request( @@ -217,6 +358,18 @@ def get_lexicon(project_name: str, sample_ids, user_type, other_user, prune, fea @staticmethod def get_user_ids(user_type: str, other_user: str): + """ + This function is used in grew search, try package, + relation table and lexicon to get user_ids parameter + based on the user type and other user value + + Args: + user_type (str) + other_user (str) + + Returns: + dict + """ if user_type == 'user': user_ids = { "one": [current_user.username] } elif user_type == 'user_recent': @@ -235,6 +388,14 @@ def get_user_ids(user_type: str, other_user: str): @staticmethod def insert_conll(project_id: str, sample_id: str, pivot_sent_id: str, conll_file): + """Insert conll in specific position + + Args: + project_id (str) + sample_id (str) + pivot_sent_id (str) + conll_file (File): contains conlls strings to insert + """ data = { "project_id": project_id, "sample_id": sample_id, @@ -245,6 +406,13 @@ def insert_conll(project_id: str, sample_id: str, pivot_sent_id: str, conll_file @staticmethod def erase_sentence(project_id: str, sample_id: str, sent_id: str): + """erase sentence + + Args: + project_id (str) + sample_id (str) + sent_id (str) + """ data = { "project_id": project_id, "sample_id": sample_id, @@ -254,6 +422,15 @@ def erase_sentence(project_id: str, sample_id: str, sent_id: str): @staticmethod def extract_tagset(project_id, sample_ids, grew_funct): + """Extract configuration tags based on conll + Args: + project_id (str) + sample_ids (List[str]) + grew_funct (str): getPOS | getRelations | getFeatures + + Returns: + extracted tagset (type to be confirmed) + """ data = { "project_id": project_id, 
"sample_ids": json.dumps(sample_ids) @@ -263,7 +440,15 @@ def extract_tagset(project_id, sample_ids, grew_funct): @staticmethod def get_config_from_samples(project_name, sample_ids): - + """Get all tag sets from a list of samples + + Args: + project_name (str) + sample_ids (List[str]) + + Returns: + pos_list (List[str]), relation_list (List[str]), feat_list (List[str]), misc_list (List[str]) + """ pos_list = GrewService.extract_tagset(project_name, sample_ids, "getPOS") relation_list = GrewService.extract_tagset(project_name, sample_ids, "getRelations") features = GrewService.extract_tagset(project_name, sample_ids, "getFeatures") @@ -273,6 +458,15 @@ def get_config_from_samples(project_name, sample_ids): @staticmethod def get_samples_with_string_contents(project_name: str, sample_names: List[str]): + """Get string content of samples based on each user + + Args: + project_name (str) + sample_names (List[str]) + + Returns: + samples_names (List[str]), sample_content_files [{'user_id': content_string }] + """ sample_content_files = list() for sample_name in sample_names: reply = grew_request( @@ -302,6 +496,16 @@ def get_samples_with_string_contents(project_name: str, sample_names: List[str]) @staticmethod def get_samples_with_string_contents_as_dict(project_name: str, sample_names: List[str], user: str) -> Dict[str, str]: + """Same as get_samples_with_string_contents but just for specific user + + Args: + project_name (str) + sample_names (List[str]) + user (str) + + Returns: + Dict[str, str] + """ samples_dict_for_user: Dict[str, str] = {} for sample_name in sample_names: reply = grew_request( @@ -330,24 +534,6 @@ def get_samples_with_string_contents_as_dict(project_name: str, sample_names: Li print("Error: {}".format(reply.get("message"))) return samples_dict_for_user - @staticmethod - def get_validated_trees_filled_up_with_owner_trees(project_name: str, sample_name: str, username: str): - reply = grew_request( - "getConll", - data={"project_id": project_name, "sample_id": 
sample_name}, - ) - validated_trees = "" - if reply.get("status") == "OK": - sample_tree = SampleExportService.serve_sample_trees(reply.get("data", {})) - sample_tree_nots_noui = SampleExportService.serve_sample_trees(reply.get("data", {}), timestamps=False, user_ids=False) - for sent_id in sample_tree: - if "validated" in sample_tree[sent_id]["conlls"].keys(): - validated_trees += "".join(sample_tree_nots_noui[sent_id]["conlls"]["validated"]) - else: - validated_trees += "".join(sample_tree_nots_noui[sent_id]["conlls"][username]) - - return validated_trees - @staticmethod def format_trees_new(m, trees, is_package: bool = False): """ @@ -403,6 +589,14 @@ def format_trees_new(m, trees, is_package: bool = False): return trees def get_timestamp(conll): + """Get timestamp metadata from conll string + + Args: + conll (str) + + Returns: + timestamp (str) | False + """ t = re.search(r"# timestamp = (\d+(?:\.\d+)?)\n", conll) if t and t.groups(): return t.groups()[0] @@ -411,6 +605,7 @@ def get_timestamp(conll): class SampleExportService: + """Class contains sample export functions""" @staticmethod def serve_sample_trees(samples, timestamps=True, user_ids=True): """ get samples in form of json trees """ @@ -430,6 +625,14 @@ def serve_sample_trees(samples, timestamps=True, user_ids=True): @staticmethod def sample_tree_to_content_file(tree) -> Dict[str, str]: + """ + + Args: + tree (tree_object) + + Returns: + Dict[str, str] + """ if isinstance(tree, str): tree = json.loads(tree) usertrees: Dict[str, List[str]] = {} @@ -446,6 +649,14 @@ def sample_tree_to_content_file(tree) -> Dict[str, str]: @staticmethod def get_last_user(tree): + """Get username of most recent tree + + Args: + tree (dict(str, str)) + + Returns: + username (str) + """ timestamps = [(user, get_timestamp(conll)) for (user, conll) in tree.items()] if len(timestamps) == 1: last = timestamps[0][0] @@ -455,6 +666,16 @@ def get_last_user(tree): @staticmethod def content_files_to_zip(sample_names, sample_trees, 
users): + """convert files to export in zip file format + + Args: + sample_names (List[str]) + sample_trees (List[{username: conll_content}, ...]): _description_ + users (List[str]) + + Returns: + memory_file (File) + """ memory_file = io.BytesIO() with zipfile.ZipFile(memory_file, "w") as zf: for user in users: