Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unquote non-ASCII file names during annotation process #1308

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions server/src/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
from common import ProtocolError
from message import Messager

import urllib.parse

'''
Functionality related to the annotation file format.

Expand Down Expand Up @@ -364,7 +366,7 @@ def __init__(self, document, read_only=False):
input_files = self._select_input_files(document)

if not input_files:
with open('{}.{}'.format(document, JOINED_ANN_FILE_SUFF), 'w'):
with open('{}.{}'.format(urllib.parse.unquote(document), JOINED_ANN_FILE_SUFF), 'w'):
pass

input_files = self._select_input_files(document)
Expand Down Expand Up @@ -1192,7 +1194,7 @@ def _read_document_text(self, document):
# TODO: this is too naive; document may be e.g. "PMID.a1",
# in which case the reasonable text file name guess is
# "PMID.txt", not "PMID.a1.txt"
textfn = document + '.' + TEXT_FILE_SUFFIX
textfn = urllib.parse.unquote(document) + '.' + TEXT_FILE_SUFFIX
try:
with open_textfile(textfn, 'r') as f:
return f.read()
Expand Down
13 changes: 11 additions & 2 deletions server/src/annotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
from projectconfig import (ENTITY_CATEGORY, EVENT_CATEGORY, RELATION_CATEGORY,
UNKNOWN_CATEGORY, ProjectConfiguration)

import urllib.parse

try:
from config import DEBUG
except ImportError:
Expand Down Expand Up @@ -327,7 +329,7 @@ def __create_span(ann_obj, mods, type, offsets, txt_file_path,
# Get a new ID
new_id = ann_obj.get_new_id('T') # XXX: Cons
# Get the text span
with open_textfile(txt_file_path, 'r') as txt_file:
with open_textfile(urllib.parse.unquote(txt_file_path), 'r') as txt_file:
text = txt_file.read()
text_span = _text_for_offsets(text, offsets)

Expand Down Expand Up @@ -447,7 +449,7 @@ def create_span(collection, document, offsets, type, attributes=None,
# end) pairs; convert once at this interface
offsets = _json_offsets_to_list(offsets)

return _create_span(collection, document, offsets, type, attributes,
return _create_span(collection, urllib.parse.unquote(document), offsets, type, attributes,
normalizations, id, comment)


Expand Down Expand Up @@ -866,6 +868,8 @@ def reverse_arc(collection, document, origin, target, type, attributes=None):
real_dir = real_directory(directory)
# mods = ModificationTracker() # TODO
projectconf = ProjectConfiguration(real_dir)

document = urllib.parse.unquote(document)
document = path_join(real_dir, document)
with TextAnnotations(document) as ann_obj:
# bail as quick as possible if read-only
Expand Down Expand Up @@ -912,6 +916,7 @@ def create_arc(collection, document, origin, target, type, attributes=None,

projectconf = ProjectConfiguration(real_dir)

document = urllib.parse.unquote(document)
document = path_join(real_dir, document)

with TextAnnotations(document) as ann_obj:
Expand Down Expand Up @@ -1040,6 +1045,7 @@ def delete_arc(collection, document, origin, target, type):

projectconf = ProjectConfiguration(real_dir)

document = urllib.parse.unquote(document)
document = path_join(real_dir, document)

with TextAnnotations(document) as ann_obj:
Expand All @@ -1063,6 +1069,7 @@ def delete_span(collection, document, id):

real_dir = real_directory(directory)

document = urllib.parse.unquote(document)
document = path_join(real_dir, document)

with TextAnnotations(document) as ann_obj:
Expand Down Expand Up @@ -1116,6 +1123,7 @@ def split_span(collection, document, args, id):
directory = collection

real_dir = real_directory(directory)
document = urllib.parse.unquote(document)
document = path_join(real_dir, document)
# TODO don't know how to pass an array directly, so doing extra catenate
# and split
Expand Down Expand Up @@ -1246,6 +1254,7 @@ def split_span(collection, document, args, id):

def set_status(directory, document, status=None):
real_dir = real_directory(directory)
document = urllib.parse.unquote(document)

with TextAnnotations(path_join(real_dir, document)) as ann:
# Erase all old status annotations
Expand Down
3 changes: 2 additions & 1 deletion server/src/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
visual_options_get_arc_bundle,
visual_options_get_text_direction)
from stats import get_statistics
import urllib.parse


def _fill_type_configuration(
Expand Down Expand Up @@ -914,7 +915,7 @@ def _document_json_dict(document):
def get_document(collection, document):
directory = collection
real_dir = real_directory(directory)
doc_path = path_join(real_dir, document)
doc_path = path_join(real_dir, urllib.parse.unquote(document))
return _document_json_dict(doc_path)


Expand Down