diff --git a/pycorenlp/corenlp.py b/pycorenlp/corenlp.py index 6eb2175..c12e109 100644 --- a/pycorenlp/corenlp.py +++ b/pycorenlp/corenlp.py @@ -1,4 +1,4 @@ -import json, requests +import json, requests, sys class StanfordCoreNLP: @@ -8,7 +8,7 @@ def __init__(self, server_url): self.server_url = server_url def annotate(self, text, properties=None): - assert isinstance(text, str) + assert isinstance(text, str), "text parameter is not 'str'" if properties is None: properties = {} else: @@ -22,11 +22,14 @@ def annotate(self, text, properties=None): '$ cd stanford-corenlp-full-2015-12-09/ \n' '$ java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer') - data = text.encode() + # ensure proper encoding of python 3 strings + if sys.version_info.major >= 3: + text = text.encode('utf-8') + r = requests.post( self.server_url, params={ 'properties': str(properties) - }, data=data, headers={'Connection': 'close'}) + }, data=text, headers={'Connection': 'close'}) output = r.text if ('outputFormat' in properties and properties['outputFormat'] == 'json'):