Skip to content

Commit 046bde7

Browse files
committed
Add repr to python_2_unicode_compatible
Allow both __repr__ and __str__ to return unicode on both Python 2 and Python 3.
1 parent a106543 commit 046bde7

File tree

5 files changed: +27 additions, -13 deletions (40 lines changed)

chemdataextractor/doc/document.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222

2323
import six
2424

25+
from ..utils import python_2_unicode_compatible
2526
from .text import Paragraph, Citation, Footnote, Heading, Title
2627
from .table import Table
2728
from .figure import Figure
@@ -33,7 +34,7 @@
3334
log = logging.getLogger(__name__)
3435

3536

36-
@six.python_2_unicode_compatible
37+
@python_2_unicode_compatible
3738
class BaseDocument(six.with_metaclass(ABCMeta, collections.Sequence)):
3839
"""Abstract base class for a Document."""
3940

chemdataextractor/doc/element.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -18,8 +18,10 @@
1818

1919
import six
2020

21+
from ..utils import python_2_unicode_compatible
2122

22-
@six.python_2_unicode_compatible
23+
24+
@python_2_unicode_compatible
2325
class BaseElement(six.with_metaclass(ABCMeta)):
2426
"""Abstract base class for a Document Element."""
2527

@@ -65,7 +67,7 @@ def to_json(self, *args, **kwargs):
6567
return json.dumps(self.serialize(), *args, **kwargs)
6668

6769

68-
@six.python_2_unicode_compatible
70+
@python_2_unicode_compatible
6971
class CaptionedElement(BaseElement):
7072
"""Document Element with a caption."""
7173

@@ -76,7 +78,7 @@ def __init__(self, caption, label=None, **kwargs):
7678
self.label = label
7779

7880
def __repr__(self):
79-
return '%s(id=%r, references=%r, caption=%r)' % (self.__class__.__name__, self.id, self.references, self.caption.text.encode('utf8'))
81+
return '%s(id=%r, references=%r, caption=%r)' % (self.__class__.__name__, self.id, self.references, self.caption.text)
8082

8183
def __str__(self):
8284
return self.caption.text

chemdataextractor/doc/text.py

Lines changed: 14 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
from __future__ import print_function
1515
from __future__ import unicode_literals
1616
from abc import abstractproperty
17+
import collections
1718
import logging
1819
import re
1920

@@ -34,14 +35,14 @@
3435
from ..nlp.pos import ChemCrfPosTagger
3536
from ..nlp.tokenize import ChemSentenceTokenizer, ChemWordTokenizer, regex_span_tokenize
3637
from ..text import CONTROL_RE
37-
from ..utils import memoized_property
38+
from ..utils import memoized_property, python_2_unicode_compatible
3839
from .element import BaseElement
3940

4041

4142
log = logging.getLogger(__name__)
4243

4344

44-
@six.python_2_unicode_compatible
45+
@python_2_unicode_compatible
4546
class BaseText(BaseElement):
4647
"""Abstract base class for a text Document Element."""
4748

@@ -59,7 +60,7 @@ def __init__(self, text, word_tokenizer=None, lexicon=None, abbreviation_detecto
5960
self.parsers = parsers if parsers is not None else self.parsers
6061

6162
def __repr__(self):
62-
return '%s(id=%r, references=%r, text=%r)' % (self.__class__.__name__, self.id, self.references, self._text.encode('utf8'))
63+
return '%s(id=%r, references=%r, text=%r)' % (self.__class__.__name__, self.id, self.references, self._text)
6364

6465
def __str__(self):
6566
return self._text
@@ -113,7 +114,7 @@ def _repr_html_(self):
113114
return self.text
114115

115116

116-
class Text(BaseText):
117+
class Text(collections.Sequence, BaseText):
117118
"""A passage of text, comprising one or more sentences."""
118119

119120
sentence_tokenizer = ChemSentenceTokenizer()
@@ -129,6 +130,12 @@ def __init__(self, text, sentence_tokenizer=None, word_tokenizer=None, lexicon=N
129130
super(Text, self).__init__(text, word_tokenizer=word_tokenizer, lexicon=lexicon, abbreviation_detector=abbreviation_detector, pos_tagger=pos_tagger, ner_tagger=ner_tagger, parsers=None, **kwargs)
130131
self.sentence_tokenizer = sentence_tokenizer if sentence_tokenizer is not None else self.sentence_tokenizer
131132

133+
def __getitem__(self, index):
134+
return self.sentences[index]
135+
136+
def __len__(self):
137+
return len(self.sentences)
138+
132139
@memoized_property
133140
def sentences(self):
134141
"""Return a list of Sentences that make up this text passage."""
@@ -310,7 +317,7 @@ def __init__(self, text, start=0, end=None, word_tokenizer=None, lexicon=None, a
310317
self.end = end if end is not None else len(text)
311318

312319
def __repr__(self):
313-
return '%s(%r, %r, %r)' % (self.__class__.__name__, self._text.encode('utf8'), self.start, self.end)
320+
return '%s(%r, %r, %r)' % (self.__class__.__name__, self._text, self.start, self.end)
314321

315322
@memoized_property
316323
def tokens(self):
@@ -532,7 +539,7 @@ def __add__(self, other):
532539
return NotImplemented
533540

534541

535-
@six.python_2_unicode_compatible
542+
@python_2_unicode_compatible
536543
class Span(object):
537544
"""A text span within a sentence."""
538545

@@ -545,7 +552,7 @@ def __init__(self, text, start, end):
545552
"""The end offset of this token in the original text."""
546553

547554
def __repr__(self):
548-
return '%s(%r, %r, %r)' % (self.__class__.__name__, self.text.encode('utf8'), self.start, self.end)
555+
return '%s(%r, %r, %r)' % (self.__class__.__name__, self.text, self.start, self.end)
549556

550557
def __str__(self):
551558
return self.text

chemdataextractor/model.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,8 @@
2222

2323
import six
2424

25+
from .utils import python_2_unicode_compatible
26+
2527

2628
log = logging.getLogger(__name__)
2729

@@ -144,7 +146,7 @@ def __setattr__(cls, key, value):
144146
return super(ModelMeta, cls).__setattr__(key, value)
145147

146148

147-
@six.python_2_unicode_compatible
149+
@python_2_unicode_compatible
148150
class BaseModel(six.with_metaclass(ModelMeta)):
149151
""""""
150152

@@ -267,7 +269,7 @@ def to_json(self, *args, **kwargs):
267269
return json.dumps(self.serialize(primitive=True), *args, **kwargs)
268270

269271

270-
@six.python_2_unicode_compatible
272+
@python_2_unicode_compatible
271273
class ModelList(MutableSequence):
272274
"""Wrapper around a list of Models objects to facilitate operations on all at once."""
273275

chemdataextractor/utils.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,8 @@ def python_2_unicode_compatible(klass):
5353
if six.PY2:
5454
if '__str__' not in klass.__dict__:
5555
raise ValueError("Define __str__() on %s to use @python_2_unicode_compatible" % klass.__name__)
56+
if '__repr__' not in klass.__dict__:
57+
raise ValueError("Define __repr__() on %s to use @python_2_unicode_compatible" % klass.__name__)
5658
klass.__unicode__ = klass.__str__
5759
klass._unicode_repr = klass.__repr__
5860
klass.__str__ = lambda self: self.__unicode__().encode('utf-8')

0 commit comments

Comments
 (0)