15
15
from os .path import join as path_join
16
16
from tempfile import gettempdir
17
17
18
+ from nltk import jsontags
18
19
from nltk .data import find , load
19
20
from nltk .tag .api import TaggerI
20
21
@@ -39,13 +40,16 @@ def lang_jsons(lang="eng"):
39
40
# Maps each listed language code to whatever lang_jsons() returns for it;
# the table is built once, when the module is imported.
TAGGER_JSONS = {lang : lang_jsons (lang ) for lang in ["eng" , "rus" , "xxx" ]}
40
41
41
42
43
+ @jsontags .register_tag
42
44
class AveragedPerceptron :
43
45
"""An averaged perceptron, as implemented by Matthew Honnibal.
44
46
45
47
See more implementation details here:
46
48
https://explosion.ai/blog/part-of-speech-pos-tagger-in-python
47
49
"""
48
50
51
+ json_tag = "nltk.tag.perceptron.AveragedPerceptron"
52
+
49
53
def __init__ (self , weights = None ):
50
54
# Each feature gets its own weight vector, so weights is a dict-of-dicts
51
55
self .weights = weights if weights else {}
@@ -122,7 +126,15 @@ def load(self, path):
122
126
with open (path ) as fin :
123
127
self .weights = json .load (fin )
124
128
129
+ def encode_json_obj (self ):
130
+ return self .weights
131
+
132
+ @classmethod
133
+ def decode_json_obj (cls , obj ):
134
+ return cls (obj )
125
135
136
+
137
+ @jsontags .register_tag
126
138
class PerceptronTagger (TaggerI ):
127
139
"""
128
140
Greedy Averaged Perceptron tagger, as implemented by Matthew Honnibal.
@@ -152,6 +164,8 @@ class PerceptronTagger(TaggerI):
152
164
[('The', 'DT'), ('red', 'JJ'), ('cat', 'NN')]
153
165
"""
154
166
167
+ json_tag = "nltk.tag.perceptron.PerceptronTagger"
168
+
155
169
START = ["-START-" , "-START2-" ]
156
170
END = ["-END-" , "-END2-" ]
157
171
@@ -257,7 +271,7 @@ def save_to_json(self, lang="xxx", loc=None):
257
271
with open (path_join (loc , jsons ["tagdict" ]), "w" ) as fout :
258
272
json .dump (self .tagdict , fout )
259
273
with open (path_join (loc , jsons ["classes" ]), "w" ) as fout :
260
- json .dump (list (self .model . classes ), fout )
274
+ json .dump (list (self .classes ), fout )
261
275
262
276
def load_from_json (self , lang = "eng" , loc = None ):
263
277
# Automatically find path to the tagger if location is not specified.
@@ -269,7 +283,19 @@ def load_from_json(self, lang="eng", loc=None):
269
283
with open (loc + jsons ["tagdict" ]) as fin :
270
284
self .tagdict = json .load (fin )
271
285
with open (loc + jsons ["classes" ]) as fin :
272
- self .model .classes = set (json .load (fin ))
286
+ self .classes = set (json .load (fin ))
287
+ self .model .classes = self .classes
288
+
289
+ def encode_json_obj (self ):
290
+ return self .model .weights , self .tagdict , list (self .classes )
291
+
292
@classmethod
def decode_json_obj(cls, obj):
    """Rebuild a tagger from the triple produced by ``encode_json_obj``.

    :param obj: a 3-item sequence ``(weights, tagdict, classes)``.
    :return: a new ``cls`` instance whose model weights, tag dictionary
        and class set are taken from ``obj``.
    """
    # NOTE(review): load=False presumably skips loading a pretrained
    # model in the constructor -- confirm against __init__.
    restored = cls(load=False)
    weights, tag_lookup, class_names = obj
    restored.model.weights = weights
    restored.tagdict = tag_lookup
    # The classes arrive as a list; the tagger and its model share one set.
    restored.classes = set(class_names)
    restored.model.classes = restored.classes
    return restored
273
299
274
300
def normalize (self , word ):
275
301
"""
0 commit comments