forked from Bilingual-Metrics-Analyses-Team/python-tagger
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconfig.ini
More file actions
32 lines (27 loc) · 1.23 KB
/
Copy pathconfig.ini
File metadata and controls
32 lines (27 loc) · 1.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
[DEFAULT]
# General tagger settings.
# NOTE(review): if this file is read with Python's configparser, keys in
# [DEFAULT] also act as fallback values for every other section.
# Comma-separated pair of language labels (no spaces around the comma).
LANG_SET = Eng,Spn
# Presumably the n-gram order used by the language models - confirm against the consumer.
NGRAM = 5
# Boolean switches, all currently disabled; their exact effect is defined by
# the code that reads this file.
TOKENIZE = False
HEADER = False
VERBOSE = False
[TRAIN_PATHS]
# Training corpus files, one per language.
# Presumably LANG1/LANG2 follow the order of LANG_SET (Eng, Spn) - confirm against the consumer.
# Both paths are relative ("./..."); they resolve against whatever directory
# the consumer treats as its base, typically the working directory.
LANG1_TRAIN = ./TrainingCorpora/EngCorpus-1m.txt
LANG2_TRAIN = ./TrainingCorpora/MexCorpus.txt
[CLASS_PATHS]
# Locations of the Stanford NER jar and the per-language classifier models.
# The three commented-out entries below point at the older 2015-04-20
# distribution in a sibling directory; they are inactive.
# CLASS_JAR = ../stanford-ner-2015-04-20/stanford-ner.jar
# LANG1_CLASS = ../stanford-ner-2015-04-20/classifiers/english.all.3class.distsim.crf.ser.gz
# LANG2_CLASS = ../stanford-ner-2015-04-20/classifiers/spanish.ancora.distsim.s512.crf.ser.gz
# Active entries: the 2020-11-17 distribution under codeswitch-annotation/lib.
# LANG1_CLASS is the English model, LANG2_CLASS the Spanish model.
CLASS_JAR = codeswitch-annotation/lib/stanford-ner-2020-11-17/stanford-ner.jar
LANG1_CLASS = codeswitch-annotation/lib/stanford-ner-2020-11-17/classifiers/english.all.3class.distsim.crf.ser.gz
LANG2_CLASS = codeswitch-annotation/lib/stanford-ner-2020-11-17/classifiers/spanish.ancora.distsim.s512.crf.ser.gz
[GOLD]
# Gold-standard (annotated) corpus settings.
#
# Exactly one GOLD_PATH line should be active at a time. The switching notes
# are kept on their own lines: Python's configparser does not strip inline
# comments by default, so a trailing "# ..." after the path would become part
# of the path value.
#
# Earlier Killer Cronicas path, left commented out:
#GOLD_PATH = ./KC_Test/KillerCronicasGoldStandard.txt
# Killer Cronicas: uncomment the next line and comment out the SpinTX line.
#GOLD_PATH = codeswitch-annotation/KillerCronicas/KillerCronicasGoldStandard
# SpinTX: keep the next line active and the Killer Cronicas line commented out.
GOLD_PATH = codeswitch-annotation/SpinTX/99SpinTXcorpus.txt_annotated.txt
# Column delimiter of the gold-standard file.
# NOTE(review): configparser keeps the surrounding double quotes as literal
# characters - confirm that the consumer strips them before splitting.
GOLD_DELIMITER = " "
# Comma-separated tag names; presumably extra gold labels that count as
# language 1 / language 2 respectively - confirm against the consumer.
LANG1_OTHER = NonStEng,EngNoSpace,EngNonSt
LANG2_OTHER = NonStSpn,SpnNoSpace
# Presumably the gold label that marks named entities - confirm against the consumer.
NE_TAG = NamedEnt
# Present with an empty value (parsed as an empty string).
OTHER_TAGS =
[ADVANCED]
# Tuning knobs.
# Presumably the number of items sent to the NER classifier per batch; the
# unit (tokens, lines, ...) is not visible here - confirm against the consumer.
NER_CHUNK_SIZE = 1000