# configuration-sample.yml
---
## Prediction
output_path: ./prediction/configuration-sample

## Probase
lemma_dir: ./spacy-probase/spacy/
model_dir: ./model/configuration-sample/

## Data
# One file path per partition; the keys match the data_type names
# (train/dev/test) used elsewhere in this file.
train: ./data/training/train.txt
dev: ./data/training/validation.txt
test: ./ref/truth.txt
## Features
# Names of the data partitions; each one has a matching top-level path key.
data_type:
  - train
  - dev
  - test

# Pre-trained embedding sources, one mapping per source:
#   name   - label for the embedding (appears to be what feature_configs
#            refers to)
#   path   - location of the vector file
#   is_bin - presumably true when the vector file is stored in binary
#            format; confirm against the loader
pretrain_embeddings:
  - name: GloVe
    path: ./data/vectors/glove.6B.300d.w2v
    is_bin: false
  - name: Word2Vec
    path: ./data/vectors/GoogleNews-vectors-negative300.bin
    is_bin: true
  - name: FastText
    path: ./data/vectors/wiki.en.vec
    is_bin: false
## Evaluation
random_seed: 1

# Which data to use for cross validation, and the number of folds.
# `uses` is a list of data_type values, i.e. train/dev/test
# `folds` is the number of folds for cross validation
cross_validation_configs:
  - uses:
      - dev
    folds: 5
# How to split the data for training and testing.
# `train` and `test` are each a list of data_type values, i.e. train/dev/test
#
# For instance:
#
#   - train:
#       - train
#       - dev
#     test:
#       - test
#
# means to train with the train and dev partitions together,
# and test with the test partition
split_configs:
  - train:
      - dev
    test:
      - test
  - train:
      - train
      - dev
    test:
      - test
  - train:
      - train
    test:
      - test
  - train:
      - train
    test:
      - dev
# Which features to use in each training process.
# Each entry is itself a list of feature names.
# For instance:
#
#   - - GloVe
#     - Probase
#
# means to use features from GloVe and Probase together (concatenated)
# and
#
#   - - Probase
#
# means to use only the features from Probase
feature_configs:
  - - Probase
  - - GloVe
  - - GloVe
    - Probase
  - - Word2Vec
  - - Word2Vec
    - Probase
  - - FastText
  - - FastText
    - Probase
# Classifiers to use for training, one identifier per entry
classifier_configs:
  - logistic
  - dtc
  - svm-rbf
  - rf
  - LinearSVC