-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdataset_cfg.yaml
67 lines (62 loc) · 1.9 KB
/
dataset_cfg.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# Column names of the dataset, keyed by the role each column plays.
data_cols:
  # Column corresponding to the label.
  label: fraud_bool

  # Timestamp column, for datasets whose splits have a temporal dependency
  # (can be omitted).
  timestamp: month

  # Properties of the protected attribute (can be omitted if there is no
  # protected attribute). Each entry must define:
  #   - feature: column name of the protected attribute
  #   - type: whether the protected attribute is NUMERICAL or CATEGORICAL
  # A NUMERICAL protected attribute must also define:
  #   - protected_threshold: value that separates the two distinct groups
  #   - protected_class: whether the values 'higher' or 'lower' than the
  #     threshold are the protected group
  # A CATEGORICAL protected attribute must also define:
  #   - protected_class: value that corresponds to the protected group
  # NOTE(review): the entry below spells its type `numeric` rather than
  # NUMERICAL - confirm which spelling the consumer of this file expects.
  protected:
    - feature: age
      type: numeric
      protected_threshold: 50
      protected_class: higher

  # Column holding the ML model's score, for the case where the experts have
  # access to it (can be omitted).
  model_score: model_score

  # Columns of the categorical features (can be omitted).
  categorical:
    - payment_type
    - employment_status
    - housing_status
    - source
    - device_os
# Dictionary of the possible categorical values.
#   key:    column of a categorical feature
#   values: every value that feature can take
# Listing the values here ensures that the categorical features are encoded
# identically when passed to the LGBM models.
categorical_dict:
  device_os:
    - linux
    - macintosh
    - other
    - windows
    - x11
  employment_status:
    - CA
    - CB
    - CC
    - CD
    - CE
    - CF
    - CG
  housing_status:
    - BA
    - BB
    - BC
    - BD
    - BE
    - BF
    - BG
  payment_type:
    - AA
    - AB
    - AC
    - AD
    - AE
  source:
    - INTERNET
    - TELEAPP
# Cost structure of the problem, expressed as the ratio
# lambda = (cost of a false positive) / (cost of a false negative)
lambda: 0.057