# Code/alert_data/dataset_cfg.yaml

data_cols:
  # Column corresponding to the label.
  label: 'fraud_bool'

  # Timestamp column, for datasets whose splits have a temporal dependency.
  # This key can be omitted.
  timestamp: 'month'

  # Properties of the protected attribute. These can be omitted if there is
  # no protected attribute.
  # Always define:
  #   - feature: column name of the protected attribute
  #   - type: whether the protected attribute is numerical or categorical
  #     (the entry below spells the numerical case as 'numeric')
  # For a numerical protected attribute, also define:
  #   - protected_threshold: value that separates the two distinct groups
  #   - protected_class: 'higher' or 'lower', i.e. whether the values higher
  #     or lower than the threshold form the protected group
  # For a categorical protected attribute, also define:
  #   - protected_class: value that corresponds to the protected group
  protected:
    - feature: 'age'
      protected_class: 'higher'
      protected_threshold: 50
      type: 'numeric'

  # Column holding the ML model's score, if the experts have access to one.
  # This key can be omitted.
  model_score: 'model_score'

  # Columns of the categorical features. This key can be omitted.
  categorical:
    - 'payment_type'
    - 'employment_status'
    - 'housing_status'
    - 'source'
    - 'device_os'

# Dictionary of the possible values of each categorical feature.
#   key:   column name of the categorical feature
#   value: list of the possible values for that feature
#
# This ensures that the categorical features are encoded identically when
# passed to the LGBM models.
categorical_dict:
  device_os:
    - 'linux'
    - 'macintosh'
    - 'other'
    - 'windows'
    - 'x11'
  employment_status:
    - 'CA'
    - 'CB'
    - 'CC'
    - 'CD'
    - 'CE'
    - 'CF'
    - 'CG'
  housing_status:
    - 'BA'
    - 'BB'
    - 'BC'
    - 'BD'
    - 'BE'
    - 'BF'
    - 'BG'
  payment_type:
    - 'AA'
    - 'AB'
    - 'AC'
    - 'AD'
    - 'AE'
  source:
    - 'INTERNET'
    - 'TELEAPP'

# Cost structure of the problem, defined by setting
#   lambda = (cost of a false positive) / (cost of a false negative)
lambda: 0.057