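"""baseline.py

Post-processing baselines that improve the fairness of an already trained Keras
classifier by relabeling its predictions:
  - random: flip the predicted labels of randomly chosen samples toward demographic parity
  - roc:    reject option based classification; flip predicted labels inside a
            critical region around the classification boundary
  - igd:    individual group debiasing; flip predicted labels of unfavored samples
            whose individual bias score exceeds a threshold

Reports accuracy, demographic parity (group fairness), and the generalized
entropy (Theil) index (individual fairness) on the validation and testing sets.
"""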
import numpy as np
import timeit
import datetime
import copy
import argparse
from keras.models import load_model
import read_data
import group_fairness
import individual_fairness
parser = argparse.ArgumentParser()
parser.add_argument('--dataset', default="german", type=str, nargs='?', help='dataset')
parser.add_argument('--sensitive', default="age", type=str, nargs='?', help='sensitive feature')
parser.add_argument('--validation_size', default="0.05", type=str, nargs='?', help='validation size')
parser.add_argument('--test_size', default="0.05", type=str, nargs='?', help='test size')
parser.add_argument('--method', default="roc", type=str, nargs='?', help='method')
parser.add_argument('--budget', default="50", type=str, nargs='?', help='budget')
parser.add_argument('--fair_constraint', default="max", type=str, nargs='?', help='fairness constraint')
parser.add_argument('--n_run', default="5", type=str, nargs='?', help='no of running times')
args = parser.parse_args()
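# example invocation (illustrative; all flags are optional and default to the values above):
#   python baseline.py --dataset german --sensitive age --method roc --budget 50 --fair_constraint 0.95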
print("dataset: {}, sensitive: {}, validation_size: {}, test_size: {}, "
"method: {}, budget: {}, fair_constraint: {}, n_run: {}".
format(args.dataset, args.sensitive, args.validation_size, args.test_size,
args.method, args.budget, args.fair_constraint, args.n_run))
dataset = args.dataset
sensitive = args.sensitive
valid_size = float(args.validation_size)
test_size = float(args.test_size)
method = args.method # random, roc (reject option based classification), igd (individual group debiasing)
budget = int(args.budget) # no of iterations
fair_constraint = args.fair_constraint # max or 0.95
if fair_constraint != "max":
    fair_bound = float(fair_constraint)
n_run = int(args.n_run)
load_folder_model = "initial_model"
save_folder_model = "relabel_model"
# relabel samples in critical region
def relabel(X_true, y_true, current_ROC_margin, current_class_threshold=0.5):
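    """Reject option based classification (ROC) relabeling.

    Predictions inside the critical region
    [current_class_threshold - current_ROC_margin, current_class_threshold + current_ROC_margin]
    are flipped in favor of the unfavored group: favored samples (S=1) get the
    negative label and unfavored samples (S=0) get the positive label. For example,
    with threshold 0.5 and margin 0.1, a favored sample scored 0.55 is relabeled
    to 0 and an unfavored sample scored 0.45 is relabeled to 1.
    """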
    # X_true contains samples
    # y_true is predicted scores of the initial model
    # y_pred_round is new predicted labels after relabeling
    y_pred_round = np.zeros(len(y_true))
    # find positive samples whose initial predicted scores are greater than the current classification threshold
    fav_pred_inds = (y_true > current_class_threshold)
    # find negative samples whose initial predicted scores are smaller than the current classification threshold
    unfav_pred_inds = ~fav_pred_inds
    # reformat indices
    fav_pred_inds = np.array(fav_pred_inds).reshape(1, -1)[0]
    unfav_pred_inds = np.array(unfav_pred_inds).reshape(1, -1)[0]
    # assign predicted positive labels to positive samples
    y_pred_round[fav_pred_inds] = 1
    # assign predicted negative labels to negative samples
    y_pred_round[unfav_pred_inds] = 0
    # find samples in the critical region around the classification boundary
    crit_region_inds = np.logical_and(y_true <= current_class_threshold + current_ROC_margin,
                                      y_true >= current_class_threshold - current_ROC_margin)
    # find favored and unfavored samples
    favored_indices = (X_true[:, sen_idx] == 1)
    unfavored_indices = (X_true[:, sen_idx] == 0)
    # reformat indices
    favored_indices = np.array(favored_indices).reshape(-1, 1)
    unfavored_indices = np.array(unfavored_indices).reshape(-1, 1)
    # relabel samples in the critical region:
    # favored samples are assigned negative labels whereas unfavored samples are assigned positive labels
    crit_favored_indices = np.logical_and(crit_region_inds, favored_indices)
    crit_unfavored_indices = np.logical_and(crit_region_inds, unfavored_indices)
    # reformat indices
    crit_favored_indices = np.array(crit_favored_indices).reshape(1, -1)[0]
    crit_unfavored_indices = np.array(crit_unfavored_indices).reshape(1, -1)[0]
    y_pred_round[crit_favored_indices] = 0
    y_pred_round[crit_unfavored_indices] = 1
    return y_pred_round
# debias samples that exhibit individual bias
def debias(X_true, y_true, y_true_inverse, indi_bias_scores, current_indi_bias_threshold):
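    """Individual group debiasing (igd) relabeling.

    Samples whose individual bias score exceeds current_indi_bias_threshold are
    considered biased; biased samples from the unfavored group (S=0) have their
    predicted score replaced by the score obtained with the sensitive feature
    inverted, and all scores are then rounded to labels. For example, with
    threshold 0.1, an unfavored sample scored f(x, S=0)=0.45 and f(x, S=1)=0.62
    has bias score 0.17 > 0.1, so its prediction becomes 0.62, which rounds to 1.
    """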
    # X_true contains samples
    # y_true is predicted scores of the initial model
    # y_true_inverse is predicted scores of the initial model with the sensitive feature inverted
    # indi_bias_scores contains the individual bias scores of the samples in X_true
    # y_pred is new predicted scores after relabeling
    y_pred = copy.deepcopy(y_true)
    # find biased samples whose individual bias scores are greater than the current individual bias threshold
    biased_indices = (indi_bias_scores > current_indi_bias_threshold)
    # find unbiased samples whose individual bias scores are smaller than the current individual bias threshold
    unbiased_indices = ~biased_indices
    # reformat indices
    biased_indices = np.array(biased_indices).reshape(1, -1)[0]
    unbiased_indices = np.array(unbiased_indices).reshape(1, -1)[0]
    # create individual bias indicators
    indi_bias_indicators = np.zeros(len(indi_bias_scores))
    # assign 1 to biased samples
    indi_bias_indicators[biased_indices] = 1
    # assign 0 to unbiased samples
    indi_bias_indicators[unbiased_indices] = 0
    # NOTE: the igd method only focuses on relabeling unfavored samples
    # find unfavored samples
    unfavored_indices = (X_true[:, sen_idx] == 0)
    # find biased unfavored samples
    biased_unfavored_indices = np.logical_and(indi_bias_indicators.astype(bool), unfavored_indices)
    # relabel biased unfavored samples
    y_pred[biased_unfavored_indices] = y_true_inverse[biased_unfavored_indices]
    # y_pred_round is new predicted labels after relabeling
    y_pred_round = np.around(y_pred)
    return y_pred_round
start_date_time = datetime.datetime.now()
start_time = timeit.default_timer()
# read data
_, _, _, _, sen_var_indices = read_data.from_file(dataset, sensitive)
# get sensitive feature index
sen_idx = sen_var_indices[0]
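# the code assumes a binary sensitive feature: 1 marks the favored group, 0 the unfavored group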
# accuracy, group_fairness, individual_fairness
acc_valid_baseline, fair_valid_baseline, individual_valid_baseline = np.zeros(n_run), np.zeros(n_run), np.zeros(n_run)
acc_test_baseline, fair_test_baseline, individual_test_baseline = np.zeros(n_run), np.zeros(n_run), np.zeros(n_run)
for run in range(n_run):
print("run={}".format(run))
# load initial trained model from file
trained_model = load_model("./{}/model_{}_{}_vs{}_ts{}_run{}.h5".
format(load_folder_model, dataset, sensitive, valid_size, test_size, run))
# load validation set and prediction from file
with open("./{}/X_valid_{}_{}_vs{}_ts{}_run{}.file".
format(load_folder_model, dataset, sensitive, valid_size, test_size, run), "rb") as f:
X_valid = np.load(f)
with open("./{}/y_valid_{}_{}_vs{}_ts{}_run{}.file".
format(load_folder_model, dataset, sensitive, valid_size, test_size, run), "rb") as f:
y_valid = np.load(f)
with open("./{}/y_pred_validation_{}_{}_vs{}_ts{}_run{}.file".
format(load_folder_model, dataset, sensitive, valid_size, test_size, run), "rb") as f:
y_pred_validation = np.load(f)
# compute initial predicted labels on validation set
y_pred_validation_round = np.around(y_pred_validation)
# compute accuracy and fairness on validation set
accuracy_overall_valid, demographic_parity_valid, \
prob_favored_pred_positive_valid, prob_unfavored_pred_positive_valid \
= group_fairness.compute_accuracy_fairness(X_valid, sen_idx, y_valid, y_pred_validation_round)
# load testing set and prediction from file
with open("./{}/X_test_{}_{}_vs{}_ts{}_run{}.file".
format(load_folder_model, dataset, sensitive, valid_size, test_size, run), "rb") as f:
X_test = np.load(f)
with open("./{}/y_test_{}_{}_vs{}_ts{}_run{}.file".
format(load_folder_model, dataset, sensitive, valid_size, test_size, run), "rb") as f:
y_test = np.load(f)
with open("./{}/y_pred_testing_{}_{}_vs{}_ts{}_run{}.file".
format(load_folder_model, dataset, sensitive, valid_size, test_size, run), "rb") as f:
y_pred_testing = np.load(f)
# compute initial predicted labels on testing set
y_pred_testing_round = np.around(y_pred_testing)
# compute accuracy and fairness on testing set
accuracy_overall_test, demographic_parity_test, \
prob_favored_pred_positive_test, prob_unfavored_pred_positive_test \
= group_fairness.compute_accuracy_fairness(X_test, sen_idx, y_test, y_pred_testing_round)
# baseline to improve fairness
n_valid = len(y_valid)
n_test = len(y_test)
print("n_valid: {}, n_test: {}".format(n_valid, n_test))
if method == "random":
# select randomly samples from validation/testing set and relabel them to improve fairness
# no of random samples equals to no of optimization iterations in other methods
# NOTE: random method works directly on testing set, it doesn't require validation set
print("relabel validation set")
sample_indices = np.random.choice(range(n_valid), budget)
for sample_cnt, sample_idx in enumerate(sample_indices):
print("sample_cnt: {}, sample_idx: {}".format(sample_cnt, sample_idx))
# get a random sample
random_sample = X_valid[sample_idx]
# get its sensitive feature
random_sample_sen = random_sample[sen_idx]
print("random_sample_sen: {}".format(random_sample_sen))
# more positive outcome for favored group than positive outcome for unfavored group
if prob_favored_pred_positive_valid > prob_unfavored_pred_positive_valid:
# this sample belongs to favored group
if random_sample_sen == 1:
# we assign negative outcome to decrease prob_favored_pred_positive
y_pred_validation_round[sample_idx] = 0
# this sample belongs to unfavored group
elif random_sample_sen == 0:
# we assign positive outcome to increase prob_unfavored_pred_positive
y_pred_validation_round[sample_idx] = 1
# less positive outcome for favored group than positive outcome for unfavored group
if prob_favored_pred_positive_valid < prob_unfavored_pred_positive_valid:
# this sample belongs to favored group
if random_sample_sen == 1:
# we assign positive outcome to increase prob_favored_pred_positive
y_pred_validation_round[sample_idx] = 1
# this sample belongs to unfavored group
elif random_sample_sen == 0:
# we assign negative outcome to decrease prob_unfavored_pred_positive
y_pred_validation_round[sample_idx] = 0
# re-compute accuracy and fairness on validation set
accuracy_overall_valid, demographic_parity_valid, \
prob_favored_pred_positive_valid, prob_unfavored_pred_positive_valid \
= group_fairness.compute_accuracy_fairness(X_valid, sen_idx, y_valid, y_pred_validation_round)
print("relabeling func on validation")
print("accuracy={}, fairness={}, p_favored_positive={}, p_unfavored_positive={}".
format(round(accuracy_overall_valid, 2), round(demographic_parity_valid, 2),
round(prob_favored_pred_positive_valid, 4), round(prob_unfavored_pred_positive_valid, 4)))
# re-compute theil_index on validation set
theil_index_valid = individual_fairness.generalized_entropy_index(y_valid, y_pred_validation_round)
print("theil_index={}".format(round(theil_index_valid, 2)))
print("relabel testing set")
sample_indices = np.random.choice(range(n_test), budget)
for sample_cnt, sample_idx in enumerate(sample_indices):
print("sample_cnt: {}, sample_idx: {}".format(sample_cnt, sample_idx))
# get a random sample
random_sample = X_test[sample_idx]
# get its sensitive feature
random_sample_sen = random_sample[sen_idx]
print("random_sample_sen: {}".format(random_sample_sen))
# more positive outcome for favored group than positive outcome for unfavored group
if prob_favored_pred_positive_test > prob_unfavored_pred_positive_test:
# this sample belongs to favored group
if random_sample_sen == 1:
# we assign negative outcome to decrease prob_favored_pred_positive
y_pred_testing_round[sample_idx] = 0
# this sample belongs to unfavored group
elif random_sample_sen == 0:
# we assign positive outcome to increase prob_unfavored_pred_positive
y_pred_testing_round[sample_idx] = 1
# less positive outcome for favored group than positive outcome for unfavored group
if prob_favored_pred_positive_test < prob_unfavored_pred_positive_test:
# this sample belongs to favored group
if random_sample_sen == 1:
# we assign positive outcome to increase prob_favored_pred_positive
y_pred_testing_round[sample_idx] = 1
# this sample belongs to unfavored group
elif random_sample_sen == 0:
# we assign negative outcome to decrease prob_unfavored_pred_positive
y_pred_testing_round[sample_idx] = 0
# re-compute accuracy and fairness on testing set
accuracy_overall_test, demographic_parity_test, \
prob_favored_pred_positive_test, prob_unfavored_pred_positive_test \
= group_fairness.compute_accuracy_fairness(X_test, sen_idx, y_test, y_pred_testing_round)
print("relabeling func on testing")
print("accuracy={}, fairness={}, p_favored_positive={}, p_unfavored_positive={}".
format(round(accuracy_overall_test, 2), round(demographic_parity_test, 2),
round(prob_favored_pred_positive_test, 4), round(prob_unfavored_pred_positive_test, 4)))
# re-compute theil_index on testing set
theil_index_test = individual_fairness.generalized_entropy_index(y_test, y_pred_testing_round)
print("theil_index={}".format(round(theil_index_test, 2)))
    # roc fixes the classification threshold at 0.5 (the default value) and only searches for the optimal ROC margin
    if method == "roc":
        # search range of the ROC margin
        low_ROC_margin = 0.0
        high_ROC_margin = 0.5
        # number of ROC margins to search
        num_ROC_margin = budget
        if fair_constraint != "max":
            # upper and lower bounds of fairness
            metric_ub = 1.0 - fair_bound
            metric_lb = fair_bound - 1.0
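            # e.g. fair_constraint 0.95 requires the disparity to lie in [-0.05, 0.05]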
        # optimal ROC margin
        optimal_ROC_margin = None
        # step 1: search for the optimal ROC margin on the validation set such that the margin is small
        # (i.e. few samples are relabeled => accuracy is maintained) while the
        # fairness score satisfies the fairness constraint (i.e. fairness is improved)
        fairness_arr = np.zeros(num_ROC_margin)
        ROC_margin_arr = np.zeros_like(fairness_arr)
        cnt = 0
        # iterate through possible ROC margins
        for ROC_margin in np.linspace(low_ROC_margin, high_ROC_margin, num_ROC_margin):
            print("cnt: {}".format(cnt))
            print("current ROC_margin: {}".format(round(ROC_margin, 4)))
            # use the current ROC margin to relabel samples in the critical region
            y_pred_validation_round = relabel(X_valid, y_pred_validation, ROC_margin)
            # re-compute accuracy and fairness on the validation set
            accuracy_overall_valid, demographic_parity_valid, \
                prob_favored_pred_positive_valid, prob_unfavored_pred_positive_valid \
                = group_fairness.compute_accuracy_fairness(X_valid, sen_idx, y_valid, y_pred_validation_round)
            print("relabeling func on validation")
            print("accuracy={}, fairness={}, p_favored_positive={}, p_unfavored_positive={}".
                  format(round(accuracy_overall_valid, 2), round(demographic_parity_valid, 2),
                         round(prob_favored_pred_positive_valid, 4), round(prob_unfavored_pred_positive_valid, 4)))
            # re-compute theil_index on the validation set
            theil_index_valid = individual_fairness.generalized_entropy_index(y_valid, y_pred_validation_round)
            print("theil_index={}".format(round(theil_index_valid, 2)))
            # compute fairness with the current ROC margin
            # in the ROC method, fairness is defined as P(y=positive|S=unprivileged) - P(y=positive|S=privileged)
            fairness_arr[cnt] = prob_unfavored_pred_positive_valid - prob_favored_pred_positive_valid
            ROC_margin_arr[cnt] = ROC_margin
            cnt += 1
        # find good fairness scores that satisfy the fairness constraint
        if fair_constraint == "max":
            rel_inds = (np.abs(fairness_arr) == np.min(np.abs(fairness_arr)))
        else:
            rel_inds = np.logical_and(fairness_arr >= metric_lb, fairness_arr <= metric_ub)
        # if we find some good fairness scores, then pick the one with the highest possible accuracy
        # (i.e. the smallest ROC margin => the smallest critical region => the fewest samples relabeled)
        if any(rel_inds):
            print("Found some good fairness scores")
            # get the good fairness score with the smallest ROC margin
            best_ind = np.where(ROC_margin_arr[rel_inds] == np.min(ROC_margin_arr[rel_inds]))[0][0]
        # cannot find any good fairness score satisfying the fairness constraint:
        # fall back to the best fairness score (i.e. the smallest discrimination)
        else:
            print("Cannot find any good fairness score")
            print("fairness_arr: {}".format(fairness_arr))
            rel_inds = np.ones(len(fairness_arr), dtype=bool)
            print("fairness_arr[rel_inds]: {}".format(fairness_arr[rel_inds]))
            best_ind = np.where(np.abs(fairness_arr[rel_inds]) == np.min(np.abs(fairness_arr[rel_inds])))[0][0]
            print("best_ind: {}, smallest_disc: {}".format(best_ind, fairness_arr[rel_inds][best_ind]))
        # get the optimal ROC margin (best index among good fairness scores)
        optimal_ROC_margin = ROC_margin_arr[rel_inds][best_ind]
        print("optimal ROC_margin: {}".format(round(optimal_ROC_margin, 4)))
        # step 2: use the optimal ROC margin to relabel samples in the validation set and testing set
        print("relabeling func on validation")
        y_pred_validation_round = relabel(X_valid, y_pred_validation, optimal_ROC_margin)
        # reformat y_pred_validation_round to the same shape as y_valid
        y_pred_validation_round = np.array(y_pred_validation_round).reshape(-1, 1)
        # re-compute accuracy and fairness on the validation set
        accuracy_overall_valid, demographic_parity_valid, \
            prob_favored_pred_positive_valid, prob_unfavored_pred_positive_valid \
            = group_fairness.compute_accuracy_fairness(X_valid, sen_idx, y_valid, y_pred_validation_round)
        print("accuracy={}, fairness={}, p_favored_positive={}, p_unfavored_positive={}".
              format(round(accuracy_overall_valid, 2), round(demographic_parity_valid, 2),
                     round(prob_favored_pred_positive_valid, 4), round(prob_unfavored_pred_positive_valid, 4)))
        # re-compute theil_index on the validation set
        theil_index_valid = individual_fairness.generalized_entropy_index(y_valid, y_pred_validation_round)
        print("theil_index={}".format(round(theil_index_valid, 2)))
        print("relabeling func on testing")
        y_pred_testing_round = relabel(X_test, y_pred_testing, optimal_ROC_margin)
        # reformat y_pred_testing_round to the same shape as y_test
        y_pred_testing_round = np.array(y_pred_testing_round).reshape(-1, 1)
        # re-compute accuracy and fairness on the testing set
        accuracy_overall_test, demographic_parity_test, \
            prob_favored_pred_positive_test, prob_unfavored_pred_positive_test = \
            group_fairness.compute_accuracy_fairness(X_test, sen_idx, y_test, y_pred_testing_round)
        print("accuracy={}, fairness={}, p_favored_positive={}, p_unfavored_positive={}".
              format(round(accuracy_overall_test, 2), round(demographic_parity_test, 2),
                     round(prob_favored_pred_positive_test, 4), round(prob_unfavored_pred_positive_test, 4)))
        # re-compute theil_index on the testing set
        theil_index_test = individual_fairness.generalized_entropy_index(y_test, y_pred_testing_round)
        print("theil_index={}".format(round(theil_index_test, 2)))
if method == "igd":
# search range for individual bias threshold
low_indi_bias_thresh = 0.0
high_indi_bias_thresh = 1.0
# no of individual bias thresholds to search
num_indi_bias_thresh = budget
if fair_constraint != "max":
# upper and lower bounds of fairness
metric_ub = 1.0 - fair_bound
metric_lb = fair_bound - 1.0
# optimal individual bias threshold
optimal_indi_bias_threshold = None
# step 1: compute individual bias score for each sample in validation set
# individual bias score of a sample x is the difference in initial predicted score if the sensitive feature of x
# is set inversely i.e. indi_bias_score(x) = f(x, S=1) - f(x, S=0)
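        # (as computed below, the score is f(x, S=inverted) - f(x, S=original); for unfavored samples
        #  with S=0 this equals f(x, S=1) - f(x, S=0), and igd only relabels unfavored samples)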
        # create a new validation set where the sensitive value is inverted
        X_valid_inverse = copy.deepcopy(X_valid)
        X_valid_inverse[:, sen_idx] = 1 - X_valid_inverse[:, sen_idx]
        # compute the initial predicted scores on the inverse validation set
        y_pred_validation_inverse = trained_model.predict(X_valid_inverse)
        indi_bias_scores_valid = y_pred_validation_inverse - y_pred_validation
        # step 2: search for the optimal individual bias threshold on the validation set such that
        # the threshold is as high as possible (i.e. the selected samples really have serious individual biases)
        # while the group fairness score satisfies the group fairness constraint (i.e. group fairness is improved)
        group_fairness_arr = np.zeros(num_indi_bias_thresh)
        indi_bias_thresh_arr = np.zeros_like(group_fairness_arr)
        cnt = 0
        # iterate through possible individual bias thresholds
        for indi_bias_thresh in np.linspace(low_indi_bias_thresh, high_indi_bias_thresh, num_indi_bias_thresh):
            print("cnt: {}".format(cnt))
            print("current indi_bias_threshold: {}".format(round(indi_bias_thresh, 4)))
            # use the current individual bias threshold to select biased samples and relabel them
            y_pred_validation_round = debias(X_valid, y_pred_validation, y_pred_validation_inverse,
                                             indi_bias_scores_valid, indi_bias_thresh)
            # re-compute accuracy and fairness on the validation set
            accuracy_overall_valid, demographic_parity_valid, \
                prob_favored_pred_positive_valid, prob_unfavored_pred_positive_valid \
                = group_fairness.compute_accuracy_fairness(X_valid, sen_idx, y_valid, y_pred_validation_round)
            print("relabeling func on validation")
            print("accuracy={}, fairness={}, p_favored_positive={}, p_unfavored_positive={}".
                  format(round(accuracy_overall_valid, 2), round(demographic_parity_valid, 2),
                         round(prob_favored_pred_positive_valid, 4), round(prob_unfavored_pred_positive_valid, 4)))
            # re-compute theil_index on the validation set
            theil_index_valid = individual_fairness.generalized_entropy_index(y_valid, y_pred_validation_round)
            print("theil_index={}".format(round(theil_index_valid, 2)))
            # compute group fairness with the current individual bias threshold
            # in the IGD method, group fairness is defined as P(y=positive|S=unprivileged) - P(y=positive|S=privileged)
            group_fairness_arr[cnt] = prob_unfavored_pred_positive_valid - prob_favored_pred_positive_valid
            indi_bias_thresh_arr[cnt] = indi_bias_thresh
            cnt += 1
        # find good group fairness scores that satisfy the group fairness constraint
        if fair_constraint == "max":
            rel_inds = (np.abs(group_fairness_arr) == np.min(np.abs(group_fairness_arr)))
        else:
            rel_inds = np.logical_and(group_fairness_arr >= metric_lb, group_fairness_arr <= metric_ub)
        # if we find some good group fairness scores, then pick the one with the highest individual bias threshold,
        # since that means the chosen biased samples really have serious individual biases
        if any(rel_inds):
            print("Found some good group fairness scores")
            # get the good group fairness score with the highest individual bias threshold
            best_ind = np.where(indi_bias_thresh_arr[rel_inds] == np.max(indi_bias_thresh_arr[rel_inds]))[0][0]
        # cannot find any good group fairness score satisfying the group fairness constraint:
        # fall back to the best group fairness score (i.e. the smallest group discrimination)
        else:
            print("Cannot find any good group fairness score")
            print("fairness_arr: {}".format(group_fairness_arr))
            rel_inds = np.ones(len(group_fairness_arr), dtype=bool)
            print("fairness_arr[rel_inds]: {}".format(group_fairness_arr[rel_inds]))
            best_ind = np.where(np.abs(group_fairness_arr[rel_inds]) == np.min(np.abs(group_fairness_arr[rel_inds])))[0][0]
            print("best_ind: {}, smallest_disc: {}".format(best_ind, group_fairness_arr[rel_inds][best_ind]))
        # get the optimal individual bias threshold (best index among good fairness scores)
        optimal_indi_bias_threshold = indi_bias_thresh_arr[rel_inds][best_ind]
        print("optimal indi_bias_threshold: {}".format(round(optimal_indi_bias_threshold, 4)))
        # step 3: use the optimal individual bias threshold to relabel samples in the validation set and testing set
        y_pred_validation_round = debias(X_valid, y_pred_validation, y_pred_validation_inverse,
                                         indi_bias_scores_valid, optimal_indi_bias_threshold)
        # re-compute accuracy and fairness on the validation set
        accuracy_overall_valid, demographic_parity_valid, \
            prob_favored_pred_positive_valid, prob_unfavored_pred_positive_valid \
            = group_fairness.compute_accuracy_fairness(X_valid, sen_idx, y_valid, y_pred_validation_round)
        print("relabeling func on validation")
        print("accuracy={}, fairness={}, p_favored_positive={}, p_unfavored_positive={}".
              format(round(accuracy_overall_valid, 2), round(demographic_parity_valid, 2),
                     round(prob_favored_pred_positive_valid, 4), round(prob_unfavored_pred_positive_valid, 4)))
        # re-compute theil_index on the validation set
        theil_index_valid = individual_fairness.generalized_entropy_index(y_valid, y_pred_validation_round)
        print("theil_index={}".format(round(theil_index_valid, 2)))
        # create a new testing set where the sensitive value is inverted
        X_test_inverse = copy.deepcopy(X_test)
        X_test_inverse[:, sen_idx] = 1 - X_test_inverse[:, sen_idx]
        # compute the initial predicted scores on the inverse testing set
        y_pred_testing_inverse = trained_model.predict(X_test_inverse)
        indi_bias_scores_test = y_pred_testing_inverse - y_pred_testing
        y_pred_testing_round = debias(X_test, y_pred_testing, y_pred_testing_inverse,
                                      indi_bias_scores_test, optimal_indi_bias_threshold)
        # re-compute accuracy and fairness on the testing set
        accuracy_overall_test, demographic_parity_test, \
            prob_favored_pred_positive_test, prob_unfavored_pred_positive_test \
            = group_fairness.compute_accuracy_fairness(X_test, sen_idx, y_test, y_pred_testing_round)
        print("relabeling func on testing")
        print("accuracy={}, fairness={}, p_favored_positive={}, p_unfavored_positive={}".
              format(round(accuracy_overall_test, 2), round(demographic_parity_test, 2),
                     round(prob_favored_pred_positive_test, 4), round(prob_unfavored_pred_positive_test, 4)))
        # re-compute theil_index on the testing set
        theil_index_test = individual_fairness.generalized_entropy_index(y_test, y_pred_testing_round)
        print("theil_index={}".format(round(theil_index_test, 2)))
    acc_valid_baseline[run] = accuracy_overall_valid
    fair_valid_baseline[run] = demographic_parity_valid
    individual_valid_baseline[run] = theil_index_valid
    acc_test_baseline[run] = accuracy_overall_test
    fair_test_baseline[run] = demographic_parity_test
    individual_test_baseline[run] = theil_index_test
    # save new predicted labels of relabeling function to file
    if method == "random":
        with open("./{}/y_relabel_validation_{}_{}_{}_vs{}_ts{}_run{}.file".
                  format(save_folder_model, method, dataset, sensitive, valid_size, test_size, run), "wb") as f:
            np.save(f, y_pred_validation_round)
        with open("./{}/y_relabel_testing_{}_{}_{}_vs{}_ts{}_run{}.file".
                  format(save_folder_model, method, dataset, sensitive, valid_size, test_size, run), "wb") as f:
            np.save(f, y_pred_testing_round)
    else:
        with open("./{}/y_relabel_validation_{}_{}_{}_vs{}_ts{}_fair_{}_run{}.file".
                  format(save_folder_model, method, dataset, sensitive, valid_size, test_size, fair_constraint, run), "wb") as f:
            np.save(f, y_pred_validation_round)
        with open("./{}/y_relabel_testing_{}_{}_{}_vs{}_ts{}_fair_{}_run{}.file".
                  format(save_folder_model, method, dataset, sensitive, valid_size, test_size, fair_constraint, run), "wb") as f:
            np.save(f, y_pred_testing_round)
# end run
end_date_time = datetime.datetime.now()
end_time = timeit.default_timer()
print("start date time: {} and end date time: {}".format(start_date_time, end_date_time))
print("runtime: {}(s)".format(round(end_time-start_time, 2)))
# save result to file
acc_valid, acc_valid_std = round(np.mean(acc_valid_baseline), 2), round(np.std(acc_valid_baseline), 2)
fair_valid, fair_valid_std = round(np.mean(fair_valid_baseline), 2), round(np.std(fair_valid_baseline), 2)
individual_valid, individual_valid_std = round(np.mean(individual_valid_baseline), 2), round(np.std(individual_valid_baseline), 2)
acc_test, acc_test_std = round(np.mean(acc_test_baseline), 2), round(np.std(acc_test_baseline), 2)
fair_test, fair_test_std = round(np.mean(fair_test_baseline), 2), round(np.std(fair_test_baseline), 2)
individual_test, individual_test_std = round(np.mean(individual_test_baseline), 2), round(np.std(individual_test_baseline), 2)
if method == "random":
file_name = './{}/_{}_{}_{}_vs{}_ts{}_budget_{}.txt'.format(save_folder_model, method, dataset, sensitive,
valid_size, test_size, budget)
else:
file_name = './{}/_{}_{}_{}_vs{}_ts{}_budget_{}_fair_{}.txt'.format(save_folder_model, method, dataset, sensitive,
valid_size, test_size, budget, fair_constraint)
with open(file_name, 'w') as f:
f.write("dataset: {}, sensitive: {}, validation_size: {}, test_size: {}\n".format(dataset, sensitive, valid_size, test_size))
if method == "random":
f.write("method: {}, budget: {}\n".format(method, budget))
else:
f.write("method: {}, budget: {}, fair_constraint: {}\n".format(method, budget, fair_constraint))
f.write("acc_valid: {} ({}), fair_valid: {} ({}), individual_valid: {} ({})\n".
format(acc_valid, acc_valid_std, fair_valid, fair_valid_std, individual_valid, individual_valid_std))
f.write("acc_test: {} ({}), fair_test: {} ({}), individual_test: {} ({})\n".
format(acc_test, acc_test_std, fair_test, fair_test_std, individual_test, individual_test_std))
f.write("start date time: {} and end date time: {}\n".format(start_date_time, end_date_time))
f.write("runtime: {}(s)\n".format(round(end_time-start_time, 2)))