-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathFinal_Analysis.Rmd
577 lines (415 loc) · 21.3 KB
/
Final_Analysis.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
---
title: "BioInfo_KCrossVal_LogReg_Updated"
author: "Charles Marks"
date: "February 2019"
output: html_document
---
## This Document
The following code was utilized in the production of the manuscript entitled "Machine learning-based predictive modeling of surgical intervention in glaucoma using systemic data from electronic health records" by Sally L. Baxter, MD, MSc, Charles Marks, MPH, Tsung-Ting Kuo, PhD, Lucila Ohno-Machado, MD, PhD, Robert N. Weinreb, MD
## Initializing rMarkdown
Here we set options and upload the libraries necessary for our full analysis.
```{r setup, include=FALSE}
library(tidyverse)
library(tableone)
library(PerformanceAnalytics)
library(psych)
library(kableExtra)
library(ROCR)
library(randomForest)
library(neuralnet)
options(digits = 3, scipen = 999)
knitr::opts_chunk$set(warning=FALSE, message=FALSE, error = FALSE)
```
## Loading the Dataset
This dataset was produced from UCSD EHR records. As a human subjects protection, this dataset is not being made available with the code to analyze. Concerns and questions should be directed to the corresponding author of the work.
```{r dataset}
df <- read.csv("final_dataset.csv", header=TRUE)
```
## Exclusion Criteria
So, now we need to exclude people based on certain factors. The three exclusion factors are :
- Not having 6 months of records prior to first surgery date
- Having less than six months of records data in the system at all
- Not having any vital records in the system
```{r pressure, echo=FALSE}
#this will be our final dataset
clean_data <- data.frame(matrix(nrow=0,ncol=ncol(df)))
#set the column names to match our full data set
colnames(clean_data) <- colnames(df)
#this will contain all patients excluded because they didn't have 6 months of records prior to their first surgery
surgery_exclude_data <- data.frame(matrix(nrow=0,ncol=ncol(df)))
colnames(surgery_exclude_data) <- colnames(df)
#this will contain non-surgery patients with less than six months of data
newpatient_exclude_data <- data.frame(matrix(nrow=0,ncol=ncol(df)))
colnames(newpatient_exclude_data) <- colnames(df)
#this will contain all patients with no vital records
no_vital_data_exclude_data <- data.frame(matrix(nrow=0,ncol=ncol(df)))
colnames(no_vital_data_exclude_data) <- colnames(df)
#loop through all patients in the uploaded dataset to check
#each of the exclusion criteria
for(patient_num in 1:nrow(df))
{
#if patient has had surgery and has vital records
if(!is.na(df$first_surgery_date[patient_num]) &&
!is.na(df$first_vitals_contact_date[patient_num]))
{
#check the time difference between first vital contact and first surgery
time_test <- difftime(df$first_surgery_date[patient_num],
df$first_vitals_contact_date[patient_num], units = "days")
#if length is greater than 6 months (ie 180 days), include
if(time_test > 180)
{
# place in the included data set
clean_data <- rbind(clean_data, df[patient_num,])
} # else exclude by placing in the surgery exclude table
else
{
surgery_exclude_data <- rbind(surgery_exclude_data, df[patient_num,])
}
} #for non-surgery patients with vital records
else if(is.na(df$first_surgery_date[patient_num]) &&
!is.na(df$first_vitals_contact_date[patient_num]))
{
#check if first record for patient in the system occurred prior to 6 months
#before the end of September 2018
time_test2 <- difftime(as.Date("10/01/2018","%m/%d/%Y"),
df$first_vitals_contact_date[patient_num], units = "days")
#if 6 months or longer, include
if(time_test2 > 180)
{
clean_data <- rbind(clean_data, df[patient_num,])
} #if less than 6 months, exclude
else {
newpatient_exclude_data <- rbind(newpatient_exclude_data, df[patient_num,])
}
} #the remaining patients don't have vital records and therefore are excluded
else
{
no_vital_data_exclude_data <- rbind(no_vital_data_exclude_data, df[patient_num,])
}
}
```
## table one
Here is the descriptive statistics, stratified by history of glaucoma surgery. Rows from this can be taken to present a final descriptive table, if so desired. Descriptive table might not be overly meaningful tho...
```{r tableone}
tone_vars = colnames(clean_data)
#the CreateTableOne function didn't like having these columns included
tone_vars[12] <- NA
tone_vars[28] <- NA
tone <- tableone::CreateTableOne(vars = tone_vars, strata = c("any_glaucoma_surgey") ,data = clean_data)
# necessary step to make the kableExtra package work, this just makes the output easier
# to read for internal use
tone_df <- print(tone, printToggle = FALSE, noSpaces = TRUE)
# prints table one
kable(tone_df) %>%
kable_styling(bootstrap_options = "striped", full_width = F)
```
## Bivariate Associations
We wanted to look and see if there are bivariate associations between each of our predictor variables and our outcome, so we looped through the table, conducting univariate logistic regressions and reporting he results
```{r Bivariate, warning=FALSE}
## create a table for bivariate results
bi.final <- data.frame(matrix(nrow = 0, ncol = 3))
## loop through the columns of the data
for(i in 3:ncol(clean_data))
{
# gets the variable name for the column
iv <- colnames(clean_data)[i]
# we need to check for an error because the column data may not be regressable (ie only one value for the variable for all participants)
possibleError <- tryCatch(
#run the model
bi.model <- glm (any_glaucoma_surgey ~ get(iv), data = clean_data, family =
binomial(link="logit")), error = function(e) e
)
## if there was no error we will run the following code to add the results to our
## bi.final table
if(!inherits(possibleError, "error")){
## another error check, this step also produced an error at times
possibleError2 <- tryCatch(
## convert the coefficient into an OR and compute the confidence interval
bi.results <- exp(cbind(OR = coef(bi.model), confint(bi.model))),
error = function(e) e)
## if there was no error above we will add the results to the table of results
if(!inherits(possibleError2, "error")){
## add to the final bivariate results
rownames(bi.results)[2] <- colnames(clean_data)[i]
bi.final <- rbind(bi.final, bi.results)
}
}
}
# this table represents all variables found to be significant and their OR
bi.signif <- bi.final[which(bi.final$`2.5 %` > 1 | bi.final$`97.5 %`<1),]
##output both tables to view the results
kable(bi.final) %>%
kable_styling(bootstrap_options = "striped", full_width = F)
kable(bi.signif) %>%
kable_styling(bootstrap_options = "striped", full_width = F)
```
## Logisitc Regression LOO
So, we are going to run the leave one out validation strategy, utilizing logisitic regression
```{r lofreg LOO}
## this function runs a single fold of the LOO crossval
## the results of the prediction and the actual values attempting to predict
runfold <- function(train, test)
{
model.full= glm(any_glaucoma_surgey ~ Age + Gender +systolic_max + systolic_min + systolic_mean + diastolic_max + diastolic_min + diastolic_mean +pulse_min + pulse_max + pulse_mean +ever_hospitalized + days_hospitalized + CHF + PVD + Stroke + Dementia + Pulmonary + LiverMild + DM + DMcx + Renal + Cancer + Mets + med_class.Analgesics_._non.opioids + med_class.Analgesics_._opioids + med_class.Anti.rheumatic + med_class.Antianxiety_agents + med_class.Antiasthmatic + med_class.Anticoagulants + med_class.Anticonvulsant + med_class.Antidepressants + med_class.Antidiabetic + med_class.Antiemetics + med_class.Antihyperlipidemic + med_class.Antihypertensive + med_class.Beta_blockers + med_class.Calcium_blockers + med_class.Corticosteroids + med_class.Cough.Cold + med_class.Decongestants + med_class.Dermatological + med_class.Diuretics + med_class.Laxatives + med_class.Macrolide_antibiotics + med_class.Misc._antiinfectives + med_class.Ophthalmic+med_class.Ulcer_drugs,
data=train,
family = binomial(link="logit")
)
lr.pr <- predict(model.full, test, type="response")
# return the predicted values and the actual values
return(c(lr.pr,test$any_glaucoma_surgey[1]))
}
# these lists will be population with our predictions and actuals
predictions <- c()
actuals <- c()
# this loop will run the 25 iterations of 5-crossfold
for(i in 1:nrow(clean_data)){
# we want to get train and test
train <- clean_data[-i,]
test <- clean_data[i,]
result_fold <- runfold(train,test)
predictions <- c(predictions, result_fold[1])
actuals <- c(actuals, result_fold[2])
# this is just for progress checking
print(i)
message(i)
}
### Let us make the average ROC curve
pred <- ROCR::prediction(predictions, actuals)
### Lets plot the average
avg_auc_curve_log_reg <- ROCR::performance(pred,"tpr","fpr")
plot(avg_auc_curve_log_reg, avg="vertical",main='Average ROC Curve for Logistic Regression',col=2,lwd=2)
abline(a=0,b=1,lwd=2,lty=2,col='gray')
### And all the curves
plot(avg_auc_curve_log_reg,main='All the ROC Curves for Logistic Regression',col=2,lwd=1)
abline(a=0,b=1,lwd=2,lty=2,col='gray')
### And lets compute the average ROC curve
auc_list <- ROCR::performance(pred,"auc")@y.values
aucs <- c()
for(num in auc_list) { aucs <- c(aucs, num)}
auc_mean_lr <- mean(aucs)
auc_min_lr <- min(aucs)
auc_max_lr <- max(aucs)
auc_sd_lr <-sd(aucs)
## we will save the predictions and actuals for later
## so that we can graph the average roc curves of all three
## models at the same time
lr_predictions <- predictions
lr_actuals <- actuals
```
### Running Stepwise Log Reg
As well, we want to just report the logisitc regression results of the entire data set
```{r full logreg}
final.model <- glm(any_glaucoma_surgey ~ Age + Gender +systolic_max + systolic_min + systolic_mean + diastolic_max + diastolic_min + diastolic_mean +pulse_min + pulse_max + pulse_mean +ever_hospitalized + days_hospitalized + CHF + PVD + Stroke + Dementia + Pulmonary + LiverMild + DM + DMcx + Renal + Cancer + Mets + med_class.Analgesics_._non.opioids + med_class.Analgesics_._opioids + med_class.Anti.rheumatic + med_class.Antianxiety_agents + med_class.Antiasthmatic + med_class.Anticoagulants + med_class.Anticonvulsant + med_class.Antidepressants + med_class.Antidiabetic + med_class.Antiemetics + med_class.Antihyperlipidemic + med_class.Antihypertensive + med_class.Beta_blockers + med_class.Calcium_blockers + med_class.Corticosteroids + med_class.Cough.Cold + med_class.Decongestants + med_class.Dermatological + med_class.Diuretics + med_class.Laxatives + med_class.Macrolide_antibiotics + med_class.Misc._antiinfectives + med_class.Ophthalmic+med_class.Ulcer_drugs,
data=clean_data,
family = binomial(link="logit")
)
# create a table with the aORs and confidence intervals and p-values
results <- exp(cbind(OR = coef(final.model), confint(final.model)))
results <- cbind(results,summary(final.model)$coefficients[,4])
# print out the results
kable(results) %>%
kable_styling(bootstrap_options = "striped", full_width = F)
```
## Random Forests CrossVal
So, we are going to run the LOO crossval using Random Forest.
```{r rf LOO}
# this function runs a single fold
runfold_rf <- function(train, test)
{
## this code runs the random forests on our 48 variables
model.rf = randomForest(any_glaucoma_surgey ~ Age + Gender +systolic_max + systolic_min + systolic_mean + diastolic_max + diastolic_min + diastolic_mean +pulse_min + pulse_max + pulse_mean +ever_hospitalized + days_hospitalized + CHF + PVD + Stroke + Dementia + Pulmonary + LiverMild + DM + DMcx + Renal + Cancer + Mets + med_class.Analgesics_._non.opioids + med_class.Analgesics_._opioids + med_class.Anti.rheumatic + med_class.Antianxiety_agents + med_class.Antiasthmatic + med_class.Anticoagulants + med_class.Anticonvulsant + med_class.Antidepressants + med_class.Antidiabetic + med_class.Antiemetics + med_class.Antihyperlipidemic + med_class.Antihypertensive + med_class.Beta_blockers + med_class.Calcium_blockers + med_class.Corticosteroids + med_class.Cough.Cold + med_class.Decongestants + med_class.Dermatological + med_class.Diuretics + med_class.Laxatives + med_class.Macrolide_antibiotics + med_class.Misc._antiinfectives + med_class.Ophthalmic + med_class.Ulcer_drugs,
data=train, importance = TRUE, mtry = 6
)
# this line uses the random forests model to predict the results of the test data
# the predicted value is saved
pr <- predict(model.rf, test, type="prob")[,2]
# here we return the predicted and actual
return(c(pr,test$any_glaucoma_surgey[1]))
}
# these lists will be population with our predictions and actuals
predictions <- c()
actuals <- c()
# this loop through, removing each observation one by one
# and using the rest of the data set to predict it
for(i in 1:nrow(clean_data)){
# we want to get train and test
train <- clean_data[-i,]
test <- clean_data[i,]
result_fold <- runfold_rf(train,test)
predictions <- c(predictions, result_fold[1])
actuals <- c(actuals, result_fold[2])
# run one iteration of the five crossfold
#kfold_result <- fivefold(clean_data)
#add the predictions and actuals for each fold
#predictions <- append(predictions, kfold_result[[1]])
#actuals <- append(actuals, kfold_result[[2]])
# this is just for progress checking
print(i)
message(i)
}
### Let us make the average ROC curve
pred <- ROCR::prediction(predictions, actuals)
### Lets plot the average
avg_auc_curve_rf <- ROCR::performance(pred,"tpr","fpr")
plot(avg_auc_curve_rf, avg="vertical",main='Average ROC Curve for Random Forests',col=2,lwd=2)
abline(a=0,b=1,lwd=2,lty=2,col='gray')
### And all the curvers
plot(avg_auc_curve_rf,main='All the ROC Curves for Random Forest',col=2,lwd=1)
abline(a=0,b=1,lwd=2,lty=2,col='gray')
### And lets compute the average ROC curve
auc_list <- ROCR::performance(pred,"auc")@y.values
aucs <- c()
for(num in auc_list) { aucs <- c(aucs, num)}
auc_mean_rf <- mean(aucs)
auc_min_rf <- min(aucs)
auc_max_rf <- max(aucs)
auc_sd_rf <-sd(aucs)
## finally we will save the predictions and actuals for later analysis
rf_predictions <- predictions
rf_actuals <- actuals
```
### Random Forests Importance Scores
We also want to compute the importance scores for the entire data set
```{r RF Importance }
# run random forests on the full dataset
model.rf = randomForest(any_glaucoma_surgey ~ Age + Gender +systolic_max + systolic_min + systolic_mean + diastolic_max + diastolic_min + diastolic_mean +pulse_min + pulse_max + pulse_mean +ever_hospitalized + days_hospitalized + CHF + PVD + Stroke + Dementia + Pulmonary + LiverMild + DM + DMcx + Renal + Cancer + Mets + med_class.Analgesics_._non.opioids + med_class.Analgesics_._opioids + med_class.Anti.rheumatic + med_class.Antianxiety_agents + med_class.Antiasthmatic + med_class.Anticoagulants + med_class.Anticonvulsant + med_class.Antidepressants + med_class.Antidiabetic + med_class.Antiemetics + med_class.Antihyperlipidemic + med_class.Antihypertensive + med_class.Beta_blockers + med_class.Calcium_blockers + med_class.Corticosteroids + med_class.Cough.Cold + med_class.Decongestants + med_class.Dermatological + med_class.Diuretics + med_class.Laxatives + med_class.Macrolide_antibiotics + med_class.Misc._antiinfectives + med_class.Ophthalmic + med_class.Ulcer_drugs,
data=clean_data, importance = TRUE, mtry = 6
)
# get the results
results <- importance(model.rf)
# output the results
kable(results) %>%
kable_styling(bootstrap_options = "striped", full_width = F)
```
## ANN CrossVal
### Make Numeric Data Set
To Run ANN you have to use a numeric data set and everything need to be scaled as well
```{r numericize}
#first scale the data
numeric_data <- clean_data
# this loop goes through all of the variables and makes the non-numeric data numeric
for(i in 1:ncol(numeric_data))
{
if(!is.numeric(numeric_data[,i]))
{
numeric_data[,i] <- as.numeric(numeric_data[,i])
}
}
# these three lines scale all of the data frame
max <- apply(numeric_data,2,max)
min <- apply(numeric_data,2,min)
numeric_data <- as.data.frame(scale(numeric_data, center = min, scale = max - min))
```
### Run the CrossVal
Like with RF and logistic regression, we will run LOO.
```{r ANN LOO}
# Note on 8.14.2019, code to be updated prior to December paper publication
```
### Print Average AUC Curves Together
We wanted to print the average AUC curvers all together, here is the code for doing that
```{r average AUCs}
plot(avg_auc_curve_ann, avg="vertical",main='Average ROC Curves', lwd = 2, pch = 0)
plot(avg_auc_curve_rf, avg="vertical",add = TRUE,lwd=2, pch = 8, lty = "dotted")
plot(avg_auc_curve_log_reg, avg="vertical",add = TRUE,lwd=2, pch = 6, lty = "twodash")
abline(a=0,b=1,lwd=2,lty=2,col='gray')
legend(0,1, legend=c("ANN", "RF", "LogReg"), lty=c('solid','dotted','twodash'), lwd =2, cex=0.8)
```
### Here Are the AUCs and Related Stats
First we wanted to compute statistics about the AUCs of each model. The averages, max, in, and standard deviations
```{r output tables}
## create the values for the table, compute the means, sd, mins, and maxs
models <- c("Logistic Regression", "Random Forest", "Artificial Neural Network")
means <- c(auc_mean_lr, auc_mean_rf, auc_mean_ann)
sds <- c(auc_sd_lr,auc_sd_rf, auc_sd_ann)
mins <- c(auc_min_lr,auc_min_rf,auc_min_ann)
maxs <- c(auc_max_lr, auc_max_rf, auc_max_ann)
# place values into a single dataframe
auc_table <- data.frame(matrix(nrow = 3, ncol = 0))
auc_table$model <- models
auc_table$auc_mean <- round(means, digits = 3)
auc_table$auc_sd <- round(sds, digits = 3)
auc_table$auc_min <- round(mins, digits = 3)
auc_table$auc_max <- round(maxs, digits = 3)
# print it out
kable(auc_table) %>%
kable_styling(bootstrap_options = "striped", full_width = F)
```
### CutPoint
We also want to compute the specificity, sensitivity and accuracy of the model. We used the oc_youden_kernel in the cutpointr function in order to identify this point.
```{r cutpoint}
## for finding cutpoint
library(cutpointr)
### Logisitic Regression
lr_sensitivity_sum <- 0
lr_specificity_sum <- 0
lr_cutpoint_sum <- 0
lr_accuracy_sum <- 0
## we went through each of the 25 runs and computed the metrics for each
for(i in 1:length(lr_actuals)) {
cp <- cutpointr(x = lr_predictions[[i]], class = lr_actuals[[i]], method = oc_youden_kernel)
sum_cp <-summary(cp)$cutpointr[[1]]
lr_sensitivity_sum <- lr_sensitivity_sum + sum_cp$sensitivity
lr_specificity_sum <- lr_specificity_sum + sum_cp$specificity
lr_cutpoint_sum <- lr_cutpoint_sum + sum_cp$optimal_cutpoint
lr_accuracy_sum <- lr_accuracy_sum + sum_cp$acc
}
## then we reported the averages of each value for each run
lr_mean_sensitivity <- lr_sensitivity_sum/length(lr_actuals)
lr_mean_specificity <- lr_specificity_sum/length(lr_actuals)
lr_mean_cutpoint <- lr_cutpoint_sum/length(lr_actuals)
lr_mean_accuracy <- lr_accuracy_sum/length(lr_actuals)
## Random Forests
rf_sensitivity_sum <- 0
rf_specificity_sum <- 0
rf_cutpoint_sum <- 0
rf_accuracy_sum <- 0
# compute metrics for each run
for(i in 1:length(rf_actuals)) {
cp <- cutpointr(x = rf_predictions[[i]], class = rf_actuals[[i]], method = oc_youden_kernel)
sum_cp <-summary(cp)$cutpointr[[1]]
rf_sensitivity_sum <- rf_sensitivity_sum + sum_cp$sensitivity
rf_specificity_sum <- rf_specificity_sum + sum_cp$specificity
rf_cutpoint_sum <- rf_cutpoint_sum + sum_cp$optimal_cutpoint
rf_accuracy_sum <- rf_accuracy_sum + sum_cp$acc
}
#average the results across all 25 folds
rf_mean_sensitivity <- rf_sensitivity_sum/length(rf_actuals)
rf_mean_specificity <- rf_specificity_sum/length(rf_actuals)
rf_mean_cutpoint <- rf_cutpoint_sum/length(rf_actuals)
rf_mean_accuracy <- rf_accuracy_sum/length(rf_actuals)
### ANN
ann_sensitivity_sum <- 0
ann_specificity_sum <- 0
ann_cutpoint_sum <- 0
ann_accuracy_sum <- 0
for(i in 1:length(ann_predictions)) {
ann_predictions[[i]] <- ann_predictions[[i]][,1]
}
for(i in 1:length(ann_actuals)) {
cp <- cutpointr(x = ann_predictions[[i]], class = ann_actuals[[i]], method = oc_youden_kernel)
sum_cp <-summary(cp)$cutpointr[[1]]
ann_sensitivity_sum <- ann_sensitivity_sum + sum_cp$sensitivity
ann_specificity_sum <- ann_specificity_sum + sum_cp$specificity
ann_cutpoint_sum <- ann_cutpoint_sum + sum_cp$optimal_cutpoint
ann_accuracy_sum <- ann_accuracy_sum + sum_cp$acc
}
ann_mean_sensitivity <- ann_sensitivity_sum/length(ann_actuals)
ann_mean_specificity <- ann_specificity_sum/length(ann_actuals)
ann_mean_cutpoint <- ann_cutpoint_sum/length(ann_actuals)
ann_mean_accuracy <- ann_accuracy_sum/length(ann_actuals)
## create the final table
models <- c("Logistic Regression", "Random Forest", "Artificial Neural Network")
sensitivitys <- c(lr_mean_sensitivity,rf_mean_sensitivity, ann_mean_sensitivity )
specificitys <- c(lr_mean_specificity,rf_mean_specificity, ann_mean_specificity )
accuracys <- c(lr_mean_accuracy,rf_mean_accuracy, ann_mean_accuracy)
cutpoints <- c(lr_mean_cutpoint,rf_mean_cutpoint, ann_mean_cutpoint )
table <- data.frame(matrix(nrow = 3, ncol = 0))
table$model <- models
table$sensitivity <- round(sensitivitys, digits = 3)
table$specificity <- round(specificitys, digits = 3)
table$accuracy <- round(accuracys, digits = 3)
table$cutpoint <- round(cutpoints, digits = 3)
## print it all out
kable(table) %>%
kable_styling(bootstrap_options = "striped", full_width = F)
```