# Code to illustrate a ROC curve. # Thomas Yee # 202305 library("VGAM") library("VGAMdata") # Use nonparametric logistic regression. # The response is depression amongst male Maori. depress <- na.omit(xs.nz[, c("depressed", "age", "sex", "ethnicity", "weight", "marital", "educ", "smokenow")]) depress <- subset(depress, sex == "M" & ethnicity == "Maori") dim(depress) set.seed(784) nrow(depress) train.sample <- sample(nrow(depress), size = 500) depress.train <- depress[ train.sample, ] depress.test <- depress[-train.sample, ] # This is also known as a generalized additive model. # Smoothing is used for the age variable, to allow more flexibility. fit.depress <- vgam(depressed ~ s(age) + s(weight) + marital + educ + smokenow, binomialff, data = depress.train, trace = TRUE) # FYI par(mfrow = c(2, 3)) plot(fit.depress, se = TRUE, scale = 5) #depress.train <- transform(depress.train, fv = fitted(fit.depress)) # Plot a ROC curve manually ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, # Vector of cutoff points. If the predicted probability # is greater than this then classify as "success". cutvec <- seq(0.1, 0.9, length = 09) cutvec <- seq(0.1, 0.5, by = 0.1) cutvec <- seq(0.1, 0.5, length = 5) cutvec <- seq(0.1, 0.5, length = 12) yhat.mat <- matrix(0, nrow(depress.test), length(cutvec)) #depress.test.0 <- subset(depress.test, depressed == 0) #depress.test.1 <- subset(depress.test, depressed == 1) #index.test.0 <- with(depress.test, which(depressed == 0)) #index.test.1 <- with(depress.test, which(depressed == 1)) # Set up the plot for the ROC curve par(mfrow = c(1, 2)) plot(runif(9), runif(9), type = "n", xlim = 0:1, ylim = 0:1, las = 1, ylab = "Sensitivity", xlab = "1 - specificity", main = "(a) My (partial) ROC curve") # The ROC curve applies to the test data sens.spec <- function(tab2x2) { # The first row is assumed to match the first column, etc. # The second col is assumed to be a positive test. # The second row is assumed to be positive (diseased; by gold standard). if (!is.matrix(tab2x2) || nrow(tab2x2) != 2 || ncol(tab2x2) != 2) { print("argument 'tab2x2' is not a 2 x 2 matrix") warning("argument 'tab2x2' is not a 2 x 2 matrix") list(sensitivity = NULL, specificity = NULL) } else { list(sensitivity = tab2x2[2, 2] / sum(tab2x2[2, ]), specificity = tab2x2[1, 1] / sum(tab2x2[1, ])) } } # sens.spec for (its in 1:length(cutvec)) { print(its) yhat.mat[, its] <- as.numeric(predict(fit.depress, depress.test, type = "response") > cutvec[its]) mylist <- sens.spec(table(depress.test$depressed, yhat.mat[, its])) if (is.numeric(mylist$sensitivity)) points(1 - mylist$specificity, mylist$sensitivity, col = "blue") } abline(a = 0, b = 1, lty = "dashed", col = "gray50") summary(yhat.mat) # Use \pkg{ROCR} to plot the ROC curve ,,,,,,,,,,,,,,,,,,,,,,,,, pred_test <- predict(fit.depress, depress.test, type = "response") library("ROCR") ROCR_pred_test <- prediction(pred_test, depress.test$depressed) ROCR_perf_test <- performance(ROCR_pred_test, 'tpr', 'fpr') plot(ROCR_perf_test, main = "(b) ROC curve using the ROCR package", print.cutoffs.at = c(0.1, 0.2, 0.3), colorize = TRUE) abline(a = 0, b = 1, lty = "dashed", col = "gray50")