# Code to illustrate a ROC curve.
# Thomas Yee
# 202305


library("VGAM")
library("VGAMdata")


# Use nonparametric logistic regression.
# The response is depression amongst male Maori.

depress <- na.omit(xs.nz[, c("depressed", "age", "sex", "ethnicity",
                             "weight", "marital", "educ", "smokenow")])
depress <- subset(depress, sex == "M" & ethnicity == "Maori")
dim(depress)


set.seed(784)
nrow(depress)
train.sample <- sample(nrow(depress), size = 500)
depress.train <- depress[ train.sample, ]
depress.test  <- depress[-train.sample, ]


# This is also known as a generalized additive model.
# Smoothing is used for the age variable, to allow more flexibility.

fit.depress <- vgam(depressed ~ s(age) + s(weight) +
                                marital + educ + smokenow,
                    binomialff, data = depress.train, trace = TRUE)

# FYI
par(mfrow = c(2, 3))
plot(fit.depress, se = TRUE, scale = 5)


#depress.train <- transform(depress.train, fv = fitted(fit.depress))


# Plot a ROC curve manually ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,

# Vector of cutoff points. If the predicted probability
# is greater than this then classify as "success".

cutvec <- seq(0.1, 0.9, length = 09)
cutvec <- seq(0.1, 0.5, by = 0.1)
cutvec <- seq(0.1, 0.5, length =  5)
cutvec <- seq(0.1, 0.5, length = 12)

yhat.mat <- matrix(0, nrow(depress.test), length(cutvec))


#depress.test.0  <- subset(depress.test, depressed == 0)
#depress.test.1  <- subset(depress.test, depressed == 1)
#index.test.0    <-   with(depress.test, which(depressed == 0))
#index.test.1    <-   with(depress.test, which(depressed == 1))


# Set up the plot for the ROC curve
par(mfrow = c(1, 2))
plot(runif(9), runif(9), type = "n", xlim = 0:1, ylim = 0:1,
     las = 1, ylab = "Sensitivity",
     xlab = "1 - specificity",
     main = "(a) My (partial) ROC curve")


# The ROC curve applies to the test data


sens.spec <- function(tab2x2) {
# The first row is assumed to match the first column, etc.
# The second col is assumed to be a positive test.
# The second row is assumed to be positive (diseased; by gold standard).
  if (!is.matrix(tab2x2) || nrow(tab2x2) != 2 || ncol(tab2x2) != 2) {
    print("argument 'tab2x2' is not a 2 x 2 matrix")
    warning("argument 'tab2x2' is not a 2 x 2 matrix")
    list(sensitivity = NULL,
         specificity = NULL)
  } else {
    list(sensitivity = tab2x2[2, 2] / sum(tab2x2[2, ]),
         specificity = tab2x2[1, 1] / sum(tab2x2[1, ]))
  }
}  # sens.spec


for (its in 1:length(cutvec)) {
  print(its)
  yhat.mat[, its] <-
    as.numeric(predict(fit.depress, depress.test, type = "response") >
               cutvec[its])
  mylist <- sens.spec(table(depress.test$depressed,
                            yhat.mat[, its]))
  if (is.numeric(mylist$sensitivity))
    points(1 - mylist$specificity,
           mylist$sensitivity, col = "blue")
}
abline(a = 0, b = 1, lty = "dashed", col = "gray50")


summary(yhat.mat)


# Use \pkg{ROCR} to plot the ROC curve ,,,,,,,,,,,,,,,,,,,,,,,,,


pred_test <- predict(fit.depress, depress.test, type = "response")


library("ROCR")
ROCR_pred_test <- prediction(pred_test, depress.test$depressed)
ROCR_perf_test <- performance(ROCR_pred_test, 'tpr', 'fpr')
plot(ROCR_perf_test,
     main = "(b) ROC curve using the ROCR package",
     print.cutoffs.at = c(0.1, 0.2, 0.3),
     colorize = TRUE)
abline(a = 0, b = 1, lty = "dashed", col = "gray50")