# R example for "Generally altered, inflated, truncated and # deflated regression", for Statistical Science. # January 2024. # Note: # 1. Two R packages are needed below: "VGAM" and "COUNT". # Ideally the latest versions are needed, both on CRAN. # options(digits = 5) # ========================================================= # Get the packages if necessary, and load them. # Note: VGAM version 1.1-9 or higher is needed. if (R.Version()$major < "4") stop("R version 4.0.0 or higher is needed") if (!any(installed.packages()[, 'Package'] == "VGAM")) install.packages("VGAM") library("VGAM") if (packageVersion("VGAM") < "1.1.9") warning("'VGAM' 1.1-9 or higher is needed") # Get COUNT installed if necessary ------------------------ # If not yet installed then install it: if (!any(installed.packages()[, 'Package'] == "COUNT")) install.packages("COUNT") # ========================================================= # GT-Expansion method on the azpro30 data. # Section 8.2.1 of the paper. data("azpro", package = "COUNT") mymax30 <- 30 azpro30 <- subset(azpro, los <= mymax30) summary(azpro30[, -6]) prop1 <- with(azpro30, mean(procedure)) # A 0 & 1 variable # Obtain two side-by-side spike plots par(mfrow = c(1, 2), mar = c(4.4, 4.2, 0.2, 0.4), las = 1) mylwd <- 1 with(azpro30, spikeplot(los, lwd = mylwd + 0.5, las = 1)) put.caption("(a)") with(azpro30, spikeplot(los, lwd = mylwd + 0.5, las = 1, ylab = "")) put.caption("(b)") with(subset(azpro30, procedure == 0), spikeplot(los, lwd = mylwd, new.plot = FALSE, ymux = 1 - prop1, offset.x = 1/4, col = "blue")) with(subset(azpro30, procedure == 1), spikeplot(los, lwd = mylwd, new.plot = FALSE, ymux = prop1, offset.x = 1/2, col = "red")) legend("topright", col = c("black", "blue", "red"), lwd = c(mylwd + 1, mylwd, mylwd), legend = c("Overall", "procedure = 0", "procedure = 1")) ## Biomodality can be modelled by general deflation mymux <- 3 # Multiplier for GTE method myrg <- with(azpro30, range(los)) d.mix <- 3:7 azpro30 <- transform(azpro30, muxlos = mymux * los, # Modified response logmux = log(mymux), all0s = 0 * los) tail(azpro30) fit1.nb.t0 <- vglm(muxlos ~ 1, gaitdnbinomial(i.mlm = 2 * mymux, imunb.d = 6 * mymux, isize.d = 2 * mymux, d.mix = d.mix * mymux, eq.dp = FALSE, truncate = c(seq(mymux) - 1, Trunc(myrg, mymux))), offset = cbind(logmux, all0s, all0s, logmux, all0s, all0s), trace = TRUE, data = azpro30) t(coef(fit1.nb.t0, matrix = TRUE)) summary(fit1.nb.t0, HDEtest = FALSE) head(fit1.nb.t0@offset) ## Spikeplot comparing the model and the data par(mfrow = c(1, 1), mar = c(4.4, 4.2, 0.2, 0.4), las = 1) with(azpro30, spikeplot(muxlos, lwd = mylwd, las = 1, xlab = "Expanded los")) plotdgaitd(fit1.nb.t0, new.plot = FALSE, offset.x = 0.5, all.lwd = mylwd, deflation = TRUE, col.p = "purple") # ========================================================= # GA- to handle (artificial) censoring at 20 days. # GT-Expansion (GTE) method also applied. # Data is called azpro20. Section 8.2.2 of the paper. data("azpro", package = "COUNT") mymax20 <- 20 # Censoring point azpro20 <- azpro azpro20 <- transform(azpro20, los = pmin(los, mymax20)) summary(azpro20[, -6]) # los is right-censored artificially ## Plot the censored data par(mfrow = c(1, 1), mar = c(4.4, 4.2, 0.2, 0.4), las = 1) mylwd <- 1 with(azpro20, spikeplot(los, lwd = mylwd + 0.5, las = 1)) ## Fit a GAITD -NB, with backward elimination already done cmat2 <- diag(3)[, -2] # Delete the effect of 1 coefficient clist <- list("(Intercept)" = diag(3), "admit" = diag(3), "age75" = cmat2, "procedure" = cmat2, "sex" = cmat2) fit1.azpro20 <- vglm(los ~ admit + age75 + procedure + sex, constraints = clist, trace = TRUE, # maxit=11, gaitdnbinomial(a.mlm = 20, zero = "", truncate = c(0, mymax20 + (1:20))), data = azpro20) ## All regression coefficients are significant coef(fit1.azpro20, matrix = TRUE) summary(fit1.azpro20) # Some estimated effects. cfit1.azpro20 <- coef(fit1.azpro20, matrix = TRUE) # age round(100 * expm1(cfit1.azpro20["age75", 1]), 1) # % round(exp(cfit1.azpro20["age75", 3]), 2) # Odds # procedure round(100 * expm1(cfit1.azpro20["procedure", 1]), 1) round(exp(cfit1.azpro20["procedure", 3]), 1) # =========================================================