# R example for "Generally altered, inflated, truncated and # deflated regression", for Statistical Science. # January 2024. # Note: # 1. Two R packages are needed below: "VGAM" and "VGAMdata". # Ideally the latest versions are needed, both on CRAN. # options(digits = 5) # ========================================================= # Get the packages if necessary, and load them. # Note: VGAM version 1.1-9 or higher is needed. if (R.Version()$major < "4") stop("R version 4.0.0 or higher is needed") if (!any(installed.packages()[, 'Package'] == "VGAM")) install.packages("VGAM") library("VGAM") if (packageVersion("VGAM") < "1.1.9") warning("'VGAM' 1.1-9 or higher is needed") # Get VGAMdata installed if necessary --------------------- # If not yet installed then install it: if (!any(installed.packages()[, 'Package'] == "VGAMdata")) install.packages("VGAMdata") # ========================================================= # GT-Expansion (GTE) method on the sleep duration data. # Section 8.1 of the paper. data("xs.nz", package = "VGAMdata") sxs.nz <- subset(xs.nz, !is.na(sleep)) # Remove NAs sleep.min <- 3 # Smallest value allowed sleep.max <- 12 # Largest value allowed; Remove outliers sxs.nz <- subset(sxs.nz, sleep.min <= sleep & sleep <= sleep.max) with(sxs.nz, table(sleep)) # Obtain a spike plot with(sxs.nz, spikeplot(sleep)) ## Search for the optimal multiplier org1 <- with(sxs.nz, range(sleep)) # Original data range m.max <- 8 # Try multipliers 1:m.max aics <- logliks <- numeric(m.max) allfits <- vector("list", m.max) names(allfits) <- names(aics) <- names(logliks) <- as.character(1:m.max) for (mux in 1:m.max) { fit <- vglm(mux * sleep ~ 1, # trace = TRUE, gaitdpoisson(trunc = c(0:(sleep.min * mux - 1), Trunc(org1, mux)), max.support = sleep.max * mux, i.mlm = 8 * mux), offset = rep(log(mux), nrow(sxs.nz)), data = sxs.nz) # Occasional warnings issued allfits[[mux]] <- fit logliks[mux] <- logLik(fit) aics[mux] <- AIC(fit) } ## Choose the optimal multiplier sort(sapply(allfits, logLik), decreas = TRUE) # Best to worse (mux.best <- as.vector(which.max(logliks))) # 5 is best ## Plot the log-likelihood versus the multipliers par(mfrow = c(1, 1), mar = c(4.4, 4.2, 0.5, 0.9), las = 1) plot(logliks, col = "blue", type = "b", xlab = "Multiplier") abline(v = 5, h = logliks[5], col = 'orange', lty = "dashed") ## Moment estimator with(sxs.nz, mean(sleep) / var(sleep)) # Plot the best fit on the raw data fit1.sleep <- allfits[[mux.best]] par(mfrow = c(1, 1), mar = c(4.4, 4.2, 0.1, 0.9), las = 1) with(sxs.nz, spikeplot(mux.best * sleep, col = "red", lwd = 2, xlab = "Expanded response", ylab = "", xlim = c(mux.best * sleep.min, mux.best * sleep.max))) plotdgaitd(fit1.sleep, new.plot = FALSE, offset.x = 0.5, all.lwd = 2, col.p = "blue") ## Useful quantities logLik(fit1.sleep) coef(fit1.sleep, matrix = TRUE) head(fitted(fit1.sleep, type = "pstr.mlm"), 2) KLD(fit1.sleep) specials(fit1.sleep) ## Fitted parent mean and confidence interval head(fitted(fit1.sleep), 1) / mux.best exp(confint(fit1.sleep)[1, ]) # =========================================================