# R example for "Generally altered, inflated, truncated and
# deflated regression", for Statistical Science.
# January 2024.


# Note:
# 1. Two R packages are needed below: "VGAM" and "VGAMdata".
#    Ideally the latest versions are needed, both on CRAN.


# options(digits = 5)


# =========================================================
# Get the packages if necessary, and load them.

# Note: VGAM version 1.1-9 or higher is needed.


if (R.Version()$major < "4")
  stop("R version 4.0.0 or higher is needed")


if (!any(installed.packages()[, 'Package'] == "VGAM"))
  install.packages("VGAM")


library("VGAM")


if (packageVersion("VGAM") < "1.1.9")
  warning("'VGAM' 1.1-9 or higher is needed")


# Get VGAMdata installed if necessary ---------------------
# If not yet installed then install it:

if (!any(installed.packages()[, 'Package'] == "VGAMdata"))
  install.packages("VGAMdata")


# =========================================================
# GT-Expansion (GTE) method on the sleep duration data.
# Section 8.1 of the paper.


data("xs.nz", package = "VGAMdata")
sxs.nz <- subset(xs.nz, !is.na(sleep))  # Remove NAs
sleep.min <-  3  # Smallest value allowed
sleep.max <- 12  # Largest  value allowed; Remove outliers
sxs.nz <- subset(sxs.nz, sleep.min <= sleep &
                         sleep <= sleep.max)
with(sxs.nz, table(sleep))


# Obtain a spike plot
with(sxs.nz, spikeplot(sleep))


## Search for the optimal multiplier
org1 <- with(sxs.nz, range(sleep))  # Original data range
m.max <- 8  # Try multipliers 1:m.max
aics <- logliks <- numeric(m.max)
allfits <- vector("list", m.max)
names(allfits) <- names(aics) <-
names(logliks) <- as.character(1:m.max)
for (mux in 1:m.max) {
  fit <- vglm(mux * sleep ~ 1,   # trace = TRUE,
              gaitdpoisson(trunc = c(0:(sleep.min * mux - 1),
                                     Trunc(org1, mux)),
                           max.support = sleep.max * mux,
                           i.mlm = 8 * mux),
              offset = rep(log(mux), nrow(sxs.nz)),
              data = sxs.nz)  # Occasional warnings issued
  allfits[[mux]] <- fit
  logliks[mux] <- logLik(fit)
  aics[mux] <- AIC(fit)
}


## Choose the optimal multiplier
sort(sapply(allfits, logLik), decreas = TRUE)  # Best to worse
(mux.best <- as.vector(which.max(logliks)))  # 5 is best


## Plot the log-likelihood versus the multipliers
par(mfrow = c(1, 1), mar = c(4.4, 4.2, 0.5, 0.9), las = 1)
plot(logliks, col = "blue", type = "b", xlab = "Multiplier")
abline(v = 5, h = logliks[5], col = 'orange', lty = "dashed")


## Moment estimator
with(sxs.nz, mean(sleep) / var(sleep))


# Plot the best fit on the raw data
fit1.sleep <- allfits[[mux.best]]
par(mfrow = c(1, 1), mar = c(4.4, 4.2, 0.1, 0.9), las = 1)
with(sxs.nz, spikeplot(mux.best * sleep, col = "red", lwd = 2,
                       xlab = "Expanded response", ylab = "",
                       xlim = c(mux.best * sleep.min,
                                mux.best * sleep.max)))
plotdgaitd(fit1.sleep, new.plot = FALSE, offset.x = 0.5,
           all.lwd = 2, col.p = "blue")


## Useful quantities
logLik(fit1.sleep)
coef(fit1.sleep, matrix = TRUE)
head(fitted(fit1.sleep, type = "pstr.mlm"), 2)
KLD(fit1.sleep)
specials(fit1.sleep)


## Fitted parent mean and confidence interval
head(fitted(fit1.sleep), 1) / mux.best
exp(confint(fit1.sleep)[1, ])


# =========================================================