# R example for "Generally altered, inflated, truncated and
# deflated regression", for Statistical Science.
# January 2024.


# Note:
# 1. Two R packages are needed below: "VGAM" and "COUNT".
#    Ideally the latest versions are needed, both on CRAN.


# options(digits = 5)


# =========================================================
# Get the packages if necessary, and load them.

# Note: VGAM version 1.1-9 or higher is needed.


if (R.Version()$major < "4")
  stop("R version 4.0.0 or higher is needed")


if (!any(installed.packages()[, 'Package'] == "VGAM"))
  install.packages("VGAM")


library("VGAM")


if (packageVersion("VGAM") < "1.1.9")
  warning("'VGAM' 1.1-9 or higher is needed")


# Get COUNT installed if necessary ------------------------
# If not yet installed then install it:

if (!any(installed.packages()[, 'Package'] == "COUNT"))
  install.packages("COUNT")


# =========================================================
# GT-Expansion method on the azpro30 data.
# Section 8.2.1 of the paper.


data("azpro", package = "COUNT")
mymax30 <- 30
azpro30 <- subset(azpro, los <= mymax30)
summary(azpro30[, -6])
prop1 <- with(azpro30, mean(procedure))  # A 0 & 1 variable


# Obtain two side-by-side spike plots
par(mfrow = c(1, 2), mar = c(4.4, 4.2, 0.2, 0.4), las = 1)
mylwd <- 1
with(azpro30, spikeplot(los, lwd = mylwd + 0.5, las = 1))
put.caption("(a)")

with(azpro30, spikeplot(los, lwd = mylwd + 0.5, las = 1,
                        ylab = ""))
put.caption("(b)")
with(subset(azpro30, procedure == 0),
     spikeplot(los, lwd = mylwd, new.plot = FALSE, 
               ymux = 1 - prop1, offset.x = 1/4,
               col = "blue"))
with(subset(azpro30, procedure == 1),
     spikeplot(los, lwd = mylwd, new.plot = FALSE, 
               ymux = prop1, offset.x = 1/2, col = "red"))
legend("topright", col = c("black", "blue", "red"),
       lwd = c(mylwd + 1, mylwd, mylwd),
       legend = c("Overall",
                  "procedure = 0", "procedure = 1"))


## Biomodality can be modelled by general deflation
mymux <- 3  # Multiplier for GTE method
myrg <- with(azpro30, range(los))
d.mix <- 3:7
azpro30 <-
  transform(azpro30,
            muxlos = mymux * los,  # Modified response
            logmux = log(mymux),
            all0s  = 0 * los)
tail(azpro30)
fit1.nb.t0 <-
  vglm(muxlos ~ 1,
       gaitdnbinomial(i.mlm = 2 * mymux,
           imunb.d = 6 * mymux, isize.d =  2 * mymux,
           d.mix = d.mix * mymux, eq.dp = FALSE,
           truncate = c(seq(mymux) - 1, Trunc(myrg, mymux))),
       offset = cbind(logmux, all0s, all0s,
                      logmux, all0s, all0s),
       trace = TRUE, data = azpro30)


t(coef(fit1.nb.t0, matrix = TRUE))
summary(fit1.nb.t0, HDEtest = FALSE)
head(fit1.nb.t0@offset) 


## Spikeplot comparing the model and the data

 par(mfrow = c(1, 1), mar = c(4.4, 4.2, 0.2, 0.4), las = 1)

with(azpro30, spikeplot(muxlos, lwd = mylwd, las = 1,
                        xlab = "Expanded los"))
plotdgaitd(fit1.nb.t0, new.plot = FALSE, offset.x = 0.5,
           all.lwd = mylwd, deflation = TRUE,
           col.p = "purple")


# =========================================================
# GA- to handle (artificial) censoring at 20 days.
# GT-Expansion (GTE) method also applied.
# Data is called azpro20. Section 8.2.2 of the paper.


data("azpro", package = "COUNT")
mymax20 <- 20  # Censoring point
azpro20 <- azpro
azpro20 <- transform(azpro20, los = pmin(los, mymax20))
summary(azpro20[, -6])  # los is right-censored artificially


## Plot the censored data

par(mfrow = c(1, 1), mar = c(4.4, 4.2, 0.2, 0.4), las = 1)
mylwd <- 1
with(azpro20, spikeplot(los, lwd = mylwd + 0.5, las = 1))


## Fit a GAITD -NB, with backward elimination already done
cmat2 <- diag(3)[, -2]  # Delete the effect of 1 coefficient 
clist <- list("(Intercept)" = diag(3),
              "admit"       = diag(3),
              "age75"       = cmat2,
              "procedure"   = cmat2,
              "sex"         = cmat2)
fit1.azpro20 <-
  vglm(los ~ admit + age75 + procedure + sex,
       constraints = clist,   trace = TRUE,  #  maxit=11,
       gaitdnbinomial(a.mlm = 20, zero = "",
                      truncate = c(0, mymax20 + (1:20))),
       data = azpro20)


## All regression coefficients are significant
coef(fit1.azpro20, matrix = TRUE)
summary(fit1.azpro20)


# Some estimated effects.
cfit1.azpro20 <- coef(fit1.azpro20, matrix = TRUE)

# age
round(100 * expm1(cfit1.azpro20["age75", 1]), 1)  # %
round(exp(cfit1.azpro20["age75", 3]), 2)  #  Odds

# procedure
round(100 * expm1(cfit1.azpro20["procedure", 1]), 1)
round(exp(cfit1.azpro20["procedure", 3]), 1)


# =========================================================