# Analysis of the 2008 WFFC data
# Concentrates on the length-points association.
# T. W. Yee (2013)

# ======================================================================
# Read the raw data in


library("VGAM")      # Requires Version 0.9-2 or later
library("VGAMdata")  # Requires Version 0.9-2 or later


data(wffc)
summary(wffc)
dim(wffc)


data(wffc.indiv)
data(wffc.teams)
summary(wffc.indiv)
summary(wffc.teams)
dim(wffc.indiv)
dim(wffc.teams)


teams <- wffc.teams
indiv <- wffc.indiv
teams <- transform(teams,
            approxmeanlength = (points/noofcaptures-100) / 2) # In mm
indiv <- transform(indiv,
            approxmeanlength = (points/noofcaptures-100) / 2) # Revise this

# ======================================================================
# Misc. musings

apply(wffc.teams[,c("points","noofcaptures")], 2, sum)

apply(wffc.indiv[,c("points","noofcaptures","individual")], 2, sum)

with(wffc, mean(length))


# ======================================================================
# The competition scoring system 
# Look at adding a quadratic to the points


par(mfrow = c(1,1), mar = c(5.1, 4.1, 0.3, 0.3), las = 1)

yy <- seq(0.0, 0.72, by = 0.001)
plot(yy, wffc.P2star(yy), type = "l", col = "blue", las = 1, lty = "dashed",
     xlab = "Length (m)",
     ylab = "Competition points", lwd = 2)
lines(yy, wffc.P1star(yy), type = "l", col = "red", lwd = 2)

abline(v = (1:4)*0.18, lty = "dotted")
abline(h = (1:9)*wffc.P1star(0.18), lty = "dotted")


# Improved ratios
wffc.P2star(4*0.18) / wffc.P2star(1*0.18)
wffc.P1star(4*0.18) / wffc.P1star(1*0.18)

2 * wffc.P1star(1*0.18) == wffc.P1star(0.41)
wffc.P1star(1*0.18)
wffc.P1star(3*0.18)


# ======================================================================
# Scatterplot of lengths versus points---for each team.
# Do this for the 2 small rivers only.

# standalone specifies whether the two plots are joined together.
standalone <- FALSE

myylab <- "Mean length (mm)"
myxlab <- "Total number of points scored (thousands)"


teams.river <- teams
teams.river$mmmeanlength <- 0 * teams.river$noofcaptures
teams.river$totalPlacings <-
    teams.river$noofcaptures <-
    teams.river$longestfish <-
    teams.river$approxmeanlength <- NULL
# Now overwrite teams.river  with the numbers from "wffc"
wffc.river <- subset(wffc, sector==2 | sector==3)  # Lakes
wffc.river <- subset(wffc, sector==1 | sector==1)  # Whanganui River
wffc.river <- subset(wffc, sector==4 | sector==5)  # Used in wffc.tex
myrowno <- 0
for(places in teams[["country"]]) {
    myrowno <- myrowno + 1
    wffc.river.country <- wffc.river[wffc.river$country == places,]
    mypoints <- wffc.P1(wffc.river.country$length / 1000)
    teams.river[myrowno,"points"] <- sum(mypoints)
    teams.river[myrowno,"noofcaptures"] <- length(mypoints)
    teams.river[myrowno,"mmmeanlength"] <- mean(wffc.river.country$length)
}

teams.river

# Plot the results
par(mfrow = c(1,1), mar = c(5.1, 4.1, 0.3, 0.3), las = 1)
with(teams.river, plot(points/1000, mmmeanlength, type = "n",
                       ylab = ifelse(standalone, myylab, ""),
                       xlab = ifelse(standalone, myxlab, "")))
with(teams.river, text(points/1000, mmmeanlength,
                       labels = country, col = "blue",
                       cex = 0.10 * sqrt(noofcaptures)))

if(standalone) {
    dev.off()
} else {
    text(x = 22000/1000, y = 237, "(a)", font = 1) # "helvitica"
}


# Simple linear regression
fittr <- lm( mmmeanlength ~ points, data = teams.river, weight = noofcaptures, y = TRUE)
summary(fittr)
length(fittr$y)


# ======================================================================
# Look at scatterplot of lengths versus points---for each individual
# This is computed over 2 rivers only.

indiv.river <- indiv
indiv.river$mmmeanlength <- 0 * indiv.river$noofcaptures + NA
indiv.river$placing <-
    indiv.river$noofcaptures <-
    indiv.river$longest <-
    indiv.river$country <-
    indiv.river$approxmeanlength <- NULL
# Now overwrite teams.river  with the numbers from "wffc"
wffc.river <- subset(wffc, sector==1 | sector==1)  # Whanganui River
wffc.river <- subset(wffc, sector==2 | sector==3)  # Both Lakes
wffc.river <- subset(wffc, sector==2 | sector==2)  # Lake 2
wffc.river <- subset(wffc, sector==3 | sector==3)  # Lake 3
wffc.river <- subset(wffc, sector==4 | sector==5)  # Used in wffc.tex
myrowno <- 0
for(persons in indiv[["iname"]]) {
    myrowno <- myrowno + 1
#print("persons")
#print( persons )
    wffc.river.indiv <- wffc.river[wffc.river$iname == persons,]
    mypoints <- wffc.P1(wffc.river.indiv$length / 1000)
    if(length(wffc.river.indiv)) {
        indiv.river[myrowno,"points"] <- sum(mypoints)
        indiv.river[myrowno,"noofcaptures"] <- length(mypoints)
        indiv.river[myrowno,"mmmeanlength"] <- mean(wffc.river.indiv$length)
    } else {
        indiv.river[myrowno,"points"] <- 0 * NA
        indiv.river[myrowno,"noofcaptures"] <- 0 * NA
        indiv.river[myrowno,"mmmeanlength"] <- 0 * NA
    }
}


head(indiv.river)


# Scatterplot of lengths versus points---for each individual
par(mfrow = c(1,1), mar = c(5.1, 4.1, 0.3, 0.3), las = 1)
with(indiv.river,
     plot(points/1000, mmmeanlength,
          ylab = ifelse(standalone, myylab, ""),
          type = "p", col = "blue",
          cex = 0.30 * sqrt(noofcaptures),
          xlab = myxlab))


if(standalone) {

} else {
    text(x = 0.5, y = 291, "(b)", font = 1) # "helvitica"
    mtext(text = myylab, side = 2, line =  0.0, outer = TRUE, las = 0, cex = 1.3)
}


# Simple linear regression
fitir <- lm(mmmeanlength ~ points, data = indiv.river, weight = noofcaptures,
           y = TRUE, x = TRUE)
summary(fitir)
length(fitir$y)

# ======================================================================