#################################################################### cross.val.lm <- function (f, nfold = 10, nrep = 10, ...){ # f: an lm object returned by lm # nfold: the number of folds (e.g. 5 for 5-fold CV) # nrep: the number of random splits # returns the CV estimate X <- model.matrix(f$terms, model.frame(f)) y = fitted.values(f) + residuals(f) n <- dim(X)[1] CV <- numeric(nrep) pred.error <- numeric(nfold) m <- n%/%nfold for (k in 1:nrep) { rand.order <- order(runif(n)) yr <- y[rand.order] Xr <- X[rand.order, ] sample <- 1:m for (i in 1:nfold) { use.mat <- as.matrix(Xr[-sample,]) test.mat <- as.matrix(Xr[sample,]) y.use = yr[-sample] new.data <- data.frame(test.mat) fit <- lm(y.use ~ -1+use.mat) my.predict = test.mat%*%coefficients(fit) pred.error[i] <- sum((yr[sample] - my.predict)^2)/m sample <- if(i==nfold) (max(sample)+1):n else sample + m } CV[k] <- mean(pred.error) } mean(CV) } ###################################################################