# Example to illustrate bagging.
# T. Yee, 202305.
#
# Fits B regression trees of medv on log(crim), each to a bootstrap
# resample of the Boston data, draws every tree's fitted curve on a
# scatterplot, and then overlays the pointwise mean of the B curves
# together with +- 1 SD bands.

data(Boston, package = "MASS")
library("rpart")

# Set things up ----
n <- nrow(Boston)
Ngrid <- 500  # Number of grid points at which each tree is evaluated
range.crim <- with(Boston, range(crim))  # min and max

# Prediction grid: crim values equally spaced on the log scale, because
# the trees are fitted (and plotted) against log(crim).
Newdata <- data.frame(
  crim = exp(seq(log(range.crim[1]), log(range.crim[2]),
                 length.out = Ngrid))
)

# Scatterplot ----
plot(medv ~ log(crim), Boston, las = 1, col = "blue",
     main = "Bagging regression tree example")

# Now do some bagging ----
set.seed(1)
# Running sums over the B trees of the predictions (pvec1) and of the
# squared predictions (pvec2), one entry per grid point.
pvec1 <- pvec2 <- numeric(Ngrid)
B <- 100  # Number of bootstrap samples taken
for (b in seq_len(B)) {
  # Bootstrap sample: n row indices drawn with replacement.
  bss <- sample.int(n, n, replace = TRUE)
  rp <- rpart(medv ~ log(crim), data = Boston[bss, ],
              control = rpart.control(maxdepth = 3))  # A simple tree
  Pred <- predict(rp, Newdata)
  pvec1 <- pvec1 + Pred
  pvec2 <- pvec2 + Pred^2
  # Add the bagged tree to the plot
  lines(Pred ~ log(crim), Newdata, col = b, lwd = 0.5)
}

# Pointwise mean and SD ----
pvec1 <- pvec1 / B
# Sample variance (divisor B - 1) from the running sums:
# (mean of squares - square of mean) * B / (B - 1).
# Rounding can make the difference slightly negative where the trees
# agree almost exactly, so clamp at 0 to keep sqrt() from giving NaN.
SD <- sqrt(pmax((pvec2 / B - pvec1^2) * B / (B - 1), 0))

# Plot the 'average' of the B trees using a big black line, and
# add +- 1 SD too to it.
lines(pvec1 ~ log(crim), Newdata, col = 1, lwd = 10)
lines(pvec1 + SD ~ log(crim), Newdata, col = 1, lwd = 5)
lines(pvec1 - SD ~ log(crim), Newdata, col = 1, lwd = 5)