R version 4.3.0 (2023-04-21) -- "Already Tomorrow"
Copyright (C) 2023 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

  Natural language support but running in an English locale

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> # Small example to illustrate rpart() on the kyphosis data
> 
> 
> library("rpart")
> set.seed(784)
> kyphosis[sample(nrow(kyphosis), 4), ]
   Kyphosis Age Number Start
8    absent  37      3    16
43   absent 143      9     3
52   absent   9      2    17
66   absent  17      4    10
> 
> fit <- rpart(Kyphosis ~ ., data = kyphosis, method = "class")
> 
> fit
n= 81 

node), split, n, loss, yval, (yprob)
      * denotes terminal node

 1) root 81 17 absent (0.79012346 0.20987654)  
   2) Start>=8.5 62  6 absent (0.90322581 0.09677419)  
     4) Start>=14.5 29  0 absent (1.00000000 0.00000000) *
     5) Start< 14.5 33  6 absent (0.81818182 0.18181818)  
      10) Age< 55 12  0 absent (1.00000000 0.00000000) *
      11) Age>=55 21  6 absent (0.71428571 0.28571429)  
        22) Age>=111 14  2 absent (0.85714286 0.14285714) *
        23) Age< 111 7  3 present (0.42857143 0.57142857) *
   3) Start< 8.5 19  8 present (0.42105263 0.57894737) *
> printcp(fit)

Classification tree:
rpart(formula = Kyphosis ~ ., data = kyphosis, method = "class")

Variables actually used in tree construction:
[1] Age   Start

Root node error: 17/81 = 0.20988

n= 81 

        CP nsplit rel error  xerror    xstd
1 0.176471      0   1.00000 1.00000 0.21559
2 0.019608      1   0.82353 0.88235 0.20565
3 0.010000      4   0.76471 0.94118 0.21078
> summary(fit)
Call:
rpart(formula = Kyphosis ~ ., data = kyphosis, method = "class")
  n= 81 

          CP nsplit rel error    xerror      xstd
1 0.17647059      0 1.0000000 1.0000000 0.2155872
2 0.01960784      1 0.8235294 0.8823529 0.2056488
3 0.01000000      4 0.7647059 0.9411765 0.2107780

Variable importance
 Start    Age Number 
    64     24     12 

Node number 1: 81 observations,    complexity param=0.1764706
  predicted class=absent   expected loss=0.2098765  P(node) =1
    class counts:    64    17
   probabilities: 0.790 0.210 
  left son=2 (62 obs) right son=3 (19 obs)
  Primary splits:
      Start  < 8.5  to the right, improve=6.762330, (0 missing)
      Number < 5.5  to the left,  improve=2.866795, (0 missing)
      Age    < 39.5 to the left,  improve=2.250212, (0 missing)
  Surrogate splits:
      Number < 6.5  to the left,  agree=0.802, adj=0.158, (0 split)

Node number 2: 62 observations,    complexity param=0.01960784
  predicted class=absent   expected loss=0.09677419  P(node) =0.7654321
    class counts:    56     6
   probabilities: 0.903 0.097 
  left son=4 (29 obs) right son=5 (33 obs)
  Primary splits:
      Start  < 14.5 to the right, improve=1.0205280, (0 missing)
      Age    < 55   to the left,  improve=0.6848635, (0 missing)
      Number < 4.5  to the left,  improve=0.2975332, (0 missing)
  Surrogate splits:
      Number < 3.5  to the left,  agree=0.645, adj=0.241, (0 split)
      Age    < 16   to the left,  agree=0.597, adj=0.138, (0 split)

Node number 3: 19 observations
  predicted class=present  expected loss=0.4210526  P(node) =0.2345679
    class counts:     8    11
   probabilities: 0.421 0.579 

Node number 4: 29 observations
  predicted class=absent   expected loss=0  P(node) =0.3580247
    class counts:    29     0
   probabilities: 1.000 0.000 

Node number 5: 33 observations,    complexity param=0.01960784
  predicted class=absent   expected loss=0.1818182  P(node) =0.4074074
    class counts:    27     6
   probabilities: 0.818 0.182 
  left son=10 (12 obs) right son=11 (21 obs)
  Primary splits:
      Age    < 55   to the left,  improve=1.2467530, (0 missing)
      Start  < 12.5 to the right, improve=0.2887701, (0 missing)
      Number < 3.5  to the right, improve=0.1753247, (0 missing)
  Surrogate splits:
      Start  < 9.5  to the left,  agree=0.758, adj=0.333, (0 split)
      Number < 5.5  to the right, agree=0.697, adj=0.167, (0 split)

Node number 10: 12 observations
  predicted class=absent   expected loss=0  P(node) =0.1481481
    class counts:    12     0
   probabilities: 1.000 0.000 

Node number 11: 21 observations,    complexity param=0.01960784
  predicted class=absent   expected loss=0.2857143  P(node) =0.2592593
    class counts:    15     6
   probabilities: 0.714 0.286 
  left son=22 (14 obs) right son=23 (7 obs)
  Primary splits:
      Age    < 111  to the right, improve=1.71428600, (0 missing)
      Start  < 12.5 to the right, improve=0.79365080, (0 missing)
      Number < 3.5  to the right, improve=0.07142857, (0 missing)

Node number 22: 14 observations
  predicted class=absent   expected loss=0.1428571  P(node) =0.1728395
    class counts:    12     2
   probabilities: 0.857 0.143 

Node number 23: 7 observations
  predicted class=present  expected loss=0.4285714  P(node) =0.08641975
    class counts:     3     4
   probabilities: 0.429 0.571 

> names(fit) 
 [1] "frame"               "where"               "call"               
 [4] "terms"               "cptable"             "method"             
 [7] "parms"               "control"             "functions"          
[10] "numresp"             "splits"              "variable.importance"
[13] "y"                   "ordered"            
> 
> 
> 
> if(FALSE) {
+   postscript("kyphosis.eps",
+              horiz = FALSE, onefile = FALSE, print.it = FALSE,
+              width = 9, height = 5)
+ } else {
+   pdf("kyphosis.pdf",
+       onefile = FALSE,
+       width = 9, height = 5)
+ }
> 
> par(mar = c(5, 4, 2, 2) + 0.8, mfrow = c(1, 1), xpd = TRUE)
> plot(fit)
> text(fit)
> dev.off() 
null device 
          1 
> 
> pfit <- predict(fit, kyphosis, type = "class")
> tab <- table(with(kyphosis, Kyphosis), pfit)
> 
> # Error checking: the table must be square
> if(!all(dimnames(tab)[[1]] == dimnames(tab)[[2]]))
+   stop("not all dimnames are the same")
> 
> # Obtain the (resubstitution) misclassification rate
> 1 - sum(diag(tab)) / sum(tab)
[1] 0.1604938
> # Other ways of obtaining the (resubstitution) misclassification rate
> rev(fit$cptable[,"rel error"])[1] *  rev(fit$parms$prior)[1]
        3 
0.1604938 
> tail(fit$cptable[,"rel error"], 1) *  tail(fit$parms$prior, 1)
        2 
0.1604938 
> 
> 
> 
> 
> proc.time()
   user  system elapsed 
  0.190   0.027   0.198