Packages: make sure they are installed
library(tidyverse)
library(rpart)
library(rpart.plot)
library(gridExtra)
Toy example: setup
We run simulations and create the first graph.
sample <- 10^3 # Sample size
x1 <- runif(sample) # x1 variable
x2 <- runif(sample) # x2 variable
y <- x1/3 + sin(10*x2)/4 + x2/9 - 0.12 + rnorm(sample)/10
data <- data.frame(y, x1, x2) # Aggregate dataset
data %>% ggplot() + # Plot
geom_point(aes(x = x1, y = y), color = "blue2", size = 0.3, alpha = 0.4) +
geom_point(aes(x = x2, y = y), color = "red2", size = 0.3, alpha = 0.4) +
stat_smooth(aes(x = x1, y = y), color = "blue2") +
stat_smooth(aes(x = x2, y = y), color = "red2") +
xlab("x1 (blue) or x2 (red)")

Gain analysis (pre-filter)
We compute the important indicators stemming from splits (the gains essentially).
sample <- 10^6 # More points & then start again.
x1 <- runif(sample) # x1 variable
x2 <- runif(sample) # x2 variable
y <- x1/3 + sin(10*x2)/4 + x2/9 - 0.12 + rnorm(sample)/10
data <- data.frame(y, x1, x2)
gain <- function(fit){ # Simple function
out_fit <- fit$frame
var10 <- out_fit$dev[1]
var11 <- out_fit$dev[2]
var12 <- out_fit$dev[3]
return((var10-var11-var12)/out_fit$n[1])
}
fit1 <- rpart(y ~ x1, data = data, maxdepth = 1)
gain(fit1) # Gain realized by first variable
[1] 0.007008279
fit2 <- rpart(y ~ x2, data = data, maxdepth = 1)
gain(fit2) # Gain realized by second variable
[1] 0.007792604
The second variable leads to the highest gain: this is the one that would be chosen.
And plot the corresponding trees.
layout(matrix(c(1,2), 1, 2, byrow = TRUE))
rpart.plot(fit1)
rpart.plot(fit2)

Tree post-filter
We show what happens if you filter the data upfront: the split changes.
q <- 0.15
sample <- 10^4 # Generate again.
x1 <- runif(sample) # x1 variable
x2 <- runif(sample) # x2 variable
y <- x1/3 + sin(10*x2)/4 + x2/9 - 0.12 + rnorm(sample)/10
data <- data.frame(y, x1, x2)
data_filter <- data %>% filter(y < quantile(y,q) | y > quantile(y, 1-q))
fit <- rpart(y ~ x1 + x2, data = data_filter, maxdepth = 1)
rpart.plot(fit)

Graphical explanation
This one is a bit long, we keep a lot of information. We show the separations dictated by the splits via a vertical line.
fit1 <- rpart(y ~ x1, data = data_filter, maxdepth = 1)
fit2 <- rpart(y ~ x2, data = data_filter, maxdepth = 1)
split1 <- fit1$splits[fit1$splits[,3]==min(fit1$splits[,3]),4]
split2 <- fit2$splits[fit2$splits[,3]==min(fit2$splits[,3]),4]
data_filter <- data %>% filter(y < quantile(y,q) | y > quantile(y, 1-q))
g1 <- data_filter %>%
ggplot() +
geom_point(aes(x = x1, y = y), color = "blue", size = 0.3) +
stat_smooth(aes(x = x1, y = y)) +
annotate("segment", x = split1, xend = 1,
y = mean(data_filter$y[data_filter$x1>split1]),
yend = mean(data_filter$y[data_filter$x1>split1]),
colour = "black", linetype = 2, size = 1.5) +
annotate("segment", x = 0, xend = split1,
y = mean(data_filter$y[data_filter$x1<split1]),
yend = mean(data_filter$y[data_filter$x1<split1]),
colour = "black", linetype = 2, size = 1.5) +
annotate("text", x = split1-0.05, y = 0.6 , label = "split", color = "red") +
geom_vline(xintercept = split1, color = "red") +
ggtitle("First variable")
g2 <- data_filter %>%
ggplot() +
geom_point(aes(x = x2, y = y), color = "red", size = 0.3) +
stat_smooth(aes(x = x2, y = y)) +
annotate("segment", x = split2, xend = 1,
y = mean(data_filter$y[data_filter$x2>split2]),
yend = mean(data_filter$y[data_filter$x2>split2]),
colour = "black", linetype = 2, size = 1.5) +
annotate("segment", x = 0, xend = split2,
y = mean(data_filter$y[data_filter$x2<split2]),
yend = mean(data_filter$y[data_filter$x2<split2]),
colour = "black", linetype = 2, size = 1.5) +
annotate("text", x = split2-0.05, y = 0.6 , label = "split", color = "red") +
geom_vline(xintercept = split2, color = "red") +
ggtitle("Second variable")
grid.arrange(g1,g2)

LS0tCnRpdGxlOiAiVGhlIHRveSBleGFtcGxlIgpvdXRwdXQ6IAogICAgaHRtbF9ub3RlYm9vawotLS0KCiMgUGFja2FnZXM6IG1ha2Ugc3VyZSB0aGV5IGFyZSBpbnN0YWxsZWQKCmBgYHtyIHBhY2t9CmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KHJwYXJ0KQpsaWJyYXJ5KHJwYXJ0LnBsb3QpCmxpYnJhcnkoZ3JpZEV4dHJhKQpgYGAKCgojIFRveSBleGFtcGxlOiBzZXR1cAoKV2UgcnVuIHNpbXVsYXRpb25zIGFuZCBjcmVhdGUgdGhlIGZpcnN0IGdyYXBoLgoKYGBge3IgbGluX3F1YWQsIG1lc3NhZ2UgPSBGQUxTRSwgd2FybmluZyA9IEZBTFNFfQpzYW1wbGUgPC0gMTBeMyAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAjIFNhbXBsZSBzaXplCngxIDwtIHJ1bmlmKHNhbXBsZSkgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICMgeDEgdmFyaWFibGUKeDIgPC0gcnVuaWYoc2FtcGxlKSAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIyB4MiB2YXJpYWJsZQp5IDwtIHgxLzMgKyBzaW4oMTAqeDIpLzQgKyB4Mi85IC0gMC4xMiArIHJub3JtKHNhbXBsZSkvMTAKZGF0YSA8LSBkYXRhLmZyYW1lKHksIHgxLCB4MikgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIyBBZ2dyZWdhdGUgZGF0YXNldApkYXRhICU+JSBnZ3Bsb3QoKSArICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAjIFBsb3QKICAgIGdlb21fcG9pbnQoYWVzKHggPSB4MSwgeSA9IHkpLCBjb2xvciA9ICJibHVlMiIsIHNpemUgPSAwLjMsIGFscGhhID0gMC40KSArCiAgICBnZW9tX3BvaW50KGFlcyh4ID0geDIsIHkgPSB5KSwgY29sb3IgPSAicmVkMiIsIHNpemUgPSAwLjMsIGFscGhhID0gMC40KSArCiAgICBzdGF0X3Ntb290aChhZXMoeCA9IHgxLCB5ID0geSksIGNvbG9yID0gImJsdWUyIikgKwogICAgc3RhdF9zbW9vdGgoYWVzKHggPSB4MiwgeSA9IHkpLCBjb2xvciA9ICJyZWQyIikgKwogICAgeGxhYigieDEgKGJsdWUpIG9yIHgyIChyZWQpIikKYGBgCgoKIyBHYWluIGFuYWx5c2lzIChwcmUtZmlsdGVyKQoKV2UgY29tcHV0ZSB0aGUgaW1wb3J0YW50IGluZGljYXRvcnMgc3RlbW1pbmcgZnJvbSBzcGxpdHMgKHRoZSBnYWlucyBlc3NlbnRpYWxseSkuCgpgYGB7ciBnYWlucywgbWVzc2FnZSA9IEZBTFNFLCB3YXJuaW5nID0gRkFMU0V9CnNhbXBsZSA8LSAxMF42ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICMgTW9yZSBwb2ludHMgJiB0aGVuIHN0YXJ0IGFnYWluLgp4MSA8LSBydW5pZihzYW1wbGUpICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAjIHgxIHZhcmlhYmxlCngyIDwtIHJ1bmlmKHNhbXBsZSkgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICMgeDIgdmFyaWFibGUKeSA8LSB4MS8zICsgc2luKDEwKngyKS80ICsgeDIvOSAtIDAuMTIgKyBybm9ybShzYW1wbGUpLzEwCmRhdGEgPC0gZGF0YS5mcmFtZSh5LCB4MSwgeDIpIAoKZ2FpbiA8LSBmdW5jdGlvbihmaXQpeyAgICAgIyBTaW1wbGUgZnVuY3Rpb24gdG8gZXh0cmFjdCB0aGUgZ2FpbnMKICAgIG91dF9maXQgPC0gZml0JGZyYW1lCiAgICB2YXIxMCA8LSBvdXRfZml0JGRldlsxXQogICAgdmFyMTEgPC0gb3V0X2ZpdCRkZXZbMl0gIAogICAgdmFyMTIgPC0gb3V0X2ZpdCRkZXZbM10KICAgIHJldHVybigodmFyMTAtdmFyMTEtdmFyMTIpL291dF9maXQkblsxXSkKfQoKZml0MSA8LSBycGFydCh5IH4geDEsIGRhdGEgPSBkYXRhLCBtYXhkZXB0aCA9IDEpCmdhaW4oZml0MSkgIyBHYWluIHJlYWxpemVkIGJ5IGZpcnN0IHZhcmlhYmxlCmZpdDIgPC0gcnBhcnQoeSB+IHgyLCBkYXRhID0gZGF0YSwgbWF4ZGVwdGggPSAxKQpnYWluKGZpdDIpICMgR2FpbiByZWFsaXplZCBieSBzZWNvbmQgdmFyaWFibGUKYGBgCgpUaGUgc2Vjb25kIHZhcmlhYmxlIGxlYWRzIHRvIHRoZSBoaWdoZXN0IGdhaW46IHRoaXMgaXMgdGhlIG9uZSB0aGF0IHdvdWxkIGJlIGNob3Nlbi4KCkFuZCBwbG90IHRoZSBjb3JyZXNwb25kaW5nIHRyZWVzLgpgYGB7ciB0cmVlX3ByZWZpbHRlcn0KbGF5b3V0KG1hdHJpeChjKDEsMiksIDEsIDIsIGJ5cm93ID0gVFJVRSkpCnJwYXJ0LnBsb3QoZml0MSkKcnBhcnQucGxvdChmaXQyKQpgYGAKCiMgVHJlZSBwb3N0LWZpbHRlcgoKV2Ugc2hvdyB3aGF0IGhhcHBlbnMgaWYgeW91IGZpbHRlciB0aGUgZGF0YSB1cGZyb250OiB0aGUgc3BsaXQgY2hhbmdlcy4KCmBgYHtyIHRyZWVfZmlsdGVyfQpxIDwtIDAuMTUKc2FtcGxlIDwtIDEwXjQgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIyBHZW5lcmF0ZSBhZ2Fpbi4KeDEgPC0gcnVuaWYoc2FtcGxlKSAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIyB4MSB2YXJpYWJsZQp4MiA8LSBydW5pZihzYW1wbGUpICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAjIHgyIHZhcmlhYmxlCnkgPC0geDEvMyArIHNpbigxMCp4MikvNCArIHgyLzkgLSAwLjEyICsgcm5vcm0oc2FtcGxlKS8xMApkYXRhIDwtIGRhdGEuZnJhbWUoeSwgeDEsIHgyKSAKZGF0YV9maWx0ZXIgPC0gZGF0YSAlPiUgZmlsdGVyKHkgPCBxdWFudGlsZSh5LHEpIHwgeSA+IHF1YW50aWxlKHksIDEtcSkpCmZpdCA8LSBycGFydCh5IH4geDEgKyB4MiwgZGF0YSA9IGRhdGFfZmlsdGVyLCBtYXhkZXB0aCA9IDEpICAKcnBhcnQucGxvdChmaXQpCmBgYAoKIyBHcmFwaGljYWwgZXhwbGFuYXRpb24KClRoaXMgb25lIGlzIGEgYml0IGxvbmcsIHdlIGtlZXAgYSBsb3Qgb2YgaW5mb3JtYXRpb24uIFdlIHNob3cgdGhlIHNlcGFyYXRpb25zIGRpY3RhdGVkIGJ5IHRoZSBzcGxpdHMgdmlhIGEgdmVydGljYWwgbGluZS4KCmBgYHtyIGZpbHRlciwgbWVzc2FnZSA9IEZBTFNFLCB3YXJuaW5nID0gRkFMU0V9CmZpdDEgPC0gcnBhcnQoeSB+IHgxLCBkYXRhID0gZGF0YV9maWx0ZXIsIG1heGRlcHRoID0gMSkKZml0MiA8LSBycGFydCh5IH4geDIsIGRhdGEgPSBkYXRhX2ZpbHRlciwgbWF4ZGVwdGggPSAxKQpzcGxpdDEgPC0gZml0MSRzcGxpdHNbZml0MSRzcGxpdHNbLDNdPT1taW4oZml0MSRzcGxpdHNbLDNdKSw0XQpzcGxpdDIgPC0gZml0MiRzcGxpdHNbZml0MiRzcGxpdHNbLDNdPT1taW4oZml0MiRzcGxpdHNbLDNdKSw0XQpkYXRhX2ZpbHRlciA8LSBkYXRhICU+JSBmaWx0ZXIoeSA8IHF1YW50aWxlKHkscSkgfCB5ID4gcXVhbnRpbGUoeSwgMS1xKSkKCmcxIDwtIGRhdGFfZmlsdGVyICU+JSAKICAgIGdncGxvdCgpICsKICAgIGdlb21fcG9pbnQoYWVzKHggPSB4MSwgeSA9IHkpLCBjb2xvciA9ICJibHVlIiwgc2l6ZSA9IDAuMykgKwogICAgc3RhdF9zbW9vdGgoYWVzKHggPSB4MSwgeSA9IHkpKSArCiAgICBhbm5vdGF0ZSgic2VnbWVudCIsIHggPSBzcGxpdDEsIHhlbmQgPSAxLCAKICAgICAgICAgICAgIHkgPSBtZWFuKGRhdGFfZmlsdGVyJHlbZGF0YV9maWx0ZXIkeDE+c3BsaXQxXSksIAogICAgICAgICAgICAgeWVuZCA9IG1lYW4oZGF0YV9maWx0ZXIkeVtkYXRhX2ZpbHRlciR4MT5zcGxpdDFdKSwgCiAgICAgICAgICAgICBjb2xvdXIgPSAiYmxhY2siLCBsaW5ldHlwZSA9IDIsIHNpemUgPSAxLjUpICsKICAgIGFubm90YXRlKCJzZWdtZW50IiwgeCA9IDAsIHhlbmQgPSBzcGxpdDEsIAogICAgICAgICAgICAgeSA9IG1lYW4oZGF0YV9maWx0ZXIkeVtkYXRhX2ZpbHRlciR4MTxzcGxpdDFdKSwgCiAgICAgICAgICAgICB5ZW5kID0gbWVhbihkYXRhX2ZpbHRlciR5W2RhdGFfZmlsdGVyJHgxPHNwbGl0MV0pLCAKICAgICAgICAgICAgIGNvbG91ciA9ICJibGFjayIsIGxpbmV0eXBlID0gMiwgc2l6ZSA9IDEuNSkgKwogICAgYW5ub3RhdGUoInRleHQiLCB4ID0gc3BsaXQxLTAuMDUsIHkgPSAwLjYgLCBsYWJlbCA9ICJzcGxpdCIsIGNvbG9yID0gInJlZCIpICsKICAgIGdlb21fdmxpbmUoeGludGVyY2VwdCA9IHNwbGl0MSwgY29sb3IgPSAicmVkIikgKwogICAgZ2d0aXRsZSgiRmlyc3QgdmFyaWFibGUiKQoKZzIgPC0gZGF0YV9maWx0ZXIgJT4lIAogICAgZ2dwbG90KCkgKwogICAgZ2VvbV9wb2ludChhZXMoeCA9IHgyLCB5ID0geSksIGNvbG9yID0gInJlZCIsIHNpemUgPSAwLjMpICsKICAgIHN0YXRfc21vb3RoKGFlcyh4ID0geDIsIHkgPSB5KSkgKwogICAgYW5ub3RhdGUoInNlZ21lbnQiLCB4ID0gc3BsaXQyLCB4ZW5kID0gMSwgCiAgICAgICAgICAgICB5ID0gbWVhbihkYXRhX2ZpbHRlciR5W2RhdGFfZmlsdGVyJHgyPnNwbGl0Ml0pLCAKICAgICAgICAgICAgIHllbmQgPSBtZWFuKGRhdGFfZmlsdGVyJHlbZGF0YV9maWx0ZXIkeDI+c3BsaXQyXSksIAogICAgICAgICAgICAgY29sb3VyID0gImJsYWNrIiwgbGluZXR5cGUgPSAyLCBzaXplID0gMS41KSArCiAgICBhbm5vdGF0ZSgic2VnbWVudCIsIHggPSAwLCB4ZW5kID0gc3BsaXQyLCAKICAgICAgICAgICAgIHkgPSBtZWFuKGRhdGFfZmlsdGVyJHlbZGF0YV9maWx0ZXIkeDI8c3BsaXQyXSksIAogICAgICAgICAgICAgeWVuZCA9IG1lYW4oZGF0YV9maWx0ZXIkeVtkYXRhX2ZpbHRlciR4MjxzcGxpdDJdKSwgCiAgICAgICAgICAgICBjb2xvdXIgPSAiYmxhY2siLCBsaW5ldHlwZSA9IDIsIHNpemUgPSAxLjUpICsKICAgIGFubm90YXRlKCJ0ZXh0IiwgeCA9IHNwbGl0Mi0wLjA1LCB5ID0gMC42ICwgbGFiZWwgPSAic3BsaXQiLCBjb2xvciA9ICJyZWQiKSArCiAgICBnZW9tX3ZsaW5lKHhpbnRlcmNlcHQgPSBzcGxpdDIsIGNvbG9yID0gInJlZCIpICsKICAgIGdndGl0bGUoIlNlY29uZCB2YXJpYWJsZSIpCmdyaWQuYXJyYW5nZShnMSxnMikKYGBg