本文介绍了R:从决策树中提取规则的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!
问题描述
我正在使用R编程语言。最近,我读到了一种新的决策树算法,称为强化学习树(RLT),它被认为有可能将&Quot;Better&Quot;决策树匹配到数据集。此处提供了该库的文档:https://cran.r-project.org/web/packages/RLT/RLT.pdf
我尝试使用此库对(著名的)虹膜数据集运行分类决策树:
library(RLT)
data(iris)
fit = RLT(iris[,c(1,2,3,4)], iris$Species, model = "classification", ntrees = 1)
问题:是否可以从此决策树中提取规则(&q;)?
例如,如果您使用购物车决策树模型:
library(rpart)
library(rpart.plot)
fit <-rpart( Species ~. , data = iris)
rpart.plot(fit)
rpart.rules(fit)
Species seto vers virg
setosa [1.00 .00 .00] when Petal.Length < 2.5
versicolor [ .00 .91 .09] when Petal.Length >= 2.5 & Petal.Width < 1.8
virginica [ .00 .02 .98] when Petal.Length >= 2.5 & Petal.Width >= 1.8
使用RLT库可以做到这一点吗?我一直在阅读这个库的文档,但似乎找不到直接提取决策规则的方法。我知道这个库通常被用作随机林(没有决策规则)的替代品--但我阅读了该算法的原始论文,其中指定RLT算法适合单个决策树(通过RLT算法),然后将它们聚合在一起,就像在随机林中一样。因此,在某种程度上,RLT算法能够适合单个决策树-理论上应该具有决策规则。
有人知道如何提取这些规则吗?
谢谢!
引用:
推荐答案
规则以相对难以解释的表格格式存储在fit$FittedTrees[[1]]
中。
我已经为您构建了一个相当长的函数,该函数将把规则提取为数据框,并在需要时将树绘制为gggraph。
RLT_tree <- function(RLT_obj, plot = TRUE)
{
tree <- as.data.frame(t(RLT_obj$FittedTrees[[1]]))
tree <- tree[c(2, 3, 5, 6, 8, 9, grep("Class\d", names(tree)))]
class_cols <- grep("Class\d", names(tree))
names(tree)[class_cols] <-
RLT_obj$ylevels[1 + as.numeric(sub("Class(\d+)", "\1",
names(tree)[class_cols]))]
tree$variable <- RLT_obj$variablenames[tree$SplitVar1]
tree$variable[is.na(tree$variable)] <- "(Leaf node)"
tree$rule <- tree$variable
tree$depth <- numeric(nrow(tree))
tree$rightness <- numeric(nrow(tree))
tree$group <- character(nrow(tree))
walk_tree <- function(node, depth, rightness, node_label = "Start", group = "S")
{
new_row <- tree[which(tree$Node == node),]
new_row$depth <- depth
new_row$rightness <- rightness
left_label <- paste(new_row$variable, new_row$SplitValue, sep = " < ")
right_label <- paste(new_row$variable, new_row$SplitValue, sep = " > ")
new_row$variable <- paste(node_label, "
n = ", new_row$NumObs)
new_row$rule <- node_label
if(is.nan(new_row$SplitValue)) {
n_objs <- round(new_row[,class_cols] * new_row$NumObs)
classify <- paste((names(tree)[class_cols])[n_objs != 0],
n_objs[n_objs != 0],
collapse = "
")
new_row$variable <- paste(new_row$variable, classify, sep = "
")
}
new_row$group <- group
tree[which(tree$Node == node),] <<- new_row
if(!is.nan(new_row$SplitValue)){
walk_tree(new_row$NextLeft, depth + 1, rightness - 2/(depth/2 + 1),
left_label, paste(group, "L"))
walk_tree(new_row$NextRight, depth + 1, rightness + 2/(depth/2 + 1),
right_label, paste(group, "R"))
}
}
walk_tree(0, 0, 0)
tree$depth <- max(tree$depth) - tree$depth
tree$type <- is.nan(tree$NextLeft)
tree$group <- substr(tree$group, 1, nchar(tree$group) - 1)
if(plot)
{
print(ggplot(tree, aes(rightness, depth)) +
geom_segment(aes(x = rightness, xend = rightness,
y = depth, yend = depth - 1, alpha = type)) +
geom_line(aes(group = group)) +
geom_label(aes(label = variable, fill = type), size = 4) +
theme_void() +
scale_x_continuous(expand = c(0, 1)) +
suppressWarnings(scale_alpha_discrete(range = c(1, 0))) +
theme(legend.position = "none"))
}
tree$isLeaf <- is.nan(tree$NextLeft)
tree[c(match(c("Node", "rule", "depth", "isLeaf"), names(tree)), class_cols)]
}
这允许:
df <- RLT_tree(fit, plot = TRUE)
和
df
#> Node rule depth isLeaf setosa versicolor virginica
#> 1 0 Start 6 FALSE 0.3111111 0.34814815 0.3407407
#> 2 1 Sepal.Width < 3.2 5 FALSE 0.1573034 0.51685393 0.3258427
#> 3 2 Sepal.Width > 3.2 5 FALSE 0.6086957 0.02173913 0.3695652
#> 4 3 Sepal.Length < 5.4 4 FALSE 0.7000000 0.30000000 0.0000000
#> 5 4 Sepal.Length > 5.4 4 TRUE 0.0000000 0.57971014 0.4202899
#> 6 5 Petal.Length < 1.3 3 TRUE 1.0000000 0.00000000 0.0000000
#> 7 6 Petal.Length > 1.3 3 FALSE 0.6000000 0.40000000 0.0000000
#> 8 7 Petal.Length < 1.4 2 TRUE 1.0000000 0.00000000 0.0000000
#> 9 8 Petal.Length > 1.4 2 FALSE 0.5000000 0.50000000 0.0000000
#> 10 9 Petal.Length < 3.9 1 FALSE 0.7500000 0.25000000 0.0000000
#> 11 10 Petal.Length > 3.9 1 TRUE 0.0000000 1.00000000 0.0000000
#> 12 11 Sepal.Length < 4.9 0 TRUE 1.0000000 0.00000000 0.0000000
#> 13 12 Sepal.Length > 4.9 0 TRUE 0.0000000 1.00000000 0.0000000
#> 14 13 Petal.Width < 0.2 4 TRUE 1.0000000 0.00000000 0.0000000
#> 15 14 Petal.Width > 0.2 4 FALSE 0.3793103 0.03448276 0.5862069
#> 16 15 Sepal.Length < 5.7 3 TRUE 1.0000000 0.00000000 0.0000000
#> 17 16 Sepal.Length > 5.7 3 FALSE 0.0000000 0.05555556 0.9444444
#> 18 17 Sepal.Width < 3.3 2 TRUE 0.0000000 0.00000000 1.0000000
#> 19 18 Sepal.Width > 3.3 2 FALSE 0.0000000 0.08333333 0.9166667
#> 20 19 Petal.Length < 6.1 1 FALSE 0.0000000 0.11111111 0.8888889
#> 21 20 Petal.Length > 6.1 1 TRUE 0.0000000 0.00000000 1.0000000
#> 22 21 Sepal.Length < 6.3 0 TRUE 0.0000000 0.16666667 0.8333333
#> 23 22 Sepal.Length > 6.3 0 TRUE 0.0000000 0.00000000 1.0000000
要在更一般的情况下显示此功能,我们还可以执行以下操作:
fit2 = RLT(mtcars[,1:3], factor(rownames(mtcars)), model = "classification", ntrees = 1)
df <- RLT_tree(fit2)
这篇关于R:从决策树中提取规则的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持!