Title: | Interpret Tree Ensembles |
---|---|
Description: | For tree ensembles such as random forests, regularized random forests and gradient boosted trees, this package provides functions for: extracting, measuring and pruning rules; selecting a compact rule set; summarizing rules into a learner; calculating frequent variable interactions; formatting rules in LaTeX code. Reference: Interpreting tree ensembles with inTrees (Houtao Deng, 2019, <doi:10.1007/s41060-018-0144-8>). |
Authors: | Houtao Deng [aut, cre], Xin Guan [aut], Vadim Khotilovich [aut] |
Maintainer: | Houtao Deng <[email protected]> |
License: | GPL (>= 3) |
Version: | 1.5 |
Built: | 2025-02-26 05:00:36 UTC |
Source: | https://github.com/softwaredeng/intrees |
Apply STEL to data and get predictions
applyLearner(learner, X)
learner |
a matrix with rules ordered by priority |
X |
predictor variable matrix |
predictions for the data
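A minimal end-to-end sketch, assuming the RRF package is installed (as in the examples below): a STEL is built from a random forest and then applied back to the predictor matrix.
library(RRF)
data(iris)
X <- iris[,1:(ncol(iris)-1)]
target <- iris[,"Species"]
rf <- RRF(X,as.factor(target),ntree=100) # build an ordinary RF
treeList <- RF2List(rf) # transform the RF into a list of trees
ruleExec <- extractRules(treeList,X) # extract rule conditions
ruleExec <- unique(ruleExec)
ruleMetric <- getRuleMetric(ruleExec,X,target) # measure rules
learner <- buildLearner(ruleMetric,X,target) # simplified tree ensemble learner (STEL)
pred <- applyLearner(learner,X) # one prediction per row of X
mean(pred==target) # training accuracy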
Build a simplified tree ensemble learner (STEL). Currently works only for classification problems.
buildLearner(ruleMetric, X, target, minFreq = 0.01)
ruleMetric |
a matrix including the conditions, predictions, and metrics |
X |
predictor variable matrix |
target |
target variable |
minFreq |
minimum frequency of a rule condition in order to be included in STEL. |
a matrix including the conditions, predictions, and metrics, ordered by priority.
Houtao Deng
Houtao Deng, Interpreting Tree Ensembles with inTrees, technical report, 2014
data(iris)
library(RRF)
X <- iris[,1:(ncol(iris)-1)]
target <- iris[,"Species"]
rf <- RRF(X,as.factor(target),ntree=100) # build an ordinary RF
treeList <- RF2List(rf)
ruleExec <- extractRules(treeList,X)
ruleExec <- unique(ruleExec)
ruleMetric <- getRuleMetric(ruleExec,X,target) # measure rules
ruleMetric <- pruneRule(ruleMetric,X,target) # prune each rule
#ruleMetric <- selectRuleRRF(ruleMetric,X,target) # rule selection
learner <- buildLearner(ruleMetric,X,target)
pred <- applyLearner(learner,X)
read <- presentRules(learner,colnames(X)) # more readable format
# format the rules and metrics as a table in LaTeX code
library(xtable)
print(xtable(read), include.rownames=FALSE)
print(xtable(ruleMetric[1:2,]), include.rownames=FALSE)
Simulate data
dataSimulate(flag = 1, nCol = 20, nRow = 1000)
flag |
1 (default): team optimization; 2: non-linear; 3: linear. |
nCol |
the number of columns in the data set; must be >= 2. |
nRow |
the number of rows in the data set. |
predictor variable matrix and target variable
res <- dataSimulate(flag=1)
X <- res$X; target <- res$target
Discretize a variable
dicretizeVector(v, K = 3)
v |
vector |
K |
discretize into up to K levels with equal frequency |
discretized levels for v
data(iris)
dicretizeVector(iris[,1],3)
Extract rule conditions from a list of trees. Use functions RF2List/GBM2List to transform RF/GBM objects into lists of trees.
extractRules(treeList, X, ntree = 100, maxdepth = 6, random = FALSE, digits = NULL)
treeList |
tree list |
X |
predictor variable matrix |
ntree |
conditions are extracted from the first ntree trees |
maxdepth |
conditions are extracted from the top maxdepth levels from each tree |
random |
if TRUE, the max depth for each tree is an integer randomly chosen between 1 and maxdepth |
digits |
digits for rounding |
a set of rule conditions
library(RRF)
data(iris)
X <- iris[,1:(ncol(iris)-1)]
target <- iris[,"Species"]
rf <- RRF(X,as.factor(target),ntree=100) # build an ordinary RF
treeList <- RF2List(rf)
ruleExec <- extractRules(treeList,X,digits=4) # transform to R-executable rules
ruleExec <- unique(ruleExec)
Transform gbm object to a list of trees that can be used for rule condition extraction
GBM2List(gbm1,X)
gbm1 |
gbm object |
X |
predictor variable matrix |
a list of trees in an inTrees-required format
library(gbm)
data(iris)
X <- iris[,1:(ncol(iris)-1)]
target <- iris[,"Species"]
gbmFit <- gbm(Species~ ., data=iris, n.trees = 400, interaction.depth = 10, distribution="multinomial")
treeList <- GBM2List(gbmFit,X)
ruleExec <- extractRules(treeList,X)
ruleExec <- unique(ruleExec)
#ruleExec <- ruleExec[1:min(2000,length(ruleExec)),,drop=FALSE]
ruleMetric <- getRuleMetric(ruleExec,X,target)
ruleMetric <- pruneRule(ruleMetric,X,target)
ruleMetric <- unique(ruleMetric)
learner <- buildLearner(ruleMetric,X,target)
pred <- applyLearner(learner,X)
readableLearner <- presentRules(learner,colnames(X)) # more readable format
err <- 1-sum(pred==target)/length(pred)
Calculate frequent variable interactions
getFreqPattern(ruleMetric, minsup = 0.01, minconf = 0.5, minlen = 1, maxlen = 4)
ruleMetric |
a matrix including conditions, predictions, and the metrics |
minsup |
minimum support of conditions in a tree ensemble |
minconf |
minimum confidence of the rules |
minlen |
minimum length of the conditions |
maxlen |
max length of the conditions |
a matrix including frequent variable interactions (in the form of conditions), predictions, length, support, and confidence.
library(RRF)
library(arules)
data(iris)
X <- iris[,1:(ncol(iris)-1)]
target <- iris[,"Species"]
rf <- RRF(X,as.factor(target),ntree=100) # build an ordinary RF
treeList <- RF2List(rf)
ruleExec <- extractRules(treeList,X) # transform to R-executable rules
ruleMetric <- getRuleMetric(ruleExec,X,target)
freqPattern <- getFreqPattern(ruleMetric)
freqPatternMetric <- getRuleMetric(freqPattern,X,target)
Assign outcomes to conditions, and measure the rules
getRuleMetric(ruleExec, X, target)
ruleExec |
a set of rule conditions |
X |
predictor variable matrix |
target |
target variable |
a matrix including the conditions, predictions, and metrics
Houtao Deng, Interpreting Tree Ensembles with inTrees, technical report, 2014
library(RRF)
data(iris)
X <- iris[,1:(ncol(iris)-1)]
target <- iris[,"Species"]
rf <- RRF(X,as.factor(target),ntree=100) # build an ordinary RF
treeList <- RF2List(rf)
ruleExec <- extractRules(treeList,X) # transform to R-executable rules
ruleExec <- unique(ruleExec)
ruleMetric <- getRuleMetric(ruleExec,X,target) # measure rules
Present a learner using column names instead of X[,i]
presentRules(rules, colN, digits)
rules |
a set of rules |
colN |
a vector including the column names |
digits |
digits for rounding |
a matrix including the conditions (rewritten with column names), predictions, and metrics
# See function "buildLearner"
# See function "buildLearner"
Prune irrelevant variable-value pairs from a rule condition
pruneRule(rules, X, target, maxDecay = 0.05, typeDecay = 2)
rules |
A matrix including the rules and metrics |
X |
predictor variable matrix |
target |
target variable vector |
maxDecay |
threshold of decay used when deciding whether to drop a variable-value pair |
typeDecay |
1: relative error; 2: error; default: 2 |
A matrix including the pruned rules and their metrics
Houtao Deng
Houtao Deng, Interpreting Tree Ensembles with inTrees, technical report, 2014
# see function "buildLearner"
# see function "buildLearner"
Transform a random forest object to a list of trees
RF2List(rf)
rf |
random forest object |
a list of trees
library(RRF)
data(iris)
X <- iris[,1:(ncol(iris)-1)]
target <- iris[,"Species"]
rf <- RRF(X,as.factor(target),ntree=100) # build an ordinary RF
treeList <- RF2List(rf)
ruleExec <- extractRules(treeList,X) # transform to R-executable rules
Select a set of relevant and non-redundant rules using regularized random forests
selectRuleRRF(ruleMetric, X, target)
ruleMetric |
a matrix including the rules and metrics |
X |
predictor variable matrix |
target |
response variable |
a matrix including a set of relevant and non-redundant rules, and their metrics
Houtao Deng
# See function "buildLearner:
# See function "buildLearner:
Transform an xgboost object to a list of trees
XGB2List(xgb, X)
xgb |
xgboost object |
X |
predictor variable matrix |
a list of trees in an inTrees-required format
library(data.table)
library(xgboost)
# test data set 1: iris
X <- within(iris,rm("Species")); Y <- iris[,"Species"]
model_mat <- model.matrix(~. -1, data=X)
xgb <- xgboost(model_mat, label = as.numeric(Y) - 1, nrounds = 20,
               objective = "multi:softprob", num_class = 3)
tree_list <- XGB2List(xgb,model_mat)