Title: | Interpret Tree Ensembles |
---|---|
Description: | For tree ensembles such as random forests, regularized random forests and gradient boosted trees, this package provides functions for: extracting, measuring and pruning rules; selecting a compact rule set; summarizing rules into a learner; calculating frequent variable interactions; formatting rules in LaTeX code. Reference: Interpreting tree ensembles with inTrees (Houtao Deng, 2019, <doi:10.1007/s41060-018-0144-8>). |
Authors: | Houtao Deng [aut, cre], Xin Guan [aut], Vadim Khotilovich [aut] |
Maintainer: | Houtao Deng <[email protected]> |
License: | GPL (>= 3) |
Version: | 1.5 |
Built: | 2025-02-26 05:00:36 UTC |
Source: | https://github.com/softwaredeng/intrees |
Apply STEL to data and get predictions
applyLearner(learner, X)
learner |
a matrix with rules ordered by priority |
X |
predictor variable matrix |
predictions for the data
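A minimal end-to-end sketch, assuming the RRF package is installed (as in the examples below): a STEL is built from a random forest and then applied back to the predictor matrix.
library(RRF)
data(iris)
X <- iris[,1:(ncol(iris)-1)]
target <- iris[,"Species"]
rf <- RRF(X,as.factor(target),ntree=100) # build an ordinary RF
treeList <- RF2List(rf) # transform the RF into a list of trees
ruleExec <- extractRules(treeList,X) # extract rule conditions
ruleExec <- unique(ruleExec)
ruleMetric <- getRuleMetric(ruleExec,X,target) # measure rules
learner <- buildLearner(ruleMetric,X,target) # simplified tree ensemble learner (STEL)
pred <- applyLearner(learner,X) # one prediction per row of X
mean(pred==target) # training accuracy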
Build a simplified tree ensemble learner (STEL). Currently works only for classification problems.
buildLearner(ruleMetric, X, target, minFreq = 0.01)
ruleMetric |
a matrix including the conditions, predictions, and metrics |
X |
predictor variable matrix |
target |
target variable |
minFreq |
minimum frequency of a rule condition in order to be included in STEL. |
a matrix including the conditions, predictions, and metrics, ordered by priority.
Houtao Deng
Houtao Deng, Interpreting Tree Ensembles with inTrees, technical report, 2014
data(iris)
library(RRF)
X <- iris[,1:(ncol(iris)-1)]
target <- iris[,"Species"]
rf <- RRF(X,as.factor(target),ntree=100) # build an ordinary RF
treeList <- RF2List(rf)
ruleExec <- extractRules(treeList,X)
ruleExec <- unique(ruleExec)
ruleMetric <- getRuleMetric(ruleExec,X,target) # measure rules
ruleMetric <- pruneRule(ruleMetric,X,target) # prune each rule
#ruleMetric <- selectRuleRRF(ruleMetric,X,target) # rule selection
learner <- buildLearner(ruleMetric,X,target)
pred <- applyLearner(learner,X)
read <- presentRules(learner,colnames(X)) # more readable format
# format the rules and metrics as a table in LaTeX code
library(xtable)
print(xtable(read), include.rownames=FALSE)
print(xtable(ruleMetric[1:2,]), include.rownames=FALSE)
Simulate data
dataSimulate(flag = 1, nCol = 20, nRow = 1000)
flag |
1 (default): team optimization; 2: non-linear; 3: linear. |
nCol |
the number of columns in the data set; must be >= 2. |
nRow |
the number of rows in the data set. |
predictor variable matrix and target variable
res <- dataSimulate(flag=1)
X <- res$X; target <- res$target
Discretize a variable
dicretizeVector(v, K = 3)
v |
vector |
K |
discretize into up to K levels with equal frequency |
discretized levels for v
data(iris)
dicretizeVector(iris[,1],3)
Extract rule conditions from a list of trees. Use functions RF2List/GBM2List to transform RF/GBM objects into lists of trees.
extractRules(treeList, X, ntree = 100, maxdepth = 6, random = FALSE, digits = NULL)
treeList |
tree list |
X |
predictor variable matrix |
ntree |
conditions are extracted from the first ntree trees |
maxdepth |
conditions are extracted from the top maxdepth levels from each tree |
random |
if TRUE, the max depth for each tree is an integer randomly chosen between 1 and maxdepth |
digits |
digits for rounding |
a set of rule conditions
library(RRF)
data(iris)
X <- iris[,1:(ncol(iris)-1)]
target <- iris[,"Species"]
rf <- RRF(X,as.factor(target),ntree=100) # build an ordinary RF
treeList <- RF2List(rf)
ruleExec <- extractRules(treeList,X,digits=4) # transform to R-executable rules
ruleExec <- unique(ruleExec)
Transform gbm object to a list of trees that can be used for rule condition extraction
GBM2List(gbm1,X)
gbm1 |
gbm object |
X |
predictor variable matrix |
a list of trees in an inTrees-required format
library(gbm)
data(iris)
X <- iris[,1:(ncol(iris)-1)]
target <- iris[,"Species"]
gbmFit <- gbm(Species~ ., data=iris, n.trees = 400, interaction.depth = 10, distribution="multinomial")
treeList <- GBM2List(gbmFit,X)
ruleExec <- extractRules(treeList,X)
ruleExec <- unique(ruleExec)
#ruleExec <- ruleExec[1:min(2000,length(ruleExec)),,drop=FALSE]
ruleMetric <- getRuleMetric(ruleExec,X,target)
ruleMetric <- pruneRule(ruleMetric,X,target)
ruleMetric <- unique(ruleMetric)
learner <- buildLearner(ruleMetric,X,target)
pred <- applyLearner(learner,X)
readableLearner <- presentRules(learner,colnames(X)) # more readable format
err <- 1-sum(pred==target)/length(pred)
Calculate frequent variable interactions
getFreqPattern(ruleMetric, minsup = 0.01, minconf = 0.5, minlen = 1, maxlen = 4)
ruleMetric |
a matrix including conditions, predictions, and the metrics |
minsup |
minimum support of conditions in a tree ensemble |
minconf |
minimum confidence of the rules |
minlen |
minimum length of the conditions |
maxlen |
max length of the conditions |
a matrix including frequent variable interactions (in the form of conditions), predictions, length, support, and confidence.
library(RRF)
library(arules)
data(iris)
X <- iris[,1:(ncol(iris)-1)]
target <- iris[,"Species"]
rf <- RRF(X,as.factor(target),ntree=100) # build an ordinary RF
treeList <- RF2List(rf)
ruleExec <- extractRules(treeList,X) # transform to R-executable rules
ruleMetric <- getRuleMetric(ruleExec,X,target)
freqPattern <- getFreqPattern(ruleMetric)
freqPatternMetric <- getRuleMetric(freqPattern,X,target)
Assign outcomes to conditions, and measure the rules
getRuleMetric(ruleExec, X, target)
ruleExec |
a set of rule conditions |
X |
predictor variable matrix |
target |
target variable |
a matrix including the conditions, predictions, and metrics
Houtao Deng, Interpreting Tree Ensembles with inTrees, technical report, 2014
library(RRF)
data(iris)
X <- iris[,1:(ncol(iris)-1)]
target <- iris[,"Species"]
rf <- RRF(X,as.factor(target),ntree=100) # build an ordinary RF
treeList <- RF2List(rf)
ruleExec <- extractRules(treeList,X) # transform to R-executable rules
ruleExec <- unique(ruleExec)
ruleMetric <- getRuleMetric(ruleExec,X,target) # measure rules
Present a learner using column names instead of X[,i]
presentRules(rules, colN, digits)
rules |
a set of rules |
colN |
a vector including the column names |
digits |
digits for rounding |
a matrix including the conditions (rewritten with column names), predictions, and metrics
# See function "buildLearner"
# See function "buildLearner"
Prune irrelevant variable-value pairs from a rule condition
pruneRule(rules, X, target, maxDecay = 0.05, typeDecay = 2)
rules |
A matrix including the rules and metrics |
X |
predictor variable matrix |
target |
target variable vector |
maxDecay |
threshold of decay used when deciding whether to drop a variable-value pair |
typeDecay |
1: relative error; 2: error; default: 2 |
A matrix including the pruned rules and their metrics
Houtao Deng
Houtao Deng, Interpreting Tree Ensembles with inTrees, technical report, 2014
# see function "buildLearner"
# see function "buildLearner"
Transform a random forest object to a list of trees
RF2List(rf)
rf |
random forest object |
a list of trees
library(RRF)
data(iris)
X <- iris[,1:(ncol(iris)-1)]
target <- iris[,"Species"]
rf <- RRF(X,as.factor(target),ntree=100) # build an ordinary RF
treeList <- RF2List(rf)
ruleExec <- extractRules(treeList,X) # transform to R-executable rules
Select a set of relevant and non-redundant rules using regularized random forests
selectRuleRRF(ruleMetric, X, target)
ruleMetric |
a matrix including the rules and metrics |
X |
predictor variable matrix |
target |
response variable |
a matrix including a set of relevant and non-redundant rules, and their metrics
Houtao Deng
# See function "buildLearner:
# See function "buildLearner:
Transform an xgboost object to a list of trees
XGB2List(xgb, X)
xgb |
xgboost object |
X |
predictor variable matrix |
a list of trees in an inTrees-required format
library(data.table)
library(xgboost)
# test data set 1: iris
X <- within(iris,rm("Species")); Y <- iris[,"Species"]
model_mat <- model.matrix(~. -1, data=X)
xgb <- xgboost(model_mat, label = as.numeric(Y) - 1, nrounds = 20,
               objective = "multi:softprob", num_class = 3)
tree_list <- XGB2List(xgb,model_mat)