library(C50)
library(gmodels)
## Step 1: Collect the Data

# Shamelessly swiped the .csv from UC Irvine
# But it's okay, because everyone uses the German credit agency data set
# http://archive.ics.uci.edu/ml/

# NOTE(review): `credit` is not created anywhere in the visible script; it must
# already exist as a data frame before this point. Presumably it is read from
# the UCI .csv mentioned above, e.g.
#   credit <- read.csv("credit.csv", stringsAsFactors = TRUE)
# (the file name is an assumption -- confirm before relying on it).

# Inspect the structure of the data: 1000 loans, 16 features plus `default`.
str(credit)
## 'data.frame':    1000 obs. of  17 variables:
##  $ checking_balance    : Factor w/ 4 levels "< 0 DM","> 200 DM",..: 1 3 4 1 1 4 4 3 4 3 ...
##  $ months_loan_duration: int  6 48 12 42 24 36 24 36 12 30 ...
##  $ credit_history      : Factor w/ 5 levels "critical","good",..: 1 2 1 2 4 2 2 2 2 1 ...
##  $ purpose             : Factor w/ 6 levels "business","car",..: 5 5 4 5 2 4 5 2 5 2 ...
##  $ amount              : int  1169 5951 2096 7882 4870 9055 2835 6948 3059 5234 ...
##  $ savings_balance     : Factor w/ 5 levels "< 100 DM","> 1000 DM",..: 5 1 1 1 1 5 4 1 2 1 ...
##  $ employment_duration : Factor w/ 5 levels "< 1 year","> 7 years",..: 2 3 4 4 3 3 2 3 4 5 ...
##  $ percent_of_income   : int  4 2 2 2 3 2 3 2 2 4 ...
##  $ years_at_residence  : int  4 2 3 4 4 4 4 2 4 2 ...
##  $ age                 : int  67 22 49 45 53 35 53 35 61 28 ...
##  $ other_credit        : Factor w/ 3 levels "bank","none",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ housing             : Factor w/ 3 levels "other","own",..: 2 2 2 1 1 1 2 3 2 2 ...
##  $ existing_loans_count: int  2 1 1 1 2 1 1 1 1 2 ...
##  $ job                 : Factor w/ 4 levels "management","skilled",..: 2 2 4 2 2 4 2 1 4 1 ...
##  $ dependents          : int  1 1 2 2 2 2 1 1 1 1 ...
##  $ phone               : Factor w/ 2 levels "no","yes": 2 1 1 1 1 2 1 2 1 1 ...
##  $ default             : Factor w/ 2 levels "no","yes": 1 2 1 1 2 1 1 1 1 2 ...

# You can see that the features are a mix of numeric and categorical variables.

## Step 2: Explore and Prepare the Data

# Take a closer look at checking and savings account balances - categorical
# Note: all values are in Deutsche Marks (DM)
table(credit$checking_balance)
##
##     < 0 DM   > 200 DM 1 - 200 DM    unknown
##        274         63        269        394
table(credit$savings_balance)
##
##      < 100 DM     > 1000 DM  100 - 500 DM 500 - 1000 DM       unknown
##           603            48           103            63           183

# Loan features - numeric
summary(credit$months_loan_duration)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
##     4.0    12.0    18.0    20.9    24.0    72.0
summary(credit$amount)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
##     250    1366    2320    3271    3972   18420

# Our target (class variable) - whether the individual defaulted on a loan
table(credit$default)
##
##  no yes
## 700 300

#### Data Preparation - Creating Random Training and Test Data Sets

# Fix the random seed so the shuffle below is reproducible.
set.seed(12345)
# The rows are not stored in random order, so shuffle them before splitting.
# runif() draws one uniform random number per row and order() returns the row
# positions that would sort those draws, i.e. a random permutation of the rows.
credit_rand <- credit[order(runif(nrow(credit))), ]

# What is the order() function?
order(c(0.5, 0.25, 0.75, 0.1))
## [1] 4 2 1 3

# Check to make sure you have the same data frame, just sorted differently.
summary(credit$amount)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
##     250    1366    2320    3271    3972   18420
summary(credit_rand$amount)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
##     250    1366    2320    3271    3972   18420

# Check the first few values in each data frame.
head(credit$amount)
## [1] 1169 5951 2096 7882 4870 9055
head(credit_rand$amount)
## [1] 1199 2576 1103 4020 1501 1568

# 90% for training, and 10% for testing (rows 1-900 and 901-1000 for the
# 1000-row data set). A logical mask is used instead of index ranges so the
# split stays correct for any number of rows.
n_train <- round(0.9 * nrow(credit_rand))
in_train <- seq_len(nrow(credit_rand)) <= n_train
credit_train <- credit_rand[in_train, ]
credit_test <- credit_rand[!in_train, ]

# Check to make sure you have ~30% of defaulted loans in each of the data sets.
prop.table(table(credit_train$default))
##
##        no       yes
## 0.7022222 0.2977778
prop.table(table(credit_test$default))
##
##   no  yes
## 0.68 0.32

## Step 3: Train a Model on the Data

# 17th column is the default class variable, so it is dropped from the
# predictors and passed separately as the outcome.
credit_model <- C5.0(credit_train[-17], credit_train$default)

# Learn more about the tree.
credit_model
##
## Call:
## C5.0.default(x = credit_train[-17], y = credit_train$default)
##
## Classification Tree
## Number of samples: 900
## Number of predictors: 16
##
## Tree size: 67
##
## Non-standard options: attempt to group attributes

# NOTE(review): the recorded output below was re-flowed from a copy whose
# whitespace had been collapsed. The tree structure follows the ':' branch
# markers of the original; exact column alignment is approximate.
summary(credit_model)
##
## Call:
## C5.0.default(x = credit_train[-17], y = credit_train$default)
##
##
## C5.0 [Release 2.07 GPL Edition]      Thu May 25 16:04:34 2017
## -------------------------------
##
## Class specified by attribute `outcome'
##
## Read 900 cases (17 attributes) from undefined.data
##
## Decision tree:
##
## checking_balance = unknown: no (358/44)
## checking_balance in {< 0 DM,> 200 DM,1 - 200 DM}:
## :...credit_history in {perfect,very good}:
##     :...dependents > 1: yes (10/1)
##     :   dependents <= 1:
##     :   :...savings_balance = < 100 DM: yes (39/11)
##     :       savings_balance in {> 1000 DM,500 - 1000 DM,unknown}: no (8/1)
##     :       savings_balance = 100 - 500 DM:
##     :       :...checking_balance = < 0 DM: no (1)
##     :           checking_balance in {> 200 DM,1 - 200 DM}: yes (5/1)
##     credit_history in {critical,good,poor}:
##     :...months_loan_duration <= 11: no (87/14)
##         months_loan_duration > 11:
##         :...savings_balance = > 1000 DM: no (13)
##             savings_balance in {< 100 DM,100 - 500 DM,500 - 1000 DM,unknown}:
##             :...checking_balance = > 200 DM:
##                 :...dependents > 1: yes (3)
##                 :   dependents <= 1:
##                 :   :...credit_history in {good,poor}: no (23/3)
##                 :       credit_history = critical:
##                 :       :...amount <= 2337: yes (3)
##                 :           amount > 2337: no (6)
##                 checking_balance = 1 - 200 DM:
##                 :...savings_balance = unknown: no (34/6)
##                 :   savings_balance in {< 100 DM,100 - 500 DM,500 - 1000 DM}:
##                 :   :...months_loan_duration > 45: yes (11/1)
##                 :       months_loan_duration <= 45:
##                 :       :...other_credit = store:
##                 :           :...age <= 35: yes (4)
##                 :           :   age > 35: no (2)
##                 :           other_credit = bank:
##                 :           :...years_at_residence <= 1: no (3)
##                 :           :   years_at_residence > 1:
##                 :           :   :...existing_loans_count <= 1: yes (5)
##                 :           :       existing_loans_count > 1:
##                 :           :       :...percent_of_income <= 2: no (4/1)
##                 :           :           percent_of_income > 2: yes (3)
##                 :           other_credit = none:
##                 :           :...job = unemployed: no (1)
##                 :               job = management:
##                 :               :...amount <= 7511: no (10/3)
##                 :               :   amount > 7511: yes (7)
##                 :               job = unskilled: [S1]
##                 :               job = skilled:
##                 :               :...dependents <= 1: no (55/15)
##                 :                   dependents > 1:
##                 :                   :...age <= 34: no (3)
##                 :                       age > 34: yes (4)
##                 checking_balance = < 0 DM:
##                 :...job = management: no (26/6)
##                     job = unemployed: yes (4/1)
##                     job = unskilled:
##                     :...employment_duration in {4 - 7 years,
##                     :   :                       unemployed}: no (4)
##                     :   employment_duration = < 1 year:
##                     :   :...other_credit = bank: no (1)
##                     :   :   other_credit in {none,store}: yes (11/2)
##                     :   employment_duration = > 7 years:
##                     :   :...other_credit in {bank,none}: no (5/1)
##                     :   :   other_credit = store: yes (2)
##                     :   employment_duration = 1 - 4 years:
##                     :   :...age <= 39: no (14/3)
##                     :       age > 39:
##                     :       :...credit_history in {critical,good}: yes (3)
##                     :           credit_history = poor: no (1)
##                     job = skilled:
##                     :...credit_history = poor:
##                         :...savings_balance in {< 100 DM,100 - 500 DM,
##                         :   :                   500 - 1000 DM}: yes (8)
##                         :   savings_balance = unknown: no (1)
##                         credit_history = critical:
##                         :...other_credit = store: no (0)
##                         :   other_credit = bank: yes (4)
##                         :   other_credit = none:
##                         :   :...savings_balance in {100 - 500 DM,
##                         :       :                   unknown}: no (1)
##                         :       savings_balance = 500 - 1000 DM: yes (1)
##                         :       savings_balance = < 100 DM:
##                         :       :...months_loan_duration <= 13:
##                         :           :...percent_of_income <= 3: yes (3)
##                         :           :   percent_of_income > 3: no (3/1)
##                         :           months_loan_duration > 13:
##                         :           :...amount <= 5293: no (10/1)
##                         :               amount > 5293: yes (2)
##                         credit_history = good:
##                         :...existing_loans_count > 1: yes (5)
##                             existing_loans_count <= 1:
##                             :...other_credit = store: no (2)
##                                 other_credit = bank:
##                                 :...percent_of_income <= 2: yes (2)
##                                 :   percent_of_income > 2: no (6/1)
##                                 other_credit = none: [S2]
##
## SubTree [S1]
##
## employment_duration in {< 1 year,1 - 4 years}: yes (11/3)
## employment_duration in {> 7 years,4 - 7 years,unemployed}: no (8)
##
## SubTree [S2]
##
## savings_balance = 100 - 500 DM: yes (3)
## savings_balance = 500 - 1000 DM: no (1)
## savings_balance = unknown:
## :...phone = no: yes (9/1)
## :   phone = yes: no (3/1)
## savings_balance = < 100 DM:
## :...percent_of_income <= 1: no (4)
##     percent_of_income > 1:
##     :...phone = yes: yes (10/1)
##         phone = no:
##         :...purpose in {business,car0,education,renovations}: yes (3)
##             purpose = car:
##             :...percent_of_income <= 3: no (2)
##             :   percent_of_income > 3: yes (6/1)
##             purpose = furniture/appliances:
##             :...years_at_residence <= 1: no (4)
##                 years_at_residence > 1:
##                 :...housing = other: no (1)
##                     housing = rent: yes (2)
##                     housing = own:
##                     :...amount <= 1778: no (3)
##                         amount > 1778:
##                         :...years_at_residence <= 3: yes (6)
##                             years_at_residence > 3: no (3/1)
##
##
## Evaluation on training data (900 cases):
##
##      Decision Tree
##    ----------------
##    Size      Errors
##
##      66  125(13.9%)   <<
##
##
##     (a)   (b)    <-classified as
##    ----  ----
##     609    23    (a): class no
##     102   166    (b): class yes
##
##
##  Attribute usage:
##
##  100.00% checking_balance
##   60.22% credit_history
##   53.22% months_loan_duration
##   49.44% savings_balance
##   30.89% job
##   25.89% other_credit
##   17.78% dependents
##    9.67% existing_loans_count
##    7.22% percent_of_income
##    6.67% employment_duration
##    5.78% phone
##    5.56% amount
##    3.78% years_at_residence
##    3.44% age
##    3.33% purpose
##    1.67% housing
##
##
## Time: 0.1 secs

## Step 4: Evaluate Model Performance

# Apply decision tree to test data set.
credit_pred <- predict(credit_model, credit_test)

# Compare predicted class values to actuals
# prop.c = column percentages; prop.r = row percentages
# (both switched off here, along with the chi-square contribution, so each
# cell shows only the count and its share of the table total)
CrossTable(credit_test$default, credit_pred,
           prop.chisq = FALSE, prop.c = FALSE, prop.r = FALSE,
           dnn = c('actual default', 'predicted default'))
##
##
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table:  100
##
##
##                | predicted default
## actual default |        no |       yes | Row Total |
## ---------------|-----------|-----------|-----------|
##             no |        57 |        11 |        68 |
##                |     0.570 |     0.110 |           |
## ---------------|-----------|-----------|-----------|
##            yes |        16 |        16 |        32 |
##                |     0.160 |     0.160 |           |
## ---------------|-----------|-----------|-----------|
##   Column Total |        73 |        27 |       100 |
## ---------------|-----------|-----------|-----------|
##
## 

## Step 5: Improve Model Performance

# ADAPTIVE BOOSTING = process in which many decision trees are built, and trees vote on the
# best class for each sample. Combine a number of weak performers, and you have a team that
# is stronger than any one of the learners alone.

# If you're interested in the literature: the idea of boosting is based largely upon research by
# Rob Schapire and Yoav Freund. For more information, try searching the web for their publications
# or their textbook "Boosting: Foundations and Algorithms" (The MIT Press, 2012).

# Refit the model with adaptive boosting: `trials = 10` asks C5.0 for up to
# ten boosting iterations instead of a single tree.
credit_boost10 <- C5.0(credit_train[-17], credit_train$default, trials = 10)

# Print the model overview.
credit_boost10
##
## Call:
## C5.0.default(x = credit_train[-17], y = credit_train$default, trials = 10)
##
## Classification Tree
## Number of samples: 900
## Number of predictors: 16
##
## Number of boosting iterations: 10
## Average tree size: 56
##
## Non-standard options: attempt to group attributes

# Show the trees built in each boosting trial.
summary(credit_boost10)
##
## Call:
## C5.0.default(x = credit_train[-17], y = credit_train$default, trials = 10) ## ## ## C5.0 [Release 2.07 GPL Edition] Thu May 25 16:04:35 2017 ## ------------------------------- ## ## Class specified by attribute outcome' ## ## Read 900 cases (17 attributes) from undefined.data ## ## ----- Trial 0: ----- ## ## Decision tree: ## ## checking_balance = unknown: no (358/44) ## checking_balance in {< 0 DM,> 200 DM,1 - 200 DM}: ## :...credit_history in {perfect,very good}: ## :...dependents > 1: yes (10/1) ## : dependents <= 1: ## : :...savings_balance = < 100 DM: yes (39/11) ## : savings_balance in {> 1000 DM,500 - 1000 DM,unknown}: no (8/1) ## : savings_balance = 100 - 500 DM: ## : :...checking_balance = < 0 DM: no (1) ## : checking_balance in {> 200 DM,1 - 200 DM}: yes (5/1) ## credit_history in {critical,good,poor}: ## :...months_loan_duration <= 11: no (87/14) ## months_loan_duration > 11: ## :...savings_balance = > 1000 DM: no (13) ## savings_balance in {< 100 DM,100 - 500 DM,500 - 1000 DM,unknown}: ## :...checking_balance = > 200 DM: ## :...dependents > 1: yes (3) ## : dependents <= 1: ## : :...credit_history in {good,poor}: no (23/3) ## : credit_history = critical: ## : :...amount <= 2337: yes (3) ## : amount > 2337: no (6) ## checking_balance = 1 - 200 DM: ## :...savings_balance = unknown: no (34/6) ## : savings_balance in {< 100 DM,100 - 500 DM,500 - 1000 DM}: ## : :...months_loan_duration > 45: yes (11/1) ## : months_loan_duration <= 45: ## : :...other_credit = store: ## : :...age <= 35: yes (4) ## : : age > 35: no (2) ## : other_credit = bank: ## : :...years_at_residence <= 1: no (3) ## : : years_at_residence > 1: ## : : :...existing_loans_count <= 1: yes (5) ## : : existing_loans_count > 1: ## : : :...percent_of_income <= 2: no (4/1) ## : : percent_of_income > 2: yes (3) ## : other_credit = none: ## : :...job = unemployed: no (1) ## : job = management: ## : :...amount <= 7511: no (10/3) ## : : amount > 7511: yes (7) ## : job = unskilled: [S1] ## : job = 
skilled: ## : :...dependents <= 1: no (55/15) ## : dependents > 1: ## : :...age <= 34: no (3) ## : age > 34: yes (4) ## checking_balance = < 0 DM: ## :...job = management: no (26/6) ## job = unemployed: yes (4/1) ## job = unskilled: ## :...employment_duration in {4 - 7 years, ## : : unemployed}: no (4) ## : employment_duration = < 1 year: ## : :...other_credit = bank: no (1) ## : : other_credit in {none,store}: yes (11/2) ## : employment_duration = > 7 years: ## : :...other_credit in {bank,none}: no (5/1) ## : : other_credit = store: yes (2) ## : employment_duration = 1 - 4 years: ## : :...age <= 39: no (14/3) ## : age > 39: ## : :...credit_history in {critical,good}: yes (3) ## : credit_history = poor: no (1) ## job = skilled: ## :...credit_history = poor: ## :...savings_balance in {< 100 DM,100 - 500 DM, ## : : 500 - 1000 DM}: yes (8) ## : savings_balance = unknown: no (1) ## credit_history = critical: ## :...other_credit = store: no (0) ## : other_credit = bank: yes (4) ## : other_credit = none: ## : :...savings_balance in {100 - 500 DM, ## : : unknown}: no (1) ## : savings_balance = 500 - 1000 DM: yes (1) ## : savings_balance = < 100 DM: ## : :...months_loan_duration <= 13: ## : :...percent_of_income <= 3: yes (3) ## : : percent_of_income > 3: no (3/1) ## : months_loan_duration > 13: ## : :...amount <= 5293: no (10/1) ## : amount > 5293: yes (2) ## credit_history = good: ## :...existing_loans_count > 1: yes (5) ## existing_loans_count <= 1: ## :...other_credit = store: no (2) ## other_credit = bank: ## :...percent_of_income <= 2: yes (2) ## : percent_of_income > 2: no (6/1) ## other_credit = none: [S2] ## ## SubTree [S1] ## ## employment_duration in {< 1 year,1 - 4 years}: yes (11/3) ## employment_duration in {> 7 years,4 - 7 years,unemployed}: no (8) ## ## SubTree [S2] ## ## savings_balance = 100 - 500 DM: yes (3) ## savings_balance = 500 - 1000 DM: no (1) ## savings_balance = unknown: ## :...phone = no: yes (9/1) ## : phone = yes: no (3/1) ## savings_balance 
= < 100 DM: ## :...percent_of_income <= 1: no (4) ## percent_of_income > 1: ## :...phone = yes: yes (10/1) ## phone = no: ## :...purpose in {business,car0,education,renovations}: yes (3) ## purpose = car: ## :...percent_of_income <= 3: no (2) ## : percent_of_income > 3: yes (6/1) ## purpose = furniture/appliances: ## :...years_at_residence <= 1: no (4) ## years_at_residence > 1: ## :...housing = other: no (1) ## housing = rent: yes (2) ## housing = own: ## :...amount <= 1778: no (3) ## amount > 1778: ## :...years_at_residence <= 3: yes (6) ## years_at_residence > 3: no (3/1) ## ## ----- Trial 1: ----- ## ## Decision tree: ## ## checking_balance in {< 0 DM,1 - 200 DM}: ## :...savings_balance in {> 1000 DM,500 - 1000 DM}: no (29/8.6) ## : savings_balance = 100 - 500 DM: ## : :...credit_history in {critical,perfect,poor}: no (18/3.2) ## : : credit_history in {good,very good}: yes (30.5/9.5) ## : savings_balance = unknown: ## : :...credit_history in {critical,perfect,poor}: no (16.6) ## : : credit_history in {good,very good}: ## : : :...job = management: yes (9.3/2.4) ## : : job in {unemployed,unskilled}: no (7.9/0.8) ## : : job = skilled: ## : : :...purpose in {business,car0,renovations}: yes (0) ## : : purpose = education: no (3.2) ## : : purpose in {car,furniture/appliances}: ## : : :...months_loan_duration <= 18: yes (16.2/3.2) ## : : months_loan_duration > 18: no (16.5/7) ## : savings_balance = < 100 DM: ## : :...months_loan_duration > 47: yes (24.3/3.1) ## : months_loan_duration <= 47: ## : :...job = unemployed: yes (7/3.1) ## : job = unskilled: ## : :...housing in {other,rent}: yes (14.2/4.7) ## : : housing = own: no (66.4/20.3) ## : job = skilled: ## : :...percent_of_income > 2: yes (109.5/40.2) ## : : percent_of_income <= 2: ## : : :...employment_duration in {< 1 year,> 7 years, ## : : : 4 - 7 years}: yes (36/15) ## : : employment_duration in {1 - 4 years, ## : : unemployed}: no (31.3/7) ## : job = management: ## : :...existing_loans_count > 2: no (3.2) ## : 
existing_loans_count <= 2: ## : :...employment_duration in {< 1 year, ## : : 4 - 7 years}: no (9.4/0.8) ## : employment_duration in {> 7 years, ## : : 1 - 4 years}: yes (30.3/7.1) ## : employment_duration = unemployed: ## : :...percent_of_income <= 2: no (4.7) ## : percent_of_income > 2: yes (13.9/4) ## checking_balance in {> 200 DM,unknown}: ## :...other_credit in {bank,store}: ## :...purpose = renovations: yes (0) ## : purpose in {car0,furniture/appliances}: no (31.4/6.9) ## : purpose in {business,car,education}: ## : :...percent_of_income <= 1: no (7/2.3) ## : percent_of_income > 1: yes (44.8/11.1) ## other_credit = none: ## :...credit_history in {critical,perfect}: no (116.7/10.1) ## credit_history in {good,poor,very good}: ## :...existing_loans_count > 1: ## :...employment_duration = 4 - 7 years: no (7.9) ## : employment_duration in {< 1 year,> 7 years,1 - 4 years, ## : : unemployed}: ## : :...job in {management,unemployed}: yes (6.9) ## : job in {skilled,unskilled}: ## : :...years_at_residence <= 1: yes (4.6) ## : years_at_residence > 1: ## : :...years_at_residence <= 3: no (15.7/2.3) ## : years_at_residence > 3: yes (15.5/4) ## existing_loans_count <= 1: ## :...credit_history = poor: yes (9.3/2.4) ## credit_history = very good: no (1.6/0.8) ## credit_history = good: ## :...phone = yes: no (54.4/4.6) ## phone = no: ## :...job in {management,unemployed}: no (2.4) ## job = skilled: ## :...savings_balance in {> 1000 DM,100 - 500 DM, ## : : 500 - 1000 DM, ## : : unknown}: no (23.7) ## : savings_balance = < 100 DM: ## : :...years_at_residence <= 2: no (19.6/4.6) ## : years_at_residence > 2: yes (12.4/3.2) ## job = unskilled: ## :...checking_balance = > 200 DM: yes (10.1/2.4) ## checking_balance = unknown: ## :...percent_of_income <= 3: no (9.5) ## percent_of_income > 3: yes (9.3/2.4) ## ## ----- Trial 2: ----- ## ## Decision tree: ## ## months_loan_duration <= 8: ## :...existing_loans_count > 1: no (19.1) ## : existing_loans_count <= 1: ## : :...amount <= 3161: no 
(45.5/7.7) ## : amount > 3161: yes (6.3/0.6) ## months_loan_duration > 8: ## :...checking_balance in {< 0 DM,> 200 DM,1 - 200 DM}: ## :...employment_duration = unemployed: ## : :...months_loan_duration > 33: no (11.4) ## : : months_loan_duration <= 33: ## : : :...phone = no: yes (11/2.7) ## : : phone = yes: no (15.5/6.5) ## : employment_duration = 4 - 7 years: ## : :...months_loan_duration <= 22: no (44.7/6.7) ## : : months_loan_duration > 22: ## : : :...job = management: no (4.6/0.6) ## : : job in {unemployed,unskilled}: yes (4.6/1.3) ## : : job = skilled: ## : : :...savings_balance in {< 100 DM,> 1000 DM, ## : : : 500 - 1000 DM}: yes (22.5/7.8) ## : : savings_balance in {100 - 500 DM,unknown}: no (8.8/2.1) ## : employment_duration = > 7 years: ## : :...amount > 6948: yes (13/1.3) ## : : amount <= 6948: ## : : :...purpose in {business,car0,education}: yes (22.8/7.8) ## : : purpose = renovations: no (1.3) ## : : purpose = car: ## : : :...job = unemployed: no (0) ## : : : job = unskilled: yes (6.9) ## : : : job in {management,skilled}: ## : : : :...years_at_residence <= 1: yes (2.5) ## : : : years_at_residence > 1: no (28.6/7.1) ## : : purpose = furniture/appliances: ## : : :...other_credit in {bank,store}: yes (7.3/2.1) ## : : other_credit = none: ## : : :...job = management: yes (5.1/1.3) ## : : job in {skilled,unemployed,unskilled}: no (29.2/3.4) ## : employment_duration = 1 - 4 years: ## : :...savings_balance = > 1000 DM: no (6.4) ## : : savings_balance in {< 100 DM,100 - 500 DM,500 - 1000 DM,unknown}: ## : : :...housing = other: yes (12/1.9) ## : : housing in {own,rent}: ## : : :...credit_history in {perfect,poor,very good}: no (29.3/8.6) ## : : credit_history = critical: ## : : :...months_loan_duration <= 16: no (10.6/0.6) ## : : : months_loan_duration > 16: yes (14.5/4.1) ## : : credit_history = good: ## : : :...phone = no: yes (81.6/33.5) ## : : phone = yes: no (30.2/11) ## : employment_duration = < 1 year: ## : :...savings_balance in {> 1000 DM,unknown}: no 
(9.1/1.9) ## : savings_balance in {100 - 500 DM,500 - 1000 DM}: yes (15.6/3.4) ## : savings_balance = < 100 DM: ## : :...housing = other: no (4.7) ## : housing in {own,rent}: ## : :...years_at_residence > 1: yes (41.6/11) ## : years_at_residence <= 1: ## : :...job in {management,unskilled}: no (13.9/4.9) ## : job = unemployed: yes (2.1) ## : job = skilled: ## : :...percent_of_income <= 3: yes (12.3/3.3) ## : percent_of_income > 3: no (9.7/0.6) ## checking_balance = unknown: ## :...other_credit in {bank,store}: ## :...employment_duration in {1 - 4 years,unemployed}: yes (31.9/7.2) ## : employment_duration in {< 1 year,> 7 years,4 - 7 years}: ## : :...other_credit = store: no (8.1) ## : other_credit = bank: ## : :...age <= 43: yes (22/8.6) ## : age > 43: no (9) ## other_credit = none: ## :...age > 30: no (134.2/16.1) ## age <= 30: ## :...amount > 6458: yes (14.1/2.6) ## amount <= 6458: ## :...age <= 22: yes (17.4/5.9) ## age > 22: ## :...percent_of_income <= 3: no (22.7) ## percent_of_income > 3: ## :...job = unemployed: no (0) ## job in {management,unskilled}: yes (12.5/3.9) ## job = skilled: ## :...purpose in {business,education, ## : renovations}: yes (8.6/1.9) ## purpose in {car,car0, ## furniture/appliances}: no (14.9) ## ## ----- Trial 3: ----- ## ## Decision tree: ## ## amount > 11054: ## :...credit_history in {critical,good,perfect}: yes (24.9/2.7) ## : credit_history in {poor,very good}: no (4.7/0.5) ## amount <= 11054: ## :...checking_balance in {> 200 DM,unknown}: ## :...employment_duration = > 7 years: no (91/18.6) ## : employment_duration = unemployed: yes (14.3/6.6) ## : employment_duration = 1 - 4 years: ## : :...job = management: yes (16.6/6.6) ## : : job in {skilled,unemployed,unskilled}: no (113.5/30.7) ## : employment_duration = 4 - 7 years: ## : :...age <= 22: yes (5.2/1.3) ## : : age > 22: no (49.9/4.7) ## : employment_duration = < 1 year: ## : :...purpose in {car,car0}: no (13.3) ## : purpose in 
{business,education,furniture/appliances,renovations}: ## : :...amount > 6681: yes (7) ## : amount <= 6681: ## : :...other_credit in {bank,store}: no (4) ## : other_credit = none: ## : :...months_loan_duration > 33: no (2.5) ## : months_loan_duration <= 33: ## : :...amount <= 1503: no (12.7/2.3) ## : amount > 1503: yes (18.3/5) ## checking_balance in {< 0 DM,1 - 200 DM}: ## :...credit_history = perfect: ## :...percent_of_income <= 3: no (16.5/5.9) ## : percent_of_income > 3: yes (6.9) ## credit_history = poor: ## :...percent_of_income <= 1: no (7.2) ## : percent_of_income > 1: ## : :...savings_balance in {< 100 DM,> 1000 DM, ## : : 500 - 1000 DM}: yes (19.2/3.9) ## : savings_balance in {100 - 500 DM,unknown}: no (12.8/2.3) ## credit_history = very good: ## :...other_credit = none: yes (10.6) ## : other_credit in {bank,store}: ## : :...months_loan_duration <= 9: yes (4.7) ## : months_loan_duration > 9: no (20.6/8.1) ## credit_history = critical: ## :...years_at_residence <= 1: no (7.5) ## : years_at_residence > 1: ## : :...savings_balance in {> 1000 DM,100 - 500 DM, ## : : unknown}: no (16.3/2.4) ## : savings_balance = 500 - 1000 DM: yes (2.8/0.5) ## : savings_balance = < 100 DM: ## : :...dependents > 1: no (12.9/0.5) ## : dependents <= 1: ## : :...other_credit = bank: yes (6.7/0.5) ## : other_credit = store: no (1.7/0.5) ## : other_credit = none: ## : :...age > 61: no (6.4) ## : age <= 61: ## : :...existing_loans_count > 2: no (3.3) ## : existing_loans_count <= 2: ## : :...job in {management,unemployed, ## : : unskilled}: yes (14.6/2.7) ## : job = skilled: no (41.3/17.6) ## credit_history = good: ## :...purpose in {business,car0,education,renovations}: no (40.7/12.7) ## purpose = furniture/appliances: ## :...months_loan_duration <= 7: no (10.6) ## : months_loan_duration > 7: ## : :...phone = no: no (123.2/45.7) ## : phone = yes: ## : :...years_at_residence <= 3: yes (27.8/6.6) ## : years_at_residence > 3: no (9.4/3.1) ## purpose = car: ## :...employment_duration = 
unemployed: no (7.1) ## employment_duration in {< 1 year,> 7 years,1 - 4 years, ## : 4 - 7 years}: ## :...dependents > 1: no (17.3/5.8) ## dependents <= 1: ## :...percent_of_income <= 2: ## :...amount <= 2697: yes (8.6/1.3) ## : amount > 2697: no (15.4/2.2) ## percent_of_income > 2: ## :...percent_of_income <= 3: yes (17/2.3) ## percent_of_income > 3: ## :...phone = yes: no (5.7/1.1) ## phone = no: ## :...checking_balance = < 0 DM: yes (16.7/1.3) ## checking_balance = 1 - 200 DM: no (10.6/4.3) ## ## ----- Trial 4: ----- ## ## Decision tree: ## ## checking_balance = unknown: ## :...other_credit in {bank,store}: ## : :...employment_duration in {< 1 year,4 - 7 years}: no (16.1/3.4) ## : : employment_duration = unemployed: yes (5.8/1.9) ## : : employment_duration = > 7 years: ## : : :...age <= 41: yes (16.4/6) ## : : : age > 41: no (7.7) ## : : employment_duration = 1 - 4 years: ## : : :...years_at_residence <= 1: no (3.1) ## : : years_at_residence > 1: ## : : :...amount <= 1503: no (3.6) ## : : amount > 1503: yes (26.3/6) ## : other_credit = none: ## : :...credit_history in {perfect,very good}: no (2.8) ## : credit_history = critical: ## : :...age > 30: no (45.3) ## : : age <= 30: ## : : :...purpose in {business,car,car0,furniture/appliances, ## : : : renovations}: no (21.4/3.9) ## : : purpose = education: yes (3) ## : credit_history = poor: ## : :...dependents > 1: yes (8.2/1.8) ## : : dependents <= 1: ## : : :...age <= 29: yes (11.8/4.6) ## : : age > 29: no (9.9) ## : credit_history = good: ## : :...existing_loans_count > 1: ## : :...percent_of_income <= 2: yes (13/0.4) ## : : percent_of_income > 2: no (7.5/1.3) ## : existing_loans_count <= 1: ## : :...percent_of_income <= 2: no (30.9/1.3) ## : percent_of_income > 2: ## : :...employment_duration = > 7 years: no (11.2) ## : employment_duration in {< 1 year,1 - 4 years,4 - 7 years, ## : : unemployed}: ## : :...job in {management,unemployed}: no (10.7/4.9) ## : job = unskilled: yes (10/3.6) ## : job = skilled: ## : 
:...age <= 23: yes (9.3/2.6) ## : age > 23: no (17.9) ## checking_balance in {< 0 DM,> 200 DM,1 - 200 DM}: ## :...credit_history = very good: ## :...age <= 23: no (3) ## : age > 23: yes (38.4/9.1) ## credit_history = perfect: ## :...housing in {other,rent}: yes (9.1) ## : housing = own: ## : :...percent_of_income > 3: yes (5) ## : percent_of_income <= 3: ## : :...other_credit in {bank,none}: no (16/3.9) ## : other_credit = store: yes (2.7) ## credit_history = poor: ## :...savings_balance in {> 1000 DM,500 - 1000 DM}: yes (0) ## : savings_balance = unknown: no (5.9) ## : savings_balance in {< 100 DM,100 - 500 DM}: ## : :...housing = rent: no (4.7/0.4) ## : housing in {other,own}: ## : :...percent_of_income <= 2: no (14.4/3.7) ## : percent_of_income > 2: yes (16.9/2.7) ## credit_history = good: ## :...amount > 8648: yes (19.4/2.4) ## : amount <= 8648: ## : :...purpose in {business,car0}: no (18/4.2) ## : purpose = renovations: yes (7.1/3) ## : purpose = education: ## : :...checking_balance = < 0 DM: yes (12.1/1.4) ## : : checking_balance in {> 200 DM,1 - 200 DM}: no (8.7/1.1) ## : purpose = car: ## : :...employment_duration = unemployed: no (5.9) ## : : employment_duration in {< 1 year,> 7 years,1 - 4 years, ## : : : 4 - 7 years}: ## : : :...job in {management,unemployed}: no (8.6/2.4) ## : : job = unskilled: yes (31.1/10.2) ## : : job = skilled: ## : : :...housing in {other,rent}: yes (24.2/6.4) ## : : housing = own: ## : : :...dependents > 1: no (4.3) ## : : dependents <= 1: ## : : :...existing_loans_count > 1: no (3.6) ## : : existing_loans_count <= 1: ## : : :...months_loan_duration <= 13: no (9.2/1.5) ## : : months_loan_duration > 13: yes (11.6/1.4) ## : purpose = furniture/appliances: ## : :...savings_balance in {> 1000 DM,500 - 1000 DM}: no (8.8/1.8) ## : savings_balance = 100 - 500 DM: yes (16.1/3.5) ## : savings_balance = unknown: ## : :...employment_duration in {< 1 year,> 7 years,1 - 4 years, ## : : : unemployed}: yes (20.8/6.5) ## : : employment_duration 
= 4 - 7 years: no (4.9) ## : savings_balance = < 100 DM: ## : :...years_at_residence <= 1: no (31.1/9.5) ## : years_at_residence > 1: ## : :...other_credit in {bank,store}: yes (14.9/4.8) ## : other_credit = none: ## : :...months_loan_duration > 42: yes (5.5) ## : months_loan_duration <= 42: ## : :...job in {management,unemployed}: no (11.9/3.4) ## : job in {skilled,unskilled}: ## : :...age <= 22: yes (11.8/2.7) ## : age > 22: no (76.6/33.4) ## credit_history = critical: ## :...savings_balance in {> 1000 DM,100 - 500 DM,unknown}: no (23.6/5.9) ## savings_balance = 500 - 1000 DM: yes (6.2/1.6) ## savings_balance = < 100 DM: ## :...dependents > 1: no (12.2/1.1) ## dependents <= 1: ## :...age > 61: no (5.7) ## age <= 61: ## :...years_at_residence <= 1: no (6.1/1) ## years_at_residence > 1: ## :...other_credit in {bank,store}: yes (8.3/2.1) ## other_credit = none: ## :...amount > 5998: yes (7) ## amount <= 5998: ## :...existing_loans_count <= 1: no (11.6/3.9) ## existing_loans_count > 1: ## :...existing_loans_count > 2: no (2.7) ## existing_loans_count <= 2: ## :...age > 54: yes (3.9) ## age <= 54: ## :...age > 44: no (5) ## age <= 44: ## :...amount <= 5324: yes (30.9/10.8) ## amount > 5324: no (2.7) ## ## ----- Trial 5: ----- ## ## Decision tree: ## ## checking_balance = unknown: ## :...other_credit = store: no (17.3/6.3) ## : other_credit = bank: ## : :...purpose = business: yes (8.9/4.1) ## : : purpose in {car0,education,renovations}: no (7.2/1.8) ## : : purpose = car: ## : : :...credit_history in {critical,perfect,poor,very good}: yes (17.2/4.4) ## : : : credit_history = good: no (3.5) ## : : purpose = furniture/appliances: ## : : :...months_loan_duration <= 13: yes (6.5/1.6) ## : : months_loan_duration > 13: no (17.4/2.5) ## : other_credit = none: ## : :...credit_history in {perfect,very good}: no (2.3) ## : credit_history = critical: ## : :...amount <= 6967: no (50.3/2.5) ## : : amount > 6967: yes (8.9/3.4) ## : credit_history = good: ## : 
:...existing_loans_count <= 1: no (86.1/19.3) ## : : existing_loans_count > 1: ## : : :...percent_of_income <= 2: yes (11.5/1.2) ## : : percent_of_income > 2: no (7.2/2.1) ## : credit_history = poor: ## : :...percent_of_income <= 3: no (13.6/1) ## : percent_of_income > 3: ## : :...amount <= 1526: no (3.4) ## : amount > 1526: yes (15.7/5.7) ## checking_balance in {< 0 DM,> 200 DM,1 - 200 DM}: ## :...savings_balance in {> 1000 DM,500 - 1000 DM}: no (33.1/13.9) ## savings_balance = 100 - 500 DM: ## :...existing_loans_count > 3: yes (3.5) ## : existing_loans_count <= 3: ## : :...credit_history in {critical,poor}: no (12/1.4) ## : credit_history in {perfect,very good}: yes (9.7/2.8) ## : credit_history = good: ## : :...months_loan_duration > 30: yes (6.3) ## : months_loan_duration <= 30: ## : :...amount <= 836: yes (3.8) ## : amount > 836: no (25.9/7.2) ## savings_balance = unknown: ## :...months_loan_duration <= 11: no (9.8) ## : months_loan_duration > 11: ## : :...months_loan_duration > 36: no (8.3) ## : months_loan_duration <= 36: ## : :...purpose in {business,education,renovations}: no (8.9/1.8) ## : purpose = car0: yes (1.8) ## : purpose = car: ## : :...amount <= 1804: yes (9.3) ## : : amount > 1804: no (14.4/4.1) ## : purpose = furniture/appliances: ## : :...housing in {other,rent}: yes (6.8/0.7) ## : housing = own: no (25.6/8.1) ## savings_balance = < 100 DM: ## :...months_loan_duration > 47: yes (29.2/4.2) ## months_loan_duration <= 47: ## :...credit_history in {perfect,very good}: yes (42.5/16.6) ## credit_history = poor: ## :...phone = no: no (10.6/2.3) ## : phone = yes: yes (15.2/2.5) ## credit_history = critical: ## :...dependents > 1: no (11/2) ## : dependents <= 1: ## : :...housing = other: no (3.3) ## : housing = rent: yes (14.4/4.7) ## : housing = own: ## : :...purpose in {business,car0,renovations}: no (6.4/1.8) ## : purpose = education: yes (2.8/0.4) ## : purpose = car: ## : :...age <= 29: yes (5.3) ## : : age > 29: no (20.3/5.1) ## : purpose = 
furniture/appliances: ## : :...phone = yes: no (4.6) ## : phone = no: ## : :...years_at_residence <= 3: yes (14.4/4.8) ## : years_at_residence > 3: no (9.3/0.8) ## credit_history = good: ## :...job = unemployed: yes (2.9) ## job = management: ## :...amount <= 7582: no (18.8/3.8) ## : amount > 7582: yes (11.1/2) ## job = skilled: ## :...employment_duration = 1 - 4 years: yes (61/25.1) ## : employment_duration = unemployed: no (2.1) ## : employment_duration = < 1 year: ## : :...amount <= 3124: yes (32.1/10.7) ## : : amount > 3124: no (6.1) ## : employment_duration = > 7 years: ## : :...age <= 33: no (3.7) ## : : age > 33: yes (15.5/4.9) ## : employment_duration = 4 - 7 years: ## : :...dependents <= 1: yes (20/8.3) ## : dependents > 1: no (5.5/0.9) ## job = unskilled: ## :...months_loan_duration <= 8: no (5.7) ## months_loan_duration > 8: ## :...employment_duration in {< 1 year, ## : unemployed}: yes (21.8/5.6) ## employment_duration in {> 7 years, ## : 4 - 7 years}: no (13.3/4.3) ## employment_duration = 1 - 4 years: ## :...phone = yes: yes (3.8/0.4) ## phone = no: ## :...checking_balance in {< 0 DM, ## : 1 - 200 DM}: no (26.4/9.3) ## checking_balance = > 200 DM: yes (4.9) ## ## ----- Trial 6: ----- ## ## Decision tree: ## ## checking_balance in {> 200 DM,unknown}: ## :...purpose in {business,car0}: no (47.2/16.3) ## : purpose = renovations: yes (6.8/3) ## : purpose = education: ## : :...years_at_residence <= 2: yes (11/2) ## : : years_at_residence > 2: no (14.3/4.9) ## : purpose = car: ## : :...other_credit in {none,store}: no (82.6/19.2) ## : : other_credit = bank: ## : : :...existing_loans_count <= 1: no (11.2/3.9) ## : : existing_loans_count > 1: yes (13.4/3.5) ## : purpose = furniture/appliances: ## : :...age > 44: no (20.8) ## : age <= 44: ## : :...credit_history in {critical,poor,very good}: no (42.4/8.2) ## : credit_history = perfect: yes (1.8/0.6) ## : credit_history = good: ## : :...existing_loans_count > 1: yes (14.7/4) ## : existing_loans_count <= 1: ## : 
:...job in {management,skilled,unemployed}: no (51.6/13.3) ## : job = unskilled: yes (16.8/4.1) ## checking_balance in {< 0 DM,1 - 200 DM}: ## :...years_at_residence <= 1: ## :...employment_duration = unemployed: yes (6.4) ## : employment_duration in {< 1 year,> 7 years,1 - 4 years,4 - 7 years}: ## : :...housing in {other,own}: no (78/21.9) ## : housing = rent: yes (9.7/3.2) ## years_at_residence > 1: ## :...employment_duration = 4 - 7 years: no (67/24.6) ## employment_duration = unemployed: ## :...dependents > 1: yes (7.5/0.6) ## : dependents <= 1: ## : :...credit_history in {critical,good,very good}: no (23/2.7) ## : credit_history in {perfect,poor}: yes (5.5/0.9) ## employment_duration = < 1 year: ## :...savings_balance in {> 1000 DM,100 - 500 DM, ## : : 500 - 1000 DM}: yes (17.6/2.4) ## : savings_balance = unknown: no (6/2.1) ## : savings_balance = < 100 DM: ## : :...months_loan_duration > 22: yes (22.2/3) ## : months_loan_duration <= 22: ## : :...dependents > 1: no (2.1) ## : dependents <= 1: ## : :...months_loan_duration <= 12: yes (8.4/2) ## : months_loan_duration > 12: no (14.6/2.8) ## employment_duration = > 7 years: ## :...months_loan_duration > 40: yes (5.8) ## : months_loan_duration <= 40: ## : :...purpose in {business,car0,education}: yes (20.2/6.2) ## : purpose = renovations: no (1.3) ## : purpose = furniture/appliances: ## : :...years_at_residence <= 3: yes (15.3/3.3) ## : : years_at_residence > 3: ## : : :...amount <= 5711: no (21.9/3.7) ## : : amount > 5711: yes (5.3) ## : purpose = car: ## : :...months_loan_duration <= 18: yes (18.6/4.1) ## : months_loan_duration > 18: ## : :...job = unemployed: no (0) ## : job = unskilled: yes (2.3) ## : job in {management,skilled}: ## : :...existing_loans_count <= 3: no (18.3/2.8) ## : existing_loans_count > 3: yes (2.1) ## employment_duration = 1 - 4 years: ## :...savings_balance = > 1000 DM: no (2.9) ## savings_balance in {100 - 500 DM,500 - 1000 DM}: yes (20.2/6.7) ## savings_balance = unknown: ## 
:...other_credit in {bank,store}: no (2.8) ## : other_credit = none: yes (17.4/7.3) ## savings_balance = < 100 DM: ## :...months_loan_duration > 36: yes (11.3) ## months_loan_duration <= 36: ## :...purpose in {business,education, ## : renovations}: yes (12.4/3.2) ## purpose = car0: no (3/1.4) ## purpose = car: ## :...housing = other: yes (1) ## : housing = rent: no (13.1/1.3) ## : housing = own: ## : :...dependents > 1: no (3.4) ## : dependents <= 1: ## : :...years_at_residence <= 3: yes (12.5/3.7) ## : years_at_residence > 3: no (6.7/0.7) ## purpose = furniture/appliances: ## :...months_loan_duration > 24: no (5.6) ## months_loan_duration <= 24: ## :...other_credit = bank: yes (9.3/2.7) ## other_credit = store: no (4.3) ## other_credit = none: ## :...percent_of_income <= 1: yes (4.9) ## percent_of_income > 1: ## :...amount <= 1164: no (14) ## amount > 1164: ## :...phone = yes: yes (4.4) ## phone = no: ## :...age <= 24: yes (13.6/1.9) ## age > 24: no (23.6/7.8) ## ## ----- Trial 7: ----- ## ## Decision tree: ## ## months_loan_duration <= 8: ## :...existing_loans_count > 1: no (12.1) ## : existing_loans_count <= 1: ## : :...amount > 3161: yes (6.1/0.6) ## : amount <= 3161: ## : :...age <= 25: yes (9.4/3.2) ## : age > 25: no (33/2.6) ## months_loan_duration > 8: ## :...checking_balance = < 0 DM: ## :...credit_history = critical: no (45.5/18.9) ## : credit_history in {perfect,poor,very good}: yes (50.8/15.6) ## : credit_history = good: ## : :...purpose in {business,car0}: no (7.7/1.3) ## : purpose in {education,renovations}: yes (11.2/2.8) ## : purpose = car: ## : :...savings_balance in {> 1000 DM,100 - 500 DM}: no (2.9) ## : : savings_balance in {500 - 1000 DM,unknown}: yes (10.9/3.1) ## : : savings_balance = < 100 DM: ## : : :...years_at_residence <= 2: yes (18.3/3) ## : : years_at_residence > 2: ## : : :...existing_loans_count <= 1: no (23.8/9.1) ## : : existing_loans_count > 1: yes (2.4) ## : purpose = furniture/appliances: ## : :...savings_balance in {> 1000 
DM,500 - 1000 DM}: no (1.7) ## : savings_balance in {100 - 500 DM,unknown}: yes (15/3.3) ## : savings_balance = < 100 DM: ## : :...phone = yes: yes (15.5/2.6) ## : phone = no: ## : :...years_at_residence <= 1: no (13.4/1.3) ## : years_at_residence > 1: ## : :...percent_of_income <= 1: no (6.6/1) ## : percent_of_income > 1: ## : :...age <= 23: yes (7.5) ## : age > 23: ## : :...housing in {other,rent}: no (6.1/0.6) ## : housing = own: ## : :...months_loan_duration > 28: yes (7.7) ## : months_loan_duration <= 28: ## : :...other_credit in {bank, ## : : store}: yes (8.1/2.1) ## : other_credit = none: no (19.7/5.9) ## checking_balance in {> 200 DM,1 - 200 DM,unknown}: ## :...savings_balance = > 1000 DM: ## :...housing = other: yes (5.5) ## : housing in {own,rent}: ## : :...existing_loans_count <= 2: no (16.6) ## : existing_loans_count > 2: yes (2.1) ## savings_balance = 100 - 500 DM: ## :...purpose in {business,car0,renovations}: no (15.7/1.9) ## : purpose = education: yes (7/0.5) ## : purpose = furniture/appliances: ## : :...months_loan_duration <= 27: no (16.6/3.8) ## : : months_loan_duration > 27: yes (3.8) ## : purpose = car: ## : :...dependents > 1: yes (4.2/0.5) ## : dependents <= 1: ## : :...age <= 29: yes (12/2.8) ## : age > 29: no (10.7/1.8) ## savings_balance = 500 - 1000 DM: ## :...existing_loans_count > 2: yes (3.5) ## : existing_loans_count <= 2: ## : :...job in {management,unemployed}: no (6) ## : job in {skilled,unskilled}: ## : :...other_credit = bank: yes (4.7) ## : other_credit in {none,store}: no (21.8/7.7) ## savings_balance = unknown: ## :...purpose in {business,car,car0,education, ## : : renovations}: no (69.3/20.1) ## : purpose = furniture/appliances: ## : :...months_loan_duration > 18: no (16.8/1.2) ## : months_loan_duration <= 18: ## : :...age <= 24: no (5) ## : age > 24: yes (21.6/6.9) ## savings_balance = < 100 DM: ## :...months_loan_duration > 26: ## :...years_at_residence <= 1: no (14.4/4.8) ## : years_at_residence > 1: ## : 
:...percent_of_income <= 1: no (8.6/2.5) ## : percent_of_income > 1: yes (66.3/17.7) ## months_loan_duration <= 26: ## :...age > 47: no (16/0.6) ## age <= 47: ## :...dependents > 1: ## :...age <= 33: no (5.3) ## : age > 33: ## : :...existing_loans_count > 2: no (2.1) ## : existing_loans_count <= 2: ## : :...months_loan_duration <= 11: no (2.6) ## : months_loan_duration > 11: yes (21.7/1) ## dependents <= 1: ## :...existing_loans_count > 1: ## :...amount <= 8133: no (61.5/14.6) ## : amount > 8133: yes (5.4) ## existing_loans_count <= 1: ## :...purpose in {business,car0, ## : education}: no (15.1/6.6) ## purpose = renovations: yes (2.6) ## purpose in {car,furniture/appliances}: ## :...checking_balance = > 200 DM: no (14.1/3.4) ## checking_balance = unknown: ## :...age <= 25: yes (12.5/4) ## : age > 25: no (11.9) ## checking_balance = 1 - 200 DM: ## :...other_credit in {bank, ## : store}: yes (9.4/2.3) ## other_credit = none: ## :...housing = other: yes (2.3) ## housing = rent: no (12.5/4.1) ## housing = own: ## :...age > 34: yes (7.5/0.5) ## age <= 34: [S1] ## ## SubTree [S1] ## ## months_loan_duration <= 9: yes (8.7/2.2) ## months_loan_duration > 9: no (21.2/7.2) ## ## ----- Trial 8: ----- ## ## Decision tree: ## ## checking_balance = unknown: ## :...employment_duration in {> 7 years,4 - 7 years}: ## : :...months_loan_duration > 24: no (28.1) ## : : months_loan_duration <= 24: ## : : :...amount <= 10222: no (75.5/20.4) ## : : amount > 10222: yes (3.6) ## : employment_duration in {< 1 year,1 - 4 years,unemployed}: ## : :...dependents > 1: ## : :...other_credit in {bank,none}: no (17.2/1.6) ## : : other_credit = store: yes (2.9/0.4) ## : dependents <= 1: ## : :...amount > 4455: yes (50.2/14.1) ## : amount <= 4455: ## : :...percent_of_income <= 2: no (21.9/3.1) ## : percent_of_income > 2: ## : :...employment_duration = < 1 year: no (8.5/1.2) ## : employment_duration = unemployed: yes (6.6/1.6) ## : employment_duration = 1 - 4 years: ## : :...existing_loans_count > 1: 
no (20.7/4) ## : existing_loans_count <= 1: ## : :...savings_balance in {> 1000 DM, ## : : 100 - 500 DM}: no (4.8) ## : savings_balance in {< 100 DM,500 - 1000 DM,unknown}: ## : :...other_credit in {bank,store}: yes (5.3) ## : other_credit = none: ## : :...age <= 31: yes (21.1/5.9) ## : age > 31: no (5.4) ## checking_balance in {< 0 DM,> 200 DM,1 - 200 DM}: ## :...savings_balance = > 1000 DM: no (12.6/4.4) ## savings_balance = 500 - 1000 DM: ## :...other_credit in {bank,none}: yes (20.8/7.3) ## : other_credit = store: no (1.6) ## savings_balance = 100 - 500 DM: ## :...existing_loans_count > 3: yes (3.8) ## : existing_loans_count <= 3: ## : :...credit_history in {critical,poor}: no (12.9/1.4) ## : credit_history in {perfect,very good}: yes (12.3/3.8) ## : credit_history = good: ## : :...months_loan_duration > 30: yes (4.1) ## : months_loan_duration <= 30: ## : :...housing in {other,rent}: yes (8.3/2.1) ## : housing = own: ## : :...purpose in {business,car,car0,education, ## : : renovations}: no (11.2/0.5) ## : purpose = furniture/appliances: yes (7.8/2.1) ## savings_balance = unknown: ## :...months_loan_duration <= 11: no (6.9) ## : months_loan_duration > 11: ## : :...job in {management,unemployed,unskilled}: no (22.6/7) ## : job = skilled: ## : :...purpose in {car0,renovations}: yes (0) ## : purpose in {business,education}: no (10.5) ## : purpose in {car,furniture/appliances}: ## : :...amount <= 1412: yes (7.7) ## : amount > 1412: ## : :...existing_loans_count > 1: no (4.5) ## : existing_loans_count <= 1: ## : :...amount > 10366: yes (2.6) ## : amount <= 10366: ## : :...purpose = car: no (10.6/3.4) ## : purpose = furniture/appliances: yes (15.5/6.2) ## savings_balance = < 100 DM: ## :...months_loan_duration <= 15: ## :...credit_history in {perfect,very good}: yes (20.7/7.6) ## : credit_history = poor: no (7.6/1.8) ## : credit_history = critical: ## : :...dependents > 1: no (4.5) ## : : dependents <= 1: ## : : :...job in {unemployed,unskilled}: no (5.8) ## : : job 
in {management,skilled}: ## : : :...percent_of_income <= 3: yes (10.5/2.7) ## : : percent_of_income > 3: no (15.5/3.4) ## : credit_history = good: ## : :...phone = yes: ## : :...amount <= 3021: no (17.8/0.6) ## : : amount > 3021: yes (4.2/0.2) ## : phone = no: ## : :...purpose in {business,car0,renovations}: no (4.3/1) ## : purpose = education: yes (4.7) ## : purpose = car: ## : :...months_loan_duration > 13: no (2.8) ## : : months_loan_duration <= 13: ## : : :...checking_balance in {< 0 DM, ## : : : 1 - 200 DM}: yes (20.6/4.8) ## : : checking_balance = > 200 DM: no (2.3) ## : purpose = furniture/appliances: ## : :...other_credit = bank: no (3.6) ## : other_credit = store: yes (4.1) ## : other_credit = none: ## : :...checking_balance = < 0 DM: no (15.7/1.8) ## : checking_balance in {> 200 DM,1 - 200 DM}: ## : :...months_loan_duration <= 7: no (4.6) ## : months_loan_duration > 7: ## : :...amount > 2625: no (2.8) ## : amount <= 2625: ## : :...amount <= 601: no (2.4) ## : amount > 601: yes (24.7/5.6) ## months_loan_duration > 15: ## :...months_loan_duration > 47: yes (28.1/4.8) ## months_loan_duration <= 47: ## :...amount <= 2320: yes (83.1/19.4) ## amount > 2320: ## :...percent_of_income <= 1: ## :...amount <= 11054: no (24/4.1) ## : amount > 11054: yes (2.3) ## percent_of_income > 1: ## :...credit_history = perfect: no (12.6/5.2) ## credit_history in {poor,very good}: yes (12.1/2.4) ## credit_history = critical: ## :...age <= 31: yes (12.8/2) ## : age > 31: no (16.6/3) ## credit_history = good: ## :...other_credit = store: no (4.9) ## other_credit in {bank,none}: ## :...job = management: no (16.2/6.3) ## job in {unemployed,unskilled}: yes (14.8/4.3) ## job = skilled: ## :...housing = other: no (3.8) ## housing in {own,rent}: [S1] ## ## SubTree [S1] ## ## checking_balance = < 0 DM: yes (22.4/4.4) ## checking_balance in {> 200 DM,1 - 200 DM}: no (19.1/6.6) ## ## ----- Trial 9: ----- ## ## Decision tree: ## ## checking_balance in {> 200 DM,unknown}: ## :...purpose in 
{business,education,renovations}: ## : :...employment_duration = 4 - 7 years: no (9.1) ## : : employment_duration in {< 1 year,> 7 years,1 - 4 years,unemployed}: ## : : :...other_credit = store: yes (7.1/0.4) ## : : other_credit in {bank,none}: ## : : :...credit_history in {critical,perfect}: no (19.3/4.7) ## : : credit_history in {good,poor,very good}: yes (38.8/11.6) ## : purpose in {car,car0,furniture/appliances}: ## : :...existing_loans_count <= 1: no (147.5/30.2) ## : existing_loans_count > 1: ## : :...checking_balance = > 200 DM: yes (12.9/4.2) ## : checking_balance = unknown: ## : :...housing = other: no (4.5) ## : housing = rent: yes (11.7/3.6) ## : housing = own: ## : :...months_loan_duration <= 16: no (18.7) ## : months_loan_duration > 16: ## : :...credit_history in {good,perfect, ## : : very good}: yes (12.4/1.9) ## : credit_history in {critical,poor}: ## : :...percent_of_income <= 2: no (11.3) ## : percent_of_income > 2: ## : :...employment_duration in {< 1 year,4 - 7 years, ## : : unemployed}: no (10.9) ## : employment_duration in {> 7 years,1 - 4 years}: [S1] ## checking_balance in {< 0 DM,1 - 200 DM}: ## :...months_loan_duration > 26: ## :...months_loan_duration > 47: yes (29.7/5.5) ## : months_loan_duration <= 47: ## : :...dependents <= 1: yes (86.5/30.6) ## : dependents > 1: no (14.8/5.1) ## months_loan_duration <= 26: ## :...housing = other: ## :...amount > 7409: yes (7.9) ## : amount <= 7409: ## : :...job in {management,skilled}: no (22.5/4.7) ## : job in {unemployed,unskilled}: yes (5) ## housing = rent: ## :...credit_history = perfect: yes (6.6) ## : credit_history in {critical,good,poor,very good}: ## : :...other_credit in {bank,store}: yes (18.4/5.6) ## : other_credit = none: ## : :...months_loan_duration > 22: ## : :...percent_of_income <= 2: no (7.3/2.6) ## : : percent_of_income > 2: yes (14.7/1.4) ## : months_loan_duration <= 22: ## : :...job in {management,unemployed,unskilled}: no (16.8/1.9) ## : job = skilled: ## : :...credit_history = 
very good: yes (0) ## : credit_history in {critical,poor}: no (12/3) ## : credit_history = good: ## : :...amount <= 1382: yes (9.6) ## : amount > 1382: no (16.2/5.5) ## housing = own: ## :...employment_duration = unemployed: yes (16.9/4.2) ## employment_duration in {< 1 year,> 7 years,1 - 4 years,4 - 7 years}: ## :...savings_balance in {> 1000 DM,500 - 1000 DM}: no (16.4/2.8) ## savings_balance = 100 - 500 DM: ## :...amount <= 1442: yes (8.8/1.8) ## : amount > 1442: no (19.2/1.2) ## savings_balance = unknown: ## :...credit_history in {critical,good,perfect, ## : : poor}: no (30.1/6.7) ## : credit_history = very good: yes (3.8) ## savings_balance = < 100 DM: ## :...job = management: yes (6.8/2) ## job in {unemployed,unskilled}: no (69.1/23.1) ## job = skilled: ## :...purpose in {education,renovations}: yes (6.3) ## purpose in {business,car,car0,furniture/appliances}: ## :...months_loan_duration <= 7: no (7) ## months_loan_duration > 7: ## :...credit_history in {perfect, ## : poor}: no (9.4/1.5) ## credit_history = very good: yes (3.1) ## credit_history = critical: ## :...percent_of_income <= 2: no (6.1) ## : percent_of_income > 2: ## : :...percent_of_income <= 3: yes (4.5) ## : percent_of_income > 3: no (16.1/5.3) ## credit_history = good: ## :...years_at_residence > 3: no (19.1/3.6) ## years_at_residence <= 3: ## :...existing_loans_count > 1: no (4.1/0.9) ## existing_loans_count <= 1: ## :...years_at_residence > 2: yes (14.1/2.8) ## years_at_residence <= 2: ## :...amount <= 999: yes (6.8/1.4) ## amount > 999: ## :...age <= 24: yes (10.4/2) ## age > 24: no (18.2/2) ## ## SubTree [S1] ## ## purpose = car: yes (12.4/4.1) ## purpose in {car0,furniture/appliances}: no (6.9/1) ## ## ## Evaluation on training data (900 cases): ## ## Trial Decision Tree ## ----- ---------------- ## Size Errors ## ## 0 66 125(13.9%) ## 1 40 205(22.8%) ## 2 46 196(21.8%) ## 3 45 193(21.4%) ## 4 68 163(18.1%) ## 5 62 175(19.4%) ## 6 56 186(20.7%) ## 7 62 188(20.9%) ## 8 66 156(17.3%) ## 9 49 
## 200(22.2%)
## boost            31( 3.4%)   <<
##
##
##     (a)   (b)    <-classified as
##    ----  ----
##     626     6    (a): class no
##      25   243    (b): class yes
##
##
##  Attribute usage:
##
##  100.00% checking_balance
##  100.00% months_loan_duration
##  100.00% amount
##  100.00% employment_duration
##   98.56% credit_history
##   97.44% savings_balance
##   94.11% purpose
##   88.44% other_credit
##   82.44% existing_loans_count
##   80.89% job
##   75.67% age
##   71.89% percent_of_income
##   71.67% housing
##   70.78% dependents
##   69.33% years_at_residence
##   49.56% phone
##
##
## Time: 0.3 secs

# Score the test set with the boosted model (trials 0-9 summarised above).
# These statements must sit on their own lines: when fused onto the end of a
# `##` output line they are treated as comment text and never run.
credit_boost_pred10 <- predict(credit_boost10, credit_test)

# Confusion matrix of actual vs. predicted default. Chi-square, column and
# row proportions are switched off, so each cell shows N and N / table total.
CrossTable(credit_test$default, credit_boost_pred10,
           prop.chisq = FALSE, prop.c = FALSE, prop.r = FALSE,
           dnn = c("actual default", "predicted default"))
##
##
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table:  100
##
##
##                | predicted default
## actual default |        no |       yes | Row Total |
## ---------------|-----------|-----------|-----------|
##             no |        60 |         8 |        68 |
##                |     0.600 |     0.080 |           |
## ---------------|-----------|-----------|-----------|
##            yes |        15 |        17 |        32 |
##                |     0.150 |     0.170 |           |
## ---------------|-----------|-----------|-----------|
##   Column Total |        75 |        25 |       100 |
## ---------------|-----------|-----------|-----------|
##
## 
# Apply a penalty to different types of errors, because
# some mistakes are more costly than others.
# Cost matrix for C5.0(): rows are the predicted class, columns the actual
# class. Filling column-wise, c(0, 1, 4, 0) gives
#   predicted "no",  actual "yes" (missed default) -> cost 4
#   predicted "yes", actual "no"  (false alarm)    -> cost 1
# Naming the dimensions with the factor levels of `default` ("no", "yes")
# makes that orientation explicit and avoids the C5.0() warning
# "no dimnames were given for the cost matrix; the factor levels will be used".
cost_dimnames <- list(predicted = c("no", "yes"), actual = c("no", "yes"))
error_cost <- matrix(c(0, 1, 4, 0), nrow = 2, dimnames = cost_dimnames)
error_cost
##          actual
## predicted no yes
##       no   0   4
##       yes  1   0

# Train a cost-sensitive tree. Column 17 is the outcome `default` in the
# str(credit) listing; credit_train is assumed to share that layout, so it is
# dropped from the predictors.
credit_cost <- C5.0(credit_train[-17], credit_train$default,
                    costs = error_cost)
credit_cost_pred <- predict(credit_cost, credit_test)

# See if the confusion matrix changes, based on the weighting.
# Confusion matrix for the cost-sensitive model: actual vs. predicted default.
# The column is accessed with a plain `$`; an escaped `\$` (a markdown
# artifact) is a parse error in R.
# Chi-square, column and row proportions are switched off, so each cell shows
# N and N / table total.
CrossTable(credit_test$default, credit_cost_pred,
           prop.chisq = FALSE, prop.c = FALSE, prop.r = FALSE,
           dnn = c("actual default", "predicted default"))
##
##
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table:  100
##
##
##                | predicted default
## actual default |        no |       yes | Row Total |
## ---------------|-----------|-----------|-----------|
##             no |        42 |        26 |        68 |
##                |     0.420 |     0.260 |           |
## ---------------|-----------|-----------|-----------|
##            yes |         6 |        26 |        32 |
##                |     0.060 |     0.260 |           |
## ---------------|-----------|-----------|-----------|
##   Column Total |        48 |        52 |       100 |
## ---------------|-----------|-----------|-----------|
##
##