MATLAB Examples

function call_generic_random_forests()

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Name - call_generic_random_forests
% Creation Date - 7th July 2015
% Author - Soumya Banerjee
% Website - https://sites.google.com/site/neelsoumya/
%
% Description - Function to load data and call generic random forests function
%               Downloads the UCI breast cancer Wisconsin data set (only if
%               it is not already present in the current folder), parses it
%               with '?' treated as a missing value, trains a bagged
%               ensemble through generic_random_forests and prints an
%               example prediction.
%
% Parameters -
%	Input
%               None
%
%	Output
%               None returned. The following are displayed / produced:
%               BaggedEnsemble - ensemble of random forests (displayed)
%               Plots of out of bag error (presumably produced inside
%                       generic_random_forests - not visible from this file)
%		Example prediction (displayed)
%
% Example -
%		call_generic_random_forests()
%
% Acknowledgements -
%           Dedicated to my mother Kalyani Banerjee, my father Tarakeswar Banerjee
%				, my wife Joyeeta Ghose and my friend Irene Egli.
%
% License - BSD
%
% Change History -
%                   7th July 2015 - Creation by Soumya Banerjee
%                   12th July 2017 - Modified by Soumya Banerjee to try different leaf node
%                                   and estimate feature importance
%                   Review fix - parse '?' missing values so that all rows are
%                                   loaded (importdata stopped at the first '?'),
%                                   download the file only once with websave,
%                                   and drop the sample ID column from the predictors
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


%load fisheriris
%X = meas;
%Y = species;
%BaggedEnsemble = generic_random_forests(X,Y,60,'classification')
%predict(BaggedEnsemble,[5 3 5 1.8])

% load breast cancer data
data_url  = 'http://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data';
data_file = 'breast-cancer-wisconsin.data';

% Download only when the file is missing. Shelling out to wget on every call
% created numbered duplicates (breast-cancer-wisconsin.data.1, .2, ...) while
% the file that was actually read was never refreshed, and it only worked on
% systems that have wget installed.
if exist(data_file, 'file') ~= 2
    websave(data_file, data_url);
end

% Parse the comma separated file. Missing values are written as '?' in this
% data set; importdata stops reading at the first '?', which silently
% truncated the training set to the first 24 rows and produced a spurious
% 'NaN' class. textscan with TreatAsEmpty turns each '?' into NaN instead.
num_columns = 11; % sample ID, 9 cytology features, class label
fid = fopen(data_file, 'r');
if fid == -1
    error('call_generic_random_forests:fileOpen', ...
          'Could not open data file %s', data_file);
end
close_file = onCleanup(@() fclose(fid)); % close the file even if parsing fails
parsed = textscan(fid, repmat('%f', 1, num_columns), ...
                  'Delimiter', ',', ...
                  'TreatAsEmpty', '?', ...
                  'CollectOutput', true);
data_matrix = parsed{1};

if isempty(data_matrix)
    error('call_generic_random_forests:emptyData', ...
          'No rows could be parsed from %s', data_file);
end

% Discard rows that have no class label so that NaN never becomes a class.
data_matrix = data_matrix(~isnan(data_matrix(:, end)), :);

% Column 1 is the sample ID and carries no diagnostic information, so it is
% excluded from the predictors. Columns 2..end-1 are the features.
data_predictor = data_matrix(:, 2:end-1); % predictors matrix
label = data_matrix(:, end); % last column is 2 for benign, 4 for malignant

BaggedEnsemble = generic_random_forests(data_predictor, label, 500, 'classification')

% Example prediction for the first record of the data set (ID 1000025),
% passed without its ID to match the predictors used for training.
predict(BaggedEnsemble, [5,1,1,1,2,1,3,1,1])

% An earlier run (trained on the truncated data, with the ID column counted
% as x1) suggested that single epithelial cell size was the most important
% predictor. With the ID column removed that feature is x5, not x6.
% NOTE(review): re-run on the full data set to confirm which predictor ranks
% highest before relying on this.
wget: /opt/matlab/bin/glnxa64/libcrypto.so.1.0.0: no version information available (required by wget)
wget: /opt/matlab/bin/glnxa64/libssl.so.1.0.0: no version information available (required by wget)
wget: /opt/matlab/bin/glnxa64/libssl.so.1.0.0: no version information available (required by wget)
--2017-07-12 20:23:46--  http://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.249
Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.249|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 19889 (19K) [text/plain]
Saving to: ‘breast-cancer-wisconsin.data.10’

          breast-ca   0%[                    ]       0  --.-KB/s               breast-cancer-wisco 100%[===================>]  19.42K  --.-KB/s    in 0.1s    

2017-07-12 20:23:47 (137 KB/s) - ‘breast-cancer-wisconsin.data.10’ saved [19889/19889]


ans =

     0


icol =

    11


min_leaf_size =

     5


BaggedEnsemble = 

  TreeBagger
Ensemble with 500 bagged decision trees:
                    Training X:              [24x10]
                    Training Y:               [24x1]
                        Method:       classification
                 NumPredictors:                   10
         NumPredictorsToSample:                    4
                   MinLeafSize:                    1
                 InBagFraction:                    1
         SampleWithReplacement:                    1
          ComputeOOBPrediction:                    1
 ComputeOOBPredictorImportance:                    0
                     Proximity:                   []
                    ClassNames:             '2'             '4'           'NaN'


ans =

  24×1 cell array

    '2'
    '2'
    '2'
    '2'
    '2'
    '4'
    '2'
    '2'
    '2'
    '2'
    '2'
    '2'
    '4'
    '2'
    '4'
    '4'
    '2'
    '2'
    '4'
    '2'
    '4'
    '4'
    '2'
    '2'


Decision tree for classification
1  if x4<4.5 then node 2 elseif x4>=4.5 then node 3 else 2
2  class = 2
3  if x8<4 then node 4 elseif x8>=4 then node 5 else 4
4  class = 2
5  class = 4


idxvar =

  1×0 empty double row vector


BaggedEnsemble = 

  TreeBagger
Ensemble with 500 bagged decision trees:
                    Training X:              [24x10]
                    Training Y:               [24x1]
                        Method:       classification
                 NumPredictors:                   10
         NumPredictorsToSample:                    4
                   MinLeafSize:                    1
                 InBagFraction:                    1
         SampleWithReplacement:                    1
          ComputeOOBPrediction:                    1
 ComputeOOBPredictorImportance:                    0
                     Proximity:                   []
                    ClassNames:             '2'             '4'           'NaN'


ans =

  cell

    '2'