I have code for an ID3 decision tree, but I cannot find suitable code for plotting or viewing the tree.
I have tried several MATLAB view functions, such as view(Mdl,'Mode','graph'), but none of them work. The code currently prints the decision tree structure as:
DECISION TREE STRUCTURE:
parent: root attribute: IsRich falseChild:HasScholarship trueChild:GoodLetters
parent: IsRich attribute: HasScholarship falseChild:false trueChild:GoodSAT
parent: HasScholarship false
parent: HasScholarship attribute: GoodSAT falseChild:false trueChild:GoodLetters
parent: GoodSAT false
parent: GoodSAT attribute: GoodLetters falseChild:false trueChild:true
parent: GoodLetters false
parent: GoodLetters true
parent: IsRich attribute: GoodLetters falseChild:GoodGrades trueChild:GoodGrades
parent: GoodLetters attribute: GoodGrades falseChild:false trueChild:SchoolActivities
parent: GoodGrades false
parent: GoodGrades attribute: SchoolActivities falseChild:false trueChild:true
parent: SchoolActivities false
parent: SchoolActivities true
parent: GoodLetters attribute: GoodGrades falseChild:GoodSAT trueChild:true
parent: GoodGrades attribute: GoodSAT falseChild:false trueChild:true
parent: GoodSAT false
parent: GoodSAT true
parent: GoodGrades true
So I would like to plot this tree structure; any help would be greatly appreciated. (A plotting sketch based on treeplot follows the code listings below.)
% ID3 Decision Tree Algorithm
function [] = decisiontree(inputFileName, trainingSetSize, numberOfTrials, ...
    verbose)
% DECISIONTREE Create a decision tree by following the ID3 algorithm
% args:
% inputFileName - the fully specified path to input file
% trainingSetSize - integer specifying number of examples from input
% used to train the dataset
% numberOfTrials - integer specifying how many times decision tree
% will be built from a randomly selected subset
% of the training examples
% verbose - string that must be either '1' or '0', if '1'
% output includes training and test sets, else
% it will only contain description of tree and
% results for the trials
% Read in the specified text file containing the examples
fid = fopen(inputFileName, 'rt');
dataInput = textscan(fid, '%s');
% Close the file
fclose(fid);
% Reformat the data into attribute array and data matrix of 1s and 0s for
% true or false
i = 1;
% First store the attributes into a cell array
while (~strcmp(dataInput{1}{i}, 'CLASS'));
i = i + 1;
end
attributes = cell(1,i);
for j=1:i;
attributes{j} = dataInput{1}{j};
end
% NOTE: The classification will be the final attribute in the data rows
% below
numAttributes = i;
numInstances = (length(dataInput{1}) - numAttributes) / numAttributes;
% Then store the data into matrix
data = zeros(numInstances, numAttributes);
i = i + 1;
for j=1:numInstances
for k=1:numAttributes
data(j, k) = strcmp(dataInput{1}{i}, 'true');
i = i + 1;
end
end
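% NOTE (illustration, not from my actual data file): the parser above expects
% a whitespace-delimited file listing the attribute names, then the literal
% token CLASS, then one true/false token per column for each example, e.g.:
%   IsRich HasScholarship GoodSAT GoodLetters GoodGrades SchoolActivities CLASS
%   true   false          true    true        false      true             true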
% Here is where the trials start
for i=1:numberOfTrials;
% Print the trial number
fprintf('TRIAL NUMBER: %d\n\n', i);
% Split data into training and testing sets randomly
% Use randsample to get a vector of row numbers for the training set
rows = sort(randsample(numInstances, trainingSetSize));
% Initialize two new matrices, training set and test set
trainingSet = zeros(trainingSetSize, numAttributes);
testingSetSize = (numInstances - trainingSetSize);
testingSet = zeros(testingSetSize, numAttributes);
% Loop through data matrix, copying relevant rows to each matrix
training_index = 1;
testing_index = 1;
for data_index=1:numInstances;
if (rows(training_index) == data_index);
trainingSet(training_index, :) = data(data_index, :);
if (training_index < trainingSetSize);
training_index = training_index + 1;
end
else
testingSet(testing_index, :) = data(data_index, :);
if (testing_index < testingSetSize);
testing_index = testing_index + 1;
end
end
end
% If verbose, print out training set
if (strcmp(verbose, '1')); % verbose is a string, so test it explicitly (the char '0' is otherwise truthy)
for ii=1:numAttributes;
fprintf('%s\t', attributes{ii});
end
fprintf('\n');
for ii=1:trainingSetSize;
for jj=1:numAttributes;
if (trainingSet(ii, jj));
fprintf('%s\t', 'true');
else
fprintf('%s\t', 'false');
end
end
fprintf('\n');
end
end
% Estimate the expected prior (majority) classification based on the
% training set
if (sum(trainingSet(:, numAttributes)) >= trainingSetSize / 2);
expectedPrior = 'true';
else
expectedPrior = 'false';
end
% Construct a decision tree on the training set using the ID3 algorithm
activeAttributes = ones(1, length(attributes) - 1);
new_attributes = attributes(1:length(attributes)-1); % drop CLASS
tree = ID3(trainingSet, new_attributes, activeAttributes); % ID3 is defined below
% Print out the tree
fprintf('DECISION TREE STRUCTURE:\n');
PrintTree(tree, 'root');
end
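For reference, a hypothetical call might look like this (the file name and parameter values are made-up placeholders):
% Hypothetical example: train on 100 randomly sampled rows, run 3 trials,
% and skip the verbose training/testing set dumps
decisiontree('examples.txt', 100, 3, '0');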
The ID3 function is:
function [tree] = ID3(examples, attributes, activeAttributes)
% ID3 Runs the ID3 algorithm on the matrix of examples and attributes
% args:
% examples - matrix of 1s and 0s for trues and falses, the
% last value in each row being the value of the
% classifying attribute
% attributes - cell array of attribute strings (no CLASS)
% activeAttributes - vector of 1s and 0s, 1 if corresponding attr.
% active (no CLASS)
% return:
% tree - the root node of a decision tree
% tree struct:
% value - will be the string for the splitting
% attribute, or 'true' or 'false' for leaf
% left - left pointer to another tree node (left means
% the splitting attribute was false)
% right - right pointer to another tree node (right
% means the splitting attribute was true)
if (isempty(examples));
error('Must provide examples');
end
% Constants
numberAttributes = length(activeAttributes);
numberExamples = size(examples, 1);
% Create the tree node
tree = struct('value', 'null', 'left', 'null', 'right', 'null');
% If last value of all rows in examples is 1, return tree labeled 'true'
lastColumnSum = sum(examples(:, numberAttributes + 1));
if (lastColumnSum == numberExamples);
tree.value = 'true';
return
end
% If last value of all rows in examples is 0, return tree labeled 'false'
if (lastColumnSum == 0);
tree.value = 'false';
return
end
% If activeAttributes is empty, then return tree with label as most common
% value
if (sum(activeAttributes) == 0);
if (lastColumnSum >= numberExamples / 2);
tree.value = 'true';
else
tree.value = 'false';
end
return
end
% Find the current entropy
p1 = lastColumnSum / numberExamples;
if (p1 == 0);
p1_eq = 0;
else
p1_eq = -1*p1*log2(p1);
end
p0 = (numberExamples - lastColumnSum) / numberExamples;
if (p0 == 0);
p0_eq = 0;
else
p0_eq = -1*p0*log2(p0);
end
currentEntropy = p1_eq + p0_eq;
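% (Worked example, for illustration: with 6 positive and 2 negative
% examples, p1 = 0.75 and p0 = 0.25, giving an entropy of
% -0.75*log2(0.75) - 0.25*log2(0.25) ~= 0.8113)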
% Find the attribute that maximizes information gain
gains = -1*ones(1,numberAttributes); %-1 if inactive, gains for all else
% Loop through attributes updating gains, making sure they are still active
for i=1:numberAttributes;
if (activeAttributes(i)) % this one is still active, update its gain
s0 = 0; s0_and_true = 0;
s1 = 0; s1_and_true = 0;
for j=1:numberExamples;
if (examples(j,i)); % this instance has splitting attr. true
s1 = s1 + 1;
if (examples(j, numberAttributes + 1)); %target attr is true
s1_and_true = s1_and_true + 1;
end
else
s0 = s0 + 1;
if (examples(j, numberAttributes + 1)); %target attr is true
s0_and_true = s0_and_true + 1;
end
end
end
% Entropy for S(v=1)
if (~s1);
p1 = 0;
else
p1 = (s1_and_true / s1);
end
if (p1 == 0);
p1_eq = 0;
else
p1_eq = -1*(p1)*log2(p1);
end
if (~s1);
p0 = 0;
else
p0 = ((s1 - s1_and_true) / s1);
end
if (p0 == 0);
p0_eq = 0;
else
p0_eq = -1*(p0)*log2(p0);
end
entropy_s1 = p1_eq + p0_eq;
% Entropy for S(v=0)
if (~s0);
p1 = 0;
else
p1 = (s0_and_true / s0);
end
if (p1 == 0);
p1_eq = 0;
else
p1_eq = -1*(p1)*log2(p1);
end
if (~s0);
p0 = 0;
else
p0 = ((s0 - s0_and_true) / s0);
end
if (p0 == 0);
p0_eq = 0;
else
p0_eq = -1*(p0)*log2(p0);
end
entropy_s0 = p1_eq + p0_eq;
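% Information gain of attribute i over the current example set S:
%   Gain(S, i) = H(S) - (|S1|/|S|)*H(S1) - (|S0|/|S|)*H(S0)
% where S1/S0 are the examples with attribute i true/false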
gains(i) = currentEntropy - ((s1/numberExamples)*entropy_s1) - ((s0/numberExamples)*entropy_s0);
end
end
% Pick the attribute that maximizes gains
[~, bestAttribute] = max(gains);
% Set tree.value to bestAttribute's relevant string
tree.value = attributes{bestAttribute};
% Remove splitting attribute from activeAttributes
activeAttributes(bestAttribute) = 0;
% Initialize and create the new example matrices
examples_0 = []; examples_0_index = 1;
examples_1 = []; examples_1_index = 1;
for i=1:numberExamples;
if (examples(i, bestAttribute)); % this instance has it as 1/true
examples_1(examples_1_index, :) = examples(i, :); % copy over
examples_1_index = examples_1_index + 1;
else
examples_0(examples_0_index, :) = examples(i, :);
examples_0_index = examples_0_index + 1;
end
end
% For both values of the splitting attribute
% For value = false or 0, corresponds to left branch
% If examples_0 is empty, add leaf node to the left with relevant label
if (isempty(examples_0));
leaf = struct('value', 'null', 'left', 'null', 'right', 'null');
if (lastColumnSum >= numberExamples / 2); % majority label of current examples
leaf.value = 'true';
else
leaf.value = 'false';
end
tree.left = leaf;
else
% Here is where we recurse
tree.left = ID3(examples_0, attributes, activeAttributes);
end
% For value = true or 1, corresponds to right branch
% If examples_1 is empty, add leaf node to the right with relevant label
if (isempty(examples_1));
leaf = struct('value', 'null', 'left', 'null', 'right', 'null');
if (lastColumnSum >= numberExamples / 2); % majority label of current examples
leaf.value = 'true';
else
leaf.value = 'false';
end
tree.right = leaf;
else
% Here is where we recurse
tree.right = ID3(examples_1, attributes, activeAttributes);
end
% Now we can return tree
return
end
And PrintTree.m:
function [] = PrintTree(tree, parent)
% Prints the tree structure (preorder traversal)
% Print current node
if (strcmp(tree.value, 'true'));
fprintf('parent: %s\ttrue\n', parent);
return
elseif (strcmp(tree.value, 'false'));
fprintf('parent: %s\tfalse\n', parent);
return
else
% Current node is an attribute splitter
fprintf('parent: %s\tattribute: %s\tfalseChild:%s\ttrueChild:%s\n', ...
parent, tree.value, tree.left.value, tree.right.value);
end
% Recur the left subtree
PrintTree(tree.left, tree.value);
% Recur the right subtree
PrintTree(tree.right, tree.value);
%treeplot(node)
end
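As far as I can tell, view(Mdl, 'Mode', 'graph') only works on the classification tree objects returned by Statistics Toolbox functions such as fitctree, not on a hand-rolled struct like this one. One possible approach is the built-in treeplot (already hinted at by the commented-out treeplot(node) line above), which takes a vector of parent indices. Below is a minimal sketch; the names PlotTree and Flatten are my own, and it assumes the struct layout produced by the ID3 function above, where a leaf's missing children hold the string 'null' rather than a struct:
function [] = PlotTree(tree)
% PLOTTREE Draw the ID3 tree struct using MATLAB's built-in treeplot,
% by first flattening the recursive struct into a parent-pointer vector
[parents, labels] = Flatten(tree, 0, [], {});
treeplot(parents);
% treelayout returns the node coordinates treeplot used, so the value
% strings can be drawn on top of the plotted nodes
[x, y] = treelayout(parents);
for k = 1:numel(labels)
    text(x(k), y(k), labels{k}, 'VerticalAlignment', 'bottom', ...
        'HorizontalAlignment', 'center', 'Interpreter', 'none');
end
end
function [parents, labels] = Flatten(node, parentIndex, parents, labels)
% FLATTEN Preorder walk: record each node's parent index and its label
parents(end + 1) = parentIndex;
labels{end + 1} = node.value;
myIndex = numel(parents);
% Leaf children are the string 'null' in the ID3 struct above, so only
% recurse when the child is itself a struct
if (isstruct(node.left))
    [parents, labels] = Flatten(node.left, myIndex, parents, labels);
end
if (isstruct(node.right))
    [parents, labels] = Flatten(node.right, myIndex, parents, labels);
end
end
With this saved as PlotTree.m, calling PlotTree(tree) right after PrintTree(tree, 'root') should draw the tree with each node labeled by its splitting attribute or its true/false leaf value.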