我正在写一个lua脚本来实现Decision Tree.But它在创建树时总是在函数中抛出异常。即使“断言”我也找不到它的错误。对不起我的英语不好: P上。
DTree.lua:
-- Decision Tree (ID3)
-- by Darksun2010
function calcShannonEnt(dataSet)
local labelScore={};
for _,v in pairs(dataSet) do
local curLabel=v[#v];
if (labelScore[curLabel]) then
labelScore[curLabel]=labelScore[curLabel]+1;
else
labelScore[curLabel]=1;
end
end
local result=0.0;
for _,v in pairs(labelScore) do
local prob=v/#dataSet;
result=result-prob*math.log(prob,2);
end
return result;
end
function splitDataSet(dataSet,axis,value)
local reDataSet={};
for _,v in pairs(dataSet) do
if v[axis]==value then
local reSet=table.pack(table.unpack(v,1,axis-1),
table.unpack(v,axis+1,#v));
if (reSet[1]==nil) then
table.remove(reSet,1);
end
if (reSet[#reSet]==nil) then
table.remove(reSet);
end
table.insert(reDataSet,reSet);
end
end
return reDataSet;
end
function chooseBest(dataSet)
local numFeaures=#dataSet[1]-1;
local baseEnt=calcShannonEnt(dataSet);
local maxGain,bestFeature=0,-1;
for i=1,numFeaures do
local featSet={};
local newGain=0;
for _,v in pairs(dataSet) do
featSet[v[i]]=1;
end
for k,_ in pairs(featSet) do
local subDataSet=splitDataSet(dataSet,i,k);
local prob=#subDataSet/#dataSet;
newGain=newGain+prob*calcShannonEnt(subDataSet);
end
local infoGain=baseEnt-newGain;
if (infoGain>maxGain) then
maxGain=infoGain;
bestFeature=i;
end
end
return bestFeature;
end
function majorityCnt(classList)
local classCount={};
for _,v in pairs(classList) do
if (classCount[v]) then
classCount[v]=classCount[v]+1;
else
classCount[v]=1;
end
end
local maxN,maxK=0,nil;
for k,v in pairs(classCount) do
if (v>maxN) then
maxN,maxK=v,k;
end
end
return maxK;
end
function makeDTree(dataSet,labels)
local classList={};
for _,v in pairs(dataSet) do
table.insert(classList,v[#v]);
end
local endFlag=true;
for _v in pairs(classList) do
if (not endFlag) then
break;
end
endFlag=endFlag and (v==classList[1]);
end
if (#dataSet[1]==1) then
return majorityCnt(classList);
end
local bestFeat=chooseBest(dataSet);
local bestFeatLabel=labels[bestFeat];
local myTree,featSet={},{};
for _,v in pairs(dataSet) do
featSet[v[bestFeat]]=1;
end
for k,_ in pairs(featSet) do
local subLabels=labels;
local subTree=makeDTree(splitDataSet(dataSet,bestFeat,k),subLabels);
myTree[bestFeatLabel][k]=subTree;
end
return myTree;
end
我称之为:
> labels = {'no surfacing','flippers'}
>数据集= {{1,1, '是'},{1,1, '是'},{1,0, '否'},{0,1, '否'},{0 ,如图1所示, '否'}}
> makeDTree(数据集时,标签)
DTree.lua:106:尝试索引零值(字段'?')
堆栈追溯:
DTree.lua:106:功能'makeDTree'
DTree.lua:105:功能'makeDTree'
(......尾调...)
[C]:在?
答案 0 :(得分:1)
第106行是myTree[bestFeatLabel][k]=subTree;
,但myTree
为空,因此myTree[bestFeatLabel]
为零。