如何获得J48大小和叶子数量

时间:2015-09-21 10:38:13

标签: r weka

如果我通过以下方式构建J48树:

# Load RWeka, the package that provides J48()
library(RWeka)

# Fit a J48 tree predicting Species from all other columns of iris
fit <- J48(Species ~ ., data = iris)

我得到以下结果:

> fit
J48 pruned tree
------------------

Petal.Width <= 0.6: setosa (50.0)
Petal.Width > 0.6
|   Petal.Width <= 1.7
|   |   Petal.Length <= 4.9: versicolor (48.0/1.0)
|   |   Petal.Length > 4.9
|   |   |   Petal.Width <= 1.5: virginica (3.0)
|   |   |   Petal.Width > 1.5: versicolor (3.0/1.0)
|   Petal.Width > 1.7: virginica (46.0/1.0)

Number of Leaves  :     5

Size of the tree :  9

我希望把Number of Leaves的值存入变量N(即N的值为5),把Size of the tree的值存入变量S(即S的值为9)。

有没有办法直接从J48树获取此信息?

1 个答案:

答案 0 :(得分:2)

有趣的是,fit的打印输出是在print.Weka_classifier函数内部通过.jcall调用创建的,这一点可以从getAnywhere(print.Weka_classifier)看到。这使得从打印输出中提取值变得更加困难(但并非不可能)。

为了存储这两个值,您可以这样做:

library(RWeka)
fit <- J48(Species~., data=iris)

#store the print output in a
a <- capture.output(fit)

> a
 [1] "J48 pruned tree" "------------------"
 [3] "" "Petal.Width <= 0.6: setosa (50.0)"
 [5] "Petal.Width > 0.6" "| Petal.Width <= 1.7"
 [7] "| | Petal.Length <= 4.9: versicolor (48.0/1.0)" "| | Petal.Length > 4.9"
 [9] "| | | Petal.Width <= 1.5: virginica (3.0)" "| | | Petal.Width > 1.5: versicolor (3.0/1.0)"
[11] "| Petal.Width > 1.7: virginica (46.0/1.0)" ""
[13] "Number of Leaves : \t5" ""
[15] "Size of the tree : \t9"

# get the output length, so that this can work for a tree
# with any size/number of leaves
out_length = length(a)

# then save the number from the fourth to last element to N
N <- as.numeric(gsub('\\D', '', a[out_length - 3]))

#then save the number from second to last element to S
S <- as.numeric(gsub('\\D', '', a[out_length - 1]))

> N
[1] 5
> S
[1] 9

你有它:

-- Work list of the columns to script: one row per column, holding the column
-- name, its type text, and the table it belongs to. The identity column "id"
-- numbers the rows in insertion order; the generator loop walks them by id.
create table #x (
    id         numeric(10) identity,
    col_name   varchar(64),
    col_type   varchar(64),
    table_name varchar(64)
)

-- No-op on a table that was just created; retained from the original script.
delete from #x

-- Sample rows for STAGIN_MEAA
insert into #x (col_name, col_type, table_name) values ('D_Product', '[decimal](19,6)', 'STAGIN_MEAA')
insert into #x (col_name, col_type, table_name) values ('D_Store  ', '[decimal](19,6)', 'STAGIN_MEAA')
insert into #x (col_name, col_type, table_name) values ('D_Product', '[decimal](19,6)', 'STAGIN_MEAA')
insert into #x (col_name, col_type, table_name) values ('D_Store  ', '[decimal](19,6)', 'STAGIN_MEAA')

-- Sample rows for STAGIN_MEBB
insert into #x (col_name, col_type, table_name) values ('D_Product', '[decimal](19,6)', 'STAGIN_MEBB')
insert into #x (col_name, col_type, table_name) values ('D_Store  ', '[decimal](19,6)', 'STAGIN_MEBB')
insert into #x (col_name, col_type, table_name) values ('D_Time   ', '[decimal](19,6)', 'STAGIN_MEBB')
insert into #x (col_name, col_type, table_name) values ('D_Product', '[decimal](19,6)', 'STAGIN_MEBB')
insert into #x (col_name, col_type, table_name) values ('D_Store  ', '[decimal](19,6)', 'STAGIN_MEBB')
insert into #x (col_name, col_type, table_name) values ('D_Time   ', '[decimal](19,6)', 'STAGIN_MEBB')

-- Builds a "create table" script from the rows of #x and returns it as a result
-- set, one script line per row.
--
-- Input : #x (id, col_name, col_type, table_name), read in ascending id order.
--         A new "create table" statement is started whenever table_name differs
--         from the previous row, so rows of one table must be contiguous by id.
-- Output: the generated lines, in generation order; #x and #y are dropped at the end.

create table #y (
    line_no numeric(10) identity,  -- generation order; drives the final ORDER BY
    line    varchar(255)           -- longest line is ' ,' + name(64) + ' ' + type(64) = 131 chars
)

declare @min_id    numeric(10),
        @name      varchar(64),
        @type      varchar(64),
        @table     varchar(64),
        @old_table varchar(64)      -- table of the previous row; NULL until the first row is processed

-- Start at the lowest id actually present instead of assuming it is 1.
-- An empty #x leaves @min_id NULL, so the loop body never runs.
select @min_id = min(id) from #x

while @min_id is not null
begin
    select @name  = col_name,
           @type  = col_type,
           @table = table_name
    from #x
    where id = @min_id

    -- "@old_table <> @table" alone is UNKNOWN while @old_table is NULL, which
    -- would skip the header of the very first table; test for NULL explicitly.
    if @old_table is null or @old_table <> @table
    begin
        -- Terminate the previous statement before opening the next one.
        if @old_table is not null
        begin
            insert into #y (line) select ')'
            insert into #y (line) select 'go'
        end

        insert into #y (line) select 'create table ' + @table + '('
        -- First column of a table: no leading comma.
        insert into #y (line) select '  ' + @name + ' ' + @type
    end
    else
        -- Subsequent column of the same table: leading comma.
        insert into #y (line) select ' ,' + @name + ' ' + @type

    select @old_table = @table

    -- Advance to the next id; min() over no rows yields NULL and ends the loop.
    select @min_id = min(id) from #x where id > @min_id
end

-- Terminate the last statement, but only if at least one row was processed.
if @old_table is not null
begin
    insert into #y (line) select ')'
    insert into #y (line) select 'go'
end

-- Without ORDER BY the row order of a result set is not guaranteed.
select line
from #y
order by line_no

drop table #x
drop table #y