将 rules 类的对象转换为 R 中的数据框(data frame)

时间:2014-09-08 17:39:48

标签: r machine-learning data-mining apriori

我有一个 apriori 函数的输出,该函数对数据进行挖掘并给出一组规则。我想把这个输出转换为数据框(data frame),以便做进一步处理。规则对象如下所示:

> inspect(output)
   lhs                          rhs                         support confidence     lift
1  {curtosis=(846,1.27e+03]} => {skewness=(-0.254,419]}   0.2611233  0.8044944 2.418776
2  {variance=(892,1.34e+03]} => {notes.class=FALSE}       0.3231218  0.9888393 1.781470
3  {variance=(-0.336,446]}   => {notes.class=TRUE}        0.2859227  0.8634361 1.940608
4  {skewness=(837,1.26e+03]} => {notes.class=FALSE}       0.2924872  0.8774617 1.580815
5  {entropy=(-0.155,386],                                                              
    notes.class=FALSE}       => {skewness=(837,1.26e+03]} 0.1597374  0.9521739 2.856522
6  {variance=(-0.336,446],                                                             
    curtosis=(846,1.27e+03]} => {skewness=(-0.254,419]}   0.1378556  0.8325991 2.503275

这个规则对象是由一个数据框生成的。该数据框如下所示:

> data
        variance       skewness       curtosis        entropy notes.class
1 (892,1.34e+03] (837,1.26e+03]   (-0.268,424]      (386,771]       FALSE
2 (892,1.34e+03]   (-0.254,419]      (424,846] (771,1.16e+03]       FALSE
3 (892,1.34e+03] (837,1.26e+03]   (-0.268,424]   (-0.155,386]       FALSE
4      (446,892]   (-0.254,419] (846,1.27e+03]      (386,771]       FALSE

可以用下面的语句得到 output 变量:

> output <- apriori(data)

这里使用的是 arules 包。dput(output) 的输出如下:

new("rules"
    , lhs = new("itemMatrix"
    , data = new("ngCMatrix"
    , i = c(8L, 2L, 0L, 5L, 9L, 12L, 0L, 8L, 0L, 3L, 0L, 8L, 8L, 13L, 8L, 
10L, 3L, 10L, 8L, 11L, 8L, 13L, 3L, 12L, 2L, 5L, 2L, 6L, 2L, 
5L, 2L, 6L, 2L, 10L, 2L, 7L, 2L, 11L, 0L, 3L, 0L, 10L, 0L, 7L, 
11L, 13L, 5L, 6L, 6L, 12L, 5L, 10L, 1L, 5L, 4L, 6L, 6L, 13L, 
0L, 3L, 8L, 0L, 8L, 13L, 3L, 8L, 13L, 0L, 3L, 13L, 2L, 5L, 6L, 
2L, 5L, 12L, 2L, 6L, 12L)
    , p = c(0L, 1L, 2L, 3L, 4L, 6L, 8L, 10L, 12L, 14L, 16L, 18L, 20L, 22L, 
24L, 26L, 28L, 30L, 32L, 34L, 36L, 38L, 40L, 42L, 44L, 46L, 48L, 
50L, 52L, 54L, 56L, 58L, 61L, 64L, 67L, 70L, 73L, 76L, 79L)
    , Dim = c(14L, 38L)
    , Dimnames = list(NULL, NULL)
    , factors = list()
)
    , itemInfo = structure(list(labels = structure(c("variance=(-0.336,446]", 
"variance=(446,892]", "variance=(892,1.34e+03]", "skewness=(-0.254,419]", 
"skewness=(419,837]", "skewness=(837,1.26e+03]", "curtosis=(-0.268,424]", 
"curtosis=(424,846]", "curtosis=(846,1.27e+03]", "entropy=(-0.155,386]", 
"entropy=(386,771]", "entropy=(771,1.16e+03]", "notes.class=FALSE", 
"notes.class=TRUE"), class = "AsIs"), variables = structure(c(5L, 
5L, 5L, 4L, 4L, 4L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L), .Label = c("curtosis", 
"entropy", "notes.class", "skewness", "variance"), class = "factor"), 
    levels = structure(c(4L, 8L, 12L, 2L, 6L, 10L, 3L, 7L, 11L, 
    1L, 5L, 9L, 13L, 14L), .Label = c("(-0.155,386]", "(-0.254,419]", 
    "(-0.268,424]", "(-0.336,446]", "(386,771]", "(419,837]", 
    "(424,846]", "(446,892]", "(771,1.16e+03]", "(837,1.26e+03]", 
    "(846,1.27e+03]", "(892,1.34e+03]", "FALSE", "TRUE"), class = "factor")), .Names = c("labels", 
"variables", "levels"), row.names = c(NA, -14L), class = "data.frame")
    , itemsetInfo = structure(list(), .Names = character(0), row.names = integer(0), class = "data.frame")
)
    , rhs = new("itemMatrix"
    , data = new("ngCMatrix"
    , i = c(3L, 12L, 13L, 12L, 5L, 3L, 8L, 13L, 0L, 3L, 8L, 3L, 3L, 8L, 
6L, 5L, 12L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 3L, 12L, 5L, 
12L, 12L, 13L, 4L, 13L, 3L, 0L, 8L, 12L, 6L, 5L)
    , p = 0:38
    , Dim = c(14L, 38L)
    , Dimnames = list(NULL, NULL)
    , factors = list()
)
    , itemInfo = structure(list(labels = structure(c("variance=(-0.336,446]", 
"variance=(446,892]", "variance=(892,1.34e+03]", "skewness=(-0.254,419]", 
"skewness=(419,837]", "skewness=(837,1.26e+03]", "curtosis=(-0.268,424]", 
"curtosis=(424,846]", "curtosis=(846,1.27e+03]", "entropy=(-0.155,386]", 
"entropy=(386,771]", "entropy=(771,1.16e+03]", "notes.class=FALSE", 
"notes.class=TRUE"), class = "AsIs"), variables = structure(c(5L, 
5L, 5L, 4L, 4L, 4L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L), .Label = c("curtosis", 
"entropy", "notes.class", "skewness", "variance"), class = "factor"), 
    levels = structure(c(4L, 8L, 12L, 2L, 6L, 10L, 3L, 7L, 11L, 
    1L, 5L, 9L, 13L, 14L), .Label = c("(-0.155,386]", "(-0.254,419]", 
    "(-0.268,424]", "(-0.336,446]", "(386,771]", "(419,837]", 
    "(424,846]", "(446,892]", "(771,1.16e+03]", "(837,1.26e+03]", 
    "(846,1.27e+03]", "(892,1.34e+03]", "FALSE", "TRUE"), class = "factor")), .Names = c("labels", 
"variables", "levels"), row.names = c(NA, -14L), class = "data.frame")
    , itemsetInfo = structure(list(), .Names = character(0), row.names = integer(0), class = "data.frame")
)
    , quality = structure(list(support = c(0.261123267687819, 0.323121808898614, 
0.285922684172137, 0.292487235594457, 0.159737417943107, 0.137855579868709, 
0.137855579868709, 0.142231947483589, 0.142231947483589, 0.110138584974471, 
0.110138584974471, 0.12399708242159, 0.153902261123268, 0.107221006564551, 
0.13056163384391, 0.13056163384391, 0.150984682713348, 0.139314369073669, 
0.100656455142232, 0.107221006564551, 0.154631655725748, 0.165572574762947, 
0.112326768781911, 0.105762217359592, 0.12180889861415, 0.181619256017505, 
0.181619256017505, 0.102844638949672, 0.105762217359592, 0.12837345003647, 
0.12837345003647, 0.137855579868709, 0.137855579868709, 0.137855579868709, 
0.137855579868709, 0.13056163384391, 0.13056163384391, 0.13056163384391
), confidence = c(0.804494382022472, 0.988839285714286, 0.863436123348018, 
0.87746170678337, 0.952173913043478, 0.832599118942731, 0.832599118942731, 
0.859030837004405, 0.898617511520737, 0.853107344632768, 0.915151515151515, 
0.80188679245283, 0.972350230414747, 0.885542168674699, 0.864734299516908, 
0.913265306122449, 1, 0.974489795918367, 1, 1, 0.990654205607477, 
1, 0.980891719745223, 0.873493975903614, 0.814634146341463, 0.943181818181818, 
0.950381679389313, 1, 0.92948717948718, 0.931216931216931, 0.897959183673469, 
1, 0.969230769230769, 0.895734597156398, 0.832599118942731, 1, 
0.864734299516908, 0.93717277486911), lift = c(2.41877587226493, 
1.78146998779801, 1.94060807395104, 1.580814717477, 2.85652173913043, 
2.50327498261071, 2.56515369004603, 1.93070701234925, 2.71366653809456, 
2.56493458221826, 2.81948927477017, 2.41093594836147, 2.92344773223381, 
2.72826587247868, 2.58853870008227, 2.73979591836735, 1.80157687253614, 
1.75561827884899, 1.80157687253614, 1.80157687253614, 1.78473970550309, 
2.24754098360656, 2.20459434060771, 1.96321350977681, 2.44926187419769, 
1.69921455023295, 2.85114503816794, 1.80157687253614, 1.67454260588295, 
2.09294821753838, 2.68799572230639, 2.24754098360656, 2.91406882591093, 
2.70496064471679, 2.56515369004603, 1.80157687253614, 2.58853870008227, 
2.81151832460733)), row.names = c(NA, 38L), .Names = c("support", 
"confidence", "lift"), class = "data.frame")
    , info = structure(list(data = data, ntransactions = 1371L, support = 0.1, 
    confidence = 0.8), .Names = c("data", "ntransactions", "support", 
"confidence"))
)

3 个答案:

答案 0 :(得分:7)

我无法根据您的问题重现您的数据(哦,您在我输入的时候刚刚补充了数据!抱歉!),所以我将使用 arules 包自带的示例:

library("arules")

# Load the Adult example data set that the arules package provides.
data("Adult")

# Mine association rules with minimum support 0.5 and minimum confidence 0.9.
rules <- apriori(
  Adult,
  parameter = list(supp = 0.5, conf = 0.9, target = "rules")
)

然后我就可以重现 inspect(rules) 输出的内容:

> # Build one row per rule: the LHS and RHS item labels followed by the
> # rule quality measures (support, confidence, lift, ...).
> # labels() on an itemMatrix returns a plain character vector in current
> # arules releases, so it is used directly; extracting `$elements` from an
> # atomic vector would raise an error. quality() is the public accessor for
> # the quality slot.
> ruledf <- data.frame(
       lhs = labels(lhs(rules)),
       rhs = labels(rhs(rules)),
       quality(rules))
> head(ruledf)
                         lhs                 rhs   support confidence      lift
1                         {} {capital-gain=None} 0.9173867  0.9173867 1.0000000
2                         {} {capital-loss=None} 0.9532779  0.9532779 1.0000000
3 {hours-per-week=Full-time} {capital-gain=None} 0.5435895  0.9290688 1.0127342
4 {hours-per-week=Full-time} {capital-loss=None} 0.5606650  0.9582531 1.0052191
5                 {sex=Male} {capital-gain=None} 0.6050735  0.9051455 0.9866565
6                 {sex=Male} {capital-loss=None} 0.6331027  0.9470750 0.9934931

之后就可以做诸如按 lift 降序排序之类的操作:

# Reorder the rows by descending lift and show the top of the result.
head(ruledf[order(-ruledf[["lift"]]), ])

rules 类的帮助文档:http://www.rdocumentation.org/packages/arules/functions/rules-class.html 会告诉您可以从规则对象中取出哪些内容——我只是利用这些信息来构建数据框。如果它不完全是您想要的,那就按自己的方式另外构建一个!

答案 1 :(得分:4)

在 Adult 数据集上运行 apriori:
# Mine association rules from Adult (minimum support 0.5, confidence 0.9).
rules <- apriori(
  Adult,
  parameter = list(supp = 0.5, conf = 0.9, target = "rules")
)

查看 LHS、RHS、支持度(support)、置信度(confidence)和提升度(lift):

# Print the mined rules (LHS, RHS and their quality measures); the call is
# namespace-qualified with arules::.
arules::inspect(rules)

创建数据框

# Assemble a data frame with one row per rule: LHS labels, RHS labels and
# the columns of the rules' quality slot.
df <- data.frame(
  lhs = labels(lhs(rules)),
  rhs = labels(rhs(rules)),
  rules@quality
)

查看新数据框中的前6行

# Preview the first six rows of the new data frame.
head(df)

答案 2 :(得分:1)

这可以解决问题

# Coerce the rules object straight to a data frame with as().
rules_dataframe <- as(output, "data.frame")