data.table 中 LHS:RHS 形式与函数形式 `:=` 的对比

时间:2015-12-14 20:17:34

标签: r data.table

为什么函数形式的 ':=' 在使用 'by' 时不会把数据聚合成唯一行,而 LHS:RHS 形式在使用 'by' 时却会进行聚合?下面是一个 .csv 文件的内容,包含 20 行数据、58 个变量。只需复制、粘贴,并按逗号分隔(.csv 格式)读入即可。我仍在尝试找到向 SO 发布样本数据的最佳方法。我的代码的两个变体是:

prodMatrix <- so.sample[, ':=' (Count = .N), by = eval(names(so.sample)[2:28])]  

---此版本使用 by 时不会聚合 rowID---
prodMatrix <- so.sample[, (Count = .N), by = eval(names(so.sample)[2:28])]  

---此版本使用 by 时会聚合 rowID---
"CID","NetIncome_length_Auto Advantage","NetIncome_length_Certificates","NetIncome_length_Comm. Share Draft","NetIncome_length_Escrow Shares","NetIncome_length_HE Fixed","NetIncome_length_HE Variable","NetIncome_length_Holiday Club","NetIncome_length_IRA Certificates","NetIncome_length_IRA Shares","NetIncome_length_Indirect Balloon","NetIncome_length_Indirect New","NetIncome_length_Indirect RV","NetIncome_length_Indirect Used","NetIncome_length_Loanline/CR","NetIncome_length_New Auto","NetIncome_length_Non-Owner","NetIncome_length_Personal","NetIncome_length_Preferred Plus Shares","NetIncome_length_Preferred Shares","NetIncome_length_RV","NetIncome_length_Regular Shares","NetIncome_length_S/L Fixed","NetIncome_length_S/L Variable","NetIncome_length_SBA","NetIncome_length_Share Draft","NetIncome_length_Share/CD Secured","NetIncome_length_Used Auto","NetIncome_sum_Auto Advantage","NetIncome_sum_Certificates","NetIncome_sum_Comm. Share Draft","NetIncome_sum_Escrow Shares","NetIncome_sum_HE Fixed","NetIncome_sum_HE Variable","NetIncome_sum_Holiday Club","NetIncome_sum_IRA Certificates","NetIncome_sum_IRA Shares","NetIncome_sum_Indirect Balloon","NetIncome_sum_Indirect New","NetIncome_sum_Indirect RV","NetIncome_sum_Indirect Used","NetIncome_sum_Loanline/CR","NetIncome_sum_New Auto","NetIncome_sum_Non-Owner","NetIncome_sum_Personal","NetIncome_sum_Preferred Plus Shares","NetIncome_sum_Preferred Shares","NetIncome_sum_RV","NetIncome_sum_Regular Shares","NetIncome_sum_S/L Fixed","NetIncome_sum_S/L Variable","NetIncome_sum_SBA","NetIncome_sum_Share Draft","NetIncome_sum_Share/CD Secured","NetIncome_sum_Used Auto","totNI","Count","totalNI"
93,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,-123.2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,212.97,0,0,0,-71.36,0,0,0,49.01,0,0,67.42,6,404.52
114,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,4,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14.54,0,0,0,0,0,-285.44,0,0,0,49.01,0,0,-221.89,90,-19970.1
1112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,60.23,0,0,0,0,-101.55,0,-71.36,0,0,0,98.02,0,0,-14.66,28,-410.48
5366,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-71.36,0,0,0,49.01,0,0,-22.35,77631,-1735052.85
6078,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,7,0,0,0,1,0,0,0,0,0,0,0,0,-17.44,0,0,0,0,0,0,0,14.54,0,0,0,0,0,-499.52,0,0,0,49.01,0,0,-453.41,3,-1360.23
11684,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-71.36,0,0,0,49.01,0,0,-22.35,77631,-1735052.85
47358,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,-14.43,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-71.36,0,0,0,0,0,0,-85.79,3194,-274013.26
193761,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-101.55,0,-71.36,0,0,0,49.01,0,0,-123.9,9973,-1235654.7
232530,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-71.36,0,0,0,49.01,0,0,-22.35,77631,-1735052.85
604897,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-71.36,0,0,0,49.01,0,0,-22.35,77631,-1735052.85
1021309,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-71.36,0,0,0,0,0,0,-71.36,43262,-3087176.32
1023633,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-71.36,0,0,0,0,0,0,-71.36,43262,-3087176.32
1029726,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,60.23,0,0,0,0,0,0,-71.36,0,0,0,49.01,0,0,37.88,8688,329101.44
1040005,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-71.36,0,0,0,49.01,0,0,-22.35,77631,-1735052.85
1040092,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-71.36,0,0,0,49.01,0,0,-22.35,77631,-1735052.85
1064453,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14.54,0,212.97,0,0,0,-142.72,0,0,0,0,0,0,84.79,49,4154.71
1067508,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,-123.2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-71.36,0,0,0,0,0,0,-194.56,4162,-809758.72
1080303,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-71.36,0,0,0,0,0,0,-71.36,43262,-3087176.32
1181005,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-101.55,0,-142.72,0,0,0,98.02,0,0,-146.25,614,-89797.5
1200484,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-101.55,0,-285.44,0,0,0,0,0,0,-386.99,50,-19349.5

1 个答案:

答案 0 :(得分:0)

因为 := 是按引用(by reference)操作的。这意味着它不会在内存中创建数据集的副本,而是就地更新数据集。而对数据集进行聚合,得到的是其原始未聚合形式之外的一个新副本。您可以在 data.table 的 Reference semantics 说明文档(vignette)中了解更多信息。

这是 data.table 的一个设计理念::= 用于按引用更新,而其他形式(.()、list() 或直接写表达式)用于查询数据,并且查询数据不是按引用操作的。按引用操作无法聚合行,它只能计算出聚合值并将其写入原数据集中。查询之所以能够聚合数据集,是因为查询结果与原始 data.table 在内存中是不同的对象。