我希望将下面第一张表(这里只显示了一部分数据)的结构转换为第二张表的结构:
第一张表:
我想要得到的表格格式(是的,我知道两张表里的数字对不上 :-) ):
请注意,对于每个月/每年,在第二个表中,有一个特定的行,按时间框架汇总所有产品的所有指标。
请记住,我并不需要 Excel 输出,只需要 R 数据框(data frame),所以请忽略 Excel 格式(仅用于说明)。
感谢你提供任何帮助。
实际数据:
> dput(by_Category)
structure(list(month = c("June", "June", "June", "July", "July",
"July", "August", "August", "August", "September", "September",
"September", "October", "October", "October", "November", "November",
"November", "December", "December", "December", "January", "January",
"January"), year = c(2014L, 2014L, 2014L, 2014L, 2014L, 2014L,
2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L,
2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2015L, 2015L, 2015L
), product = c("Brand", "Mortgage", "Checking", "Brand", "Mortgage",
"Checking", "Checking", "Brand", "Mortgage", "Mortgage", "Checking",
"Brand", "Brand", "Mortgage", "Checking", "Checking", "Brand",
"Mortgage", "Brand", "Mortgage", "Checking", "Checking", "Mortgage",
"Brand"), Impressions = c(11080657L, 765783L, 87563L, 21880672L,
507342L, 172617L, 104574L, 20292846L, 523052L, 214881L, 156690L,
17880970L, 21893633L, 121287L, 124176876L, 3568142L, 20973383L,
133835L, 32668854L, 299156L, 242516L, 101418L, 244092L, 13764648L
), Clicks = c(6239, 1138, 368, 13991, 1075, 863, 832, 14023,
1219, 795, 772, 11850, 13467, 491, 2576, 1053, 13163, 528, 18848,
832, 1276, 453, 526, 8115), Cost = c(275942.27, 85683.48, 15444.74,
557316.24, 51421.06, 25744.83, 14857.63, 411448.37, 23239.77,
16460.48, 17123.59, 371273.42, 633968.81, 8241.01, 274317.83,
29012.48, 591660.75, 10074.87, 935308.2, 29182.41, 33350.83,
8940.7, 26972.85, 343058.68), Leads = c(19, 4, 3, 24, 7, 8, 4,
16, 6, 11, 4, 28, 15, 1, 2, 5, 12, 9, 36, 7, 15, 1, 6, 14), Sales = c(10,
0, 3, 15, 0, 8, 4, 10, 0, 0, 4, 13, 9, 0, 2, 5, 7, 0, 23, 0,
15, 1, 0, 12), CTR = c(0.000563053255777162, 0.00148606067254039,
0.00420268835010221, 0.000639422774583889, 0.00211888627395327,
0.00499950758036578, 0.00795608851148469, 0.000691031706444724,
0.00233055222042933, 0.00369972217180672, 0.00492692577701193,
0.000662715725153613, 0.000615110338243087, 0.00404824919406037,
2.07446030450951e-05, 0.000295111573474374, 0.000627604998201768,
0.003945156349236, 0.000576940960341002, 0.00278115765687467,
0.00526150851902555, 0.00446666272259362, 0.00215492519214067,
0.000589553761200432), CR = c(0.00304535983330662, 0.00351493848857645,
0.00815217391304348, 0.00171538846401258, 0.00651162790697674,
0.00926998841251448, 0.00480769230769231, 0.00114098267132568,
0.00492206726825267, 0.0138364779874214, 0.00518134715025907,
0.00236286919831224, 0.00111383381599465, 0.00203665987780041,
0.00077639751552795, 0.00474833808167141, 0.000911646281242878,
0.0170454545454545, 0.00191001697792869, 0.00841346153846154,
0.0117554858934169, 0.0022075055187638, 0.0114068441064639, 0.00172520024645718
), CPL = c(14523.2773684211, 21420.87, 5148.24666666667, 23221.51,
7345.86571428571, 3218.10375, 3714.4075, 25715.523125, 3873.295,
1496.40727272727, 4280.8975, 13259.765, 42264.5873333333, 8241.01,
137158.915, 5802.496, 49305.0625, 1119.43, 25980.7833333333,
4168.91571428571, 2223.38866666667, 8940.7, 4495.475, 24504.1914285714
), Position = c(1.00284364890993, 3.24110957263005, 1.95605621399421,
1.00642818576169, 3.60166775110044, 2.07929174421411, 1.98501600233384,
1.01374247742477, 3.52345002411718, 3.41760988348958, 2.13315118103254,
1.01430764088476, 1.01265558628881, 3.48525875816657, 2.08975092018214,
2.20535265918782, 1.01536444417941, 3.24182759049732, 1.01389272247512,
3.19728235559245, 2.04401910200155, 2.05710547377685, 2.80872445526243,
1.01353081607188)), .Names = c("month", "year", "product", "Impressions",
"Clicks", "Cost", "Leads", "Sales", "CTR", "CR", "CPL", "Position"
), class = c("data.table", "data.frame"), row.names = c(NA, -24L
), .internal.selfref = <pointer: 0x0000000000220788>)
答案 0 :(得分:3)
我猜你首先必须创建月度摘要,然后将其绑定到数据然后重新排序。请参阅下面的示例。
# Build one summary row per (month, year) by collapsing all products.
# NOTE: every metric is summed here, including the ratio columns (CTR, CR,
# CPL, Position); swap in a different aggregate for those if a plain sum is
# not meaningful for your use case.
dtsums <- by_Category[, lapply(.SD, sum), by = list(month, year),
                      .SDcols = !"product"]
# Give the summary rows an empty product label so both tables share the same
# set of columns and can be row-bound.
dtsums[, product := ""]
# Stack detail and summary rows. Columns are matched by name because product
# is the last column of dtsums but the third column of by_Category.
res <- rbind(by_Category, dtsums, use.names = TRUE)
# Turn month into a factor with calendar-ordered levels so that sorting is
# chronological instead of alphabetical.
res[, month := factor(month, levels = month.name)]
# Sort by year, month and product; the empty product label sorts first, which
# puts each summary row on top of its month.
setkeyv(res, c("year", "month", "product"))
# show the result
head(res)
## month year product Impressions Clicks Cost Leads Sales CTR CR CPL Position
## 1: June 2014 11934003 7745 377070.49 26 13 0.0062518023 0.0147124722 41092.394 6.200009
## 2: June 2014 Brand 11080657 6239 275942.27 19 10 0.0005630533 0.0030453598 14523.277 1.002844
## 3: June 2014 Checking 87563 368 15444.74 3 3 0.0042026884 0.0081521739 5148.247 1.956056
## 4: June 2014 Mortgage 765783 1138 85683.48 4 0 0.0014860607 0.0035149385 21420.870 3.241110
## 5: July 2014 22560631 15929 634482.13 39 23 0.0077578166 0.0174970048 33785.479 6.687388
## 6: July 2014 Brand 21880672 13991 557316.24 24 15 0.0006394228 0.0017153885 23221.510 1.006428
除了对所有列求和之外,你也可以对一部分列求和、对其余列取平均值,例如:
# Monthly summary with a per-column aggregate: volume metrics are summed,
# ratio-style metrics are averaged across products.
# Aggregates are written directly in j rather than through nested
# .SD[, list(...)] calls, which would re-enter `[.data.table` for every group.
dtsums <- by_Category[
  ,
  list(
    Impressions = sum(Impressions),
    Clicks = sum(Clicks),
    Cost = sum(Cost),
    Leads = sum(Leads),
    Sales = sum(Sales),
    CTR = mean(CTR),
    CR = mean(CR),
    CPL = mean(CPL),
    Position = mean(Position)
  ),
  by = list(month, year)
]