我需要创建一个修改后的数据集,把其中的一些行合并在一起。需要满足3个要求:
(查看下面的图片以了解列的名称)
如您在图片中所见,我可以手动完成此操作,但是我找不到能够扩展到大规模数据的方法。
有人能告诉我应该使用什么样的数据处理工具来解决此类问题吗?
要重现第一个数据集:
# dput() representation of the example data: a data.frame with 20 rows
# (row.names = c(NA, 20L)) and six columns:
#   date_time   - POSIXct timestamps stored as seconds since the epoch
#                 (tzone = "", i.e. shown in the session's local time zone)
#   Buyer_from  - character, "<code> - <name>" label of the buying side
#   Price       - numeric
#   Quantity    - numeric
#   Seller_from - character, "<code> - <name>" label of the selling side
#   Type        - factor with levels "Buyer", "3", "4", "Seller"; only the
#                 integer codes 1L ("Buyer") and 4L ("Seller") occur in the data
# NOTE(review): the expression is not assigned to a name; assign it (the code
# further down refers to the data as `df`) before running the answers.
structure(list(date_time = structure(c(1517516099, 1517516099,
1517516099, 1517516099, 1517516095, 1517516092, 1517516092, 1517516092,
1517516092, 1517516092, 1517516092, 1517516088, 1517516084, 1517516081,
1517516074, 1517516073, 1517516071, 1517516068, 1517516061, 1517516053
), class = c("POSIXct", "POSIXt"), tzone = ""), Buyer_from = c("127 - TULLETT PREBON",
"127 - TULLETT PREBON", "127 - TULLETT PREBON", "3 - XP Investimentos CCTVM S/A",
"85 - BTG Pactual CTVM S.A.", "85 - BTG Pactual CTVM S.A.", "147 - ATIVA INVESTIMENTOS S.A. CTCV",
"147 - ATIVA INVESTIMENTOS S.A. CTCV", "147 - ATIVA INVESTIMENTOS S.A. CTCV",
"147 - ATIVA INVESTIMENTOS S.A. CTCV", "147 - ATIVA INVESTIMENTOS S.A. CTCV",
"147 - ATIVA INVESTIMENTOS S.A. CTCV", "147 - ATIVA INVESTIMENTOS S.A. CTCV",
"115 - H.COMMCOR DTVM LTDA", "115 - H.COMMCOR DTVM LTDA", "8 - UBS BRASIL CCTVM S/A",
"3 - XP Investimentos CCTVM S/A", "8 - UBS BRASIL CCTVM S/A",
"8 - UBS BRASIL CCTVM S/A", "8 - UBS BRASIL CCTVM S/A"), Price = c(3176.5,
3176.5, 3176.5, 3176.5, 3177, 3177, 3177.5, 3177.5, 3177.5, 3177.5,
3177.5, 3177.5, 3177.5, 3177.5, 3177.5, 3178, 3178.5, 3178, 3178,
3178), Quantity = c(10, 5, 50, 5, 5, 5, 55, 5, 5, 5, 30, 70,
30, 10, 10, 5, 5, 10, 5, 10), Seller_from = c("85 - BTG Pactual CTVM S.A.",
"122 - BGC LIQUIDEZ DTVM", "85 - BTG Pactual CTVM S.A.", "3 - XP Investimentos CCTVM S/A",
"88 - CM Capital Markets CCTVM LTDA", "122 - BGC LIQUIDEZ DTVM",
"122 - BGC LIQUIDEZ DTVM", "122 - BGC LIQUIDEZ DTVM", "8 - UBS BRASIL CCTVM S/A",
"8 - UBS BRASIL CCTVM S/A", "92 - RENASCENÇA DTVM LTDA", "92 - RENASCENÇA DTVM LTDA",
"85 - BTG Pactual CTVM S.A.", "85 - BTG Pactual CTVM S.A.", "122 - BGC LIQUIDEZ DTVM",
"122 - BGC LIQUIDEZ DTVM", "3 - XP Investimentos CCTVM S/A",
"77 - CITIGROUP GMB CCTVM S.A.", "386 - RICO INVESTIMENTOS - GRUPO XP",
"386 - RICO INVESTIMENTOS - GRUPO XP"), Type = structure(c(4L,
4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 4L, 1L, 4L, 4L, 4L, 1L,
4L, 4L, 4L), .Label = c("Buyer", "3", "4", "Seller"), class = "factor")), row.names = c(NA,
20L), class = "data.frame")
答案 0 :(得分:1)
这会去掉您的Seller_from列,但会按照您的要求对数据进行汇总:
library(dplyr)
# Total Quantity for each date_time / Price / Buyer_from / Type combination;
# Seller_from is not part of the grouping, so it does not appear in the result.
df %>%
  group_by(date_time, Price, Buyer_from, Type) %>%
  summarize(Quantity = sum(Quantity))
或在data.table中:
library(data.table)
# Convert df to a data.table by reference (no copy is made)
setDT(df)
# Total Quantity for each date_time / Price / Buyer_from / Type combination
df[, list(Quantity = sum(Quantity)),
   by = list(date_time, Price, Buyer_from, Type)]
使用dplyr,我可以保留您的Seller_from列!
library(dplyr)
# Tag every row with the size of its (date_time, Buyer_from, Price) group
df <- df %>%
  group_by(date_time, Buyer_from, Price) %>%
  mutate(n = n())

# Rows whose key occurs exactly once: keep them unchanged and drop the
# helper column n by selecting only the original columns
once <- df %>%
  filter(n == 1) %>%
  select(date_time, Buyer_from, Price, Quantity, Seller_from, Type)

# Rows whose key repeats: collapse them by summing Quantity per key and Type
dups <- df %>%
  filter(n > 1) %>%
  group_by(date_time, Buyer_from, Price, Type) %>%
  summarize(Quantity = sum(Quantity))

# Collapsed rows combine several sellers, so the column is re-added as NA
dups$Seller_from <- NA

# Stack the untouched rows on top of the collapsed ones
final <- rbind(once, dups)
# Groups: date_time, Buyer_from, Price [14]
date_time Buyer_from Price Quantity Seller_from Type
<dttm> <chr> <dbl> <dbl> <chr> <fct>
1 2018-02-01 15:14:59 3 - XP Investimentos CCTVM S/A 3176. 5 3 - XP Investimentos CCTVM S/A Seller
2 2018-02-01 15:14:55 85 - BTG Pactual CTVM S.A. 3177 5 88 - CM Capital Markets CCTVM LTDA Seller
3 2018-02-01 15:14:52 85 - BTG Pactual CTVM S.A. 3177 5 122 - BGC LIQUIDEZ DTVM Seller
4 2018-02-01 15:14:48 147 - ATIVA INVESTIMENTOS S.A. CTCV 3178. 70 92 - RENASCENÇA DTVM LTDA Seller
5 2018-02-01 15:14:44 147 - ATIVA INVESTIMENTOS S.A. CTCV 3178. 30 85 - BTG Pactual CTVM S.A. Buyer
6 2018-02-01 15:14:41 115 - H.COMMCOR DTVM LTDA 3178. 10 85 - BTG Pactual CTVM S.A. Seller
7 2018-02-01 15:14:34 115 - H.COMMCOR DTVM LTDA 3178. 10 122 - BGC LIQUIDEZ DTVM Seller
8 2018-02-01 15:14:33 8 - UBS BRASIL CCTVM S/A 3178 5 122 - BGC LIQUIDEZ DTVM Seller
9 2018-02-01 15:14:31 3 - XP Investimentos CCTVM S/A 3178. 5 3 - XP Investimentos CCTVM S/A Buyer
10 2018-02-01 15:14:28 8 - UBS BRASIL CCTVM S/A 3178 10 77 - CITIGROUP GMB CCTVM S.A. Seller
11 2018-02-01 15:14:21 8 - UBS BRASIL CCTVM S/A 3178 5 386 - RICO INVESTIMENTOS - GRUPO XP Seller
12 2018-02-01 15:14:13 8 - UBS BRASIL CCTVM S/A 3178 10 386 - RICO INVESTIMENTOS - GRUPO XP Seller
13 2018-02-01 15:14:52 147 - ATIVA INVESTIMENTOS S.A. CTCV 3178. 45 NA Buyer
14 2018-02-01 15:14:52 147 - ATIVA INVESTIMENTOS S.A. CTCV 3178. 55 NA Seller
15 2018-02-01 15:14:59 127 - TULLETT PREBON 3176. 65 NA Seller