我想对数据框做合并,从同一个数据框中得到每个物品的名称、买入价最低的区域以及卖出价最高的区域。我尝试用 merge 一步完成,但一直报错。有人有其他建议吗?
目标
item.name id buy.price buy.region sell.price sell.region
Isogen 37 82.02 Amarr 434.37 Jita
Nocxium 38 395.00 Amarr 449.27 Jita
....
DATA
> eve.data[150:160,]
buy.sell item.id region price date item.name
76 s 37 Amarr 99.94956 2016-10-05 22:30:42 Isogen
1077 b 37 Jita 83.08134 2016-10-05 23:05:27 Isogen
1078 b 37 Amarr 82.02000 2016-10-05 22:30:42 Isogen
77 s 38 Jita 434.37451 2016-10-05 23:05:27 Nocxium
78 s 38 Amarr 449.24791 2016-10-05 22:30:42 Nocxium
1079 b 38 Jita 421.00000 2016-10-05 23:05:27 Nocxium
1080 b 38 Amarr 395.00000 2016-10-05 22:30:42 Nocxium
79 s 39 Jita 1036.06204 2016-10-05 23:05:27 Zydrine
80 s 39 Amarr 1389.18975 2016-10-05 22:30:42 Zydrine
1081 b 39 Jita 1009.97722 2016-10-05 23:05:27 Zydrine
1082 b 39 Amarr 1063.52062 2016-10-05 22:30:42 Zydrine
我的代码:
# Comma-separated list of the type ids 0..500 for the query string.
x <- paste0(0:500, collapse = ",")
# The region filter has to be a literal "&region_ids=" query parameter; the
# "&reg" prefix must not be turned into the "®" character by HTML decoding.
eve.url <- paste0(
  "http://eve-marketdata.com/api/item_prices2.txt?char_name=demo&type_ids=",
  x,
  "&region_ids=10000002,10000033&buysell=a"
)
# The response is read as a tab-separated table with no header row, so the
# column names are supplied here.
eve.data <- read.table(
  url(eve.url),
  sep = "\t",
  col.names = c("buy.sell", "item.id", "region", "price", "date"),
  stringsAsFactors = FALSE
)
# Replace the numeric region ids with readable region names.
eve.data$region[eve.data$region == 10000002] <- "Jita"
eve.data$region[eve.data$region == 10000033] <- "Amarr"
# Look up each item's name by matching item.id against item.ids$typeID.
# NOTE(review): item.ids is defined outside this snippet; its second column is
# presumably the item name -- confirm.
eve.data$item.name <- item.ids[match(eve.data$item.id, item.ids$typeID), 2]
# Doesn't work: group_by() is handed a column vector instead of the data
# frame, and XXX marks the values that still need to be computed.
group_by(eve.data$buy.sell)%>%mutate(low.buy.price=XXX,low.buy.region=XXX, high.sell.price=XXX,high.sell.region=XXX)
答案 0 :(得分:1)
我确信在 tidyverse 内只需一步即可完成此操作,但我一时想不出更巧妙的写法,下面这个两步的做法同样有效。
可重复的例子是:
library(readr)
library(dplyr)

# Comma-separated list of the type ids 0..500 for the query string.
x <- paste0(0:500, collapse = ",")
# The region filter has to be a literal "&region_ids=" query parameter; the
# "&reg" prefix must not be turned into the "®" character by HTML decoding.
eve.url <- paste0(
  "http://eve-marketdata.com/api/item_prices2.txt?char_name=demo&type_ids=",
  x,
  "&region_ids=10000002,10000033&buysell=a"
)
eve.data <- read_tsv(
  url(eve.url),
  col_names = c("buy.sell", "item.id", "region", "price", "date")
)
# Replace the numeric region ids with readable region names.
eve.data$region[eve.data$region == 10000002] <- "Jita"
eve.data$region[eve.data$region == 10000033] <- "Amarr"

# Step 1: per item, keep the buy ("b") row with the lowest price.
# top_n() ranks descending, so desc(price) turns "top 1" into the minimum.
buys <- eve.data %>%
  filter(buy.sell == "b") %>%
  group_by(buy.sell, item.id) %>%
  top_n(1, desc(price)) %>%
  ungroup() %>%
  select(item.id, buy.region = region, low.buy.price = price)

# Step 1 (continued): per item, keep the sell ("s") row with the highest price.
sells <- eve.data %>%
  filter(buy.sell == "s") %>%
  group_by(buy.sell, item.id) %>%
  top_n(1, price) %>%
  ungroup() %>%
  select(item.id, sell.region = region, high.sell.price = price)

# Step 2: combine both sides on item.id; a full join keeps items that only
# have buy rows or only have sell rows.
eve.merged <- buys %>%
  full_join(sells, by = "item.id")
我们可以像这样预览:
# Preview the merged rows whose item.id is 37, 38 or 39.
filter(eve.merged, item.id %in% 37:39)
# A tibble: 3 × 5
item.id buy.region low.buy.price sell.region high.sell.price
<int> <chr> <dbl> <chr> <dbl>
1 37 Amarr 82.020 Amarr 99.94956
2 38 Amarr 395.000 Amarr 449.24791
3 39 Jita 1012.569 Amarr 1389.18975
答案 1 :(得分:0)
似乎我无法从网上获取相同的数据。我下载的表格中有很多价格为0。
但是,以下代码可以一步获取所有信息:
# One-step summary per item.id: the lowest buy ("b") price and the highest
# sell ("s") price, each with the region(s) quoting that price. When several
# regions share the extreme price they are joined with commas.
x <- summarise(
  group_by(eve.data, item.id),
  low.buy.price = min(price[buy.sell == "b"]),
  low.buy.region = paste0(
    region[price == low.buy.price & buy.sell == "b"],
    collapse = ","
  ),
  high.sell.price = max(price[buy.sell == "s"]),
  high.sell.region = paste0(
    region[price == high.sell.price & buy.sell == "s"],
    collapse = ","
  )
)
答案 2 :(得分:0)
也可以将所有东西连在一起......
> dput(test)
structure(list(buy.sell = structure(c(2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L), .Label = c("b", "s"), class = "factor"),
id = c(37L, 37L, 37L, 38L, 38L, 38L, 38L, 39L, 39L, 39L,
39L), region = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L), .Label = c("Amarr", "Jita"), class = "factor"),
item.price = c(99.94956, 83.08134, 82.02, 434.37451, 449.24791,
421, 395, 1036.06204, 1389.18975, 1009.97722, 1063.52062),
date = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = "5/10/2016", class = "factor"), time = structure(c(1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L), .Label = c("22:30:42",
"23:05:27"), class = "factor"), item.name = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L), .Label = c("Isogen",
"Nocxium", "Zydrine"), class = "factor")), .Names = c("buy.sell",
"id", "region", "item.price", "date", "time", "item.name"), class ="data.frame", row.names = c(NA,
-11L))
代码
# Reshape once: spread() turns the buy.sell values ("b", "s") into two columns
# holding item.price, so both halves of the join can reuse the same table.
test.wide <- tidyr::spread(test, buy.sell, item.price)

# Left side: per item, the row(s) with the lowest buy price.
# Right side: per item, the row(s) with the highest sell price.
# The join key is left implicit, so dplyr joins on the only shared column,
# item.name, and reports that choice in a "Joining, by" message.
full_join(
  test.wide %>%
    group_by(item.name) %>%
    dplyr::filter(b == min(b, na.rm = TRUE)) %>%
    select(item.name, buy.region = region, buy.low.price = b),
  test.wide %>%
    group_by(item.name) %>%
    dplyr::filter(s == max(s, na.rm = TRUE)) %>%
    select(item.name, sell.region = region, sell.high.price = s)
)
输出
Joining, by = "item.name"
Source: local data frame [3 x 5]
Groups: item.name [?]
item.name buy.region buy.low.price sell.region sell.high.price
<fctr> <fctr> <dbl> <fctr> <dbl>
1 Isogen Amarr 82.020 Amarr 99.94956
2 Nocxium Amarr 395.000 Amarr 449.24791
3 Zydrine Jita 1009.977 Amarr 1389.18975