基于数据框的子集,合并最低值和最高值

时间:2016-10-06 00:10:05

标签: r

我正在尝试合并数据框,以便从同一个数据框中得到每个物品的名称、买入价最低的区域和卖出价最高的区域。我试图用merge一步完成此操作,但一直遇到错误。有人有其他建议吗?

目标

item.name id buy.price buy.region sell.price sell.region
Isogen    37 82.02     Amarr      434.37     Jita
Nocxium   38 395.00    Amarr      449.27     Jita
....

DATA

> eve.data[150:160,]
     buy.sell item.id region      price                date item.name
76          s      37  Amarr   99.94956 2016-10-05 22:30:42    Isogen
1077        b      37   Jita   83.08134 2016-10-05 23:05:27    Isogen
1078        b      37  Amarr   82.02000 2016-10-05 22:30:42    Isogen
77          s      38   Jita  434.37451 2016-10-05 23:05:27   Nocxium
78          s      38  Amarr  449.24791 2016-10-05 22:30:42   Nocxium
1079        b      38   Jita  421.00000 2016-10-05 23:05:27   Nocxium
1080        b      38  Amarr  395.00000 2016-10-05 22:30:42   Nocxium
79          s      39   Jita 1036.06204 2016-10-05 23:05:27   Zydrine
80          s      39  Amarr 1389.18975 2016-10-05 22:30:42   Zydrine
1081        b      39   Jita 1009.97722 2016-10-05 23:05:27   Zydrine
1082        b      39  Amarr 1063.52062 2016-10-05 22:30:42   Zydrine

我的代码:

# Comma-separated list of the type ids 0 through 500, used in the query string.
x<-paste0(0:500,collapse=",")
# Request URL: the type ids above plus two region ids.
eve.url<-paste0("http://eve-marketdata.com/api/item_prices2.txt?char_name=demo&type_ids=",x,"&region_ids=10000002,10000033&buysell=a")
# Read the tab-separated response and name its five columns.
eve.data<-read.table(url(eve.url),sep="\t",col.names=c("buy.sell","item.id","region","price","date"),stringsAsFactors=F)
# Replace the two numeric region ids with region names.
eve.data$region[eve.data$region==10000002]<-"Jita"
eve.data$region[eve.data$region==10000033]<-"Amarr"
# Look up each row's item name: match item.id against item.ids$typeID and take
# column 2 of item.ids. NOTE(review): item.ids is not defined in this snippet.
eve.data$item.name <- item.ids[match(eve.data$item.id, item.ids$typeID),2]
# Doesn't work: group_by() is handed the buy.sell column vector instead of the
# data frame, and XXX appears to be a placeholder for the values still wanted.
group_by(eve.data$buy.sell)%>%mutate(low.buy.price=XXX,low.buy.region=XXX, high.sell.price=XXX,high.sell.region=XXX)

3 个答案:

答案 0 :(得分:1)

我确信在tidyverse内只需一步即可完成此操作,但由于我缺乏创造力,分几步来做同样有效。

  1. 创建一个持有低买价的数据框。
  2. 创建一个持有高卖价的数据框。
  3. 将数据合并在一起。

可重复的例子是:

    library(readr)
    library(dplyr)

    # Build the request URL for type ids 0 through 500 and the two region ids.
    x <- paste0(0:500, collapse = ",")
    eve.url <- paste0(
      "http://eve-marketdata.com/api/item_prices2.txt?char_name=demo&type_ids=",
      x,
      "&region_ids=10000002,10000033&buysell=a"
    )

    # Read the tab-separated response, supplying the column names ourselves.
    eve.data <- read_tsv(
      url(eve.url),
      col_names = c("buy.sell", "item.id", "region", "price", "date")
    )

    # Replace the numeric region ids with region names.
    eve.data$region[eve.data$region == 10000002] <- "Jita"
    eve.data$region[eve.data$region == 10000033] <- "Amarr"

    # Lowest-priced buy row(s) per item; rows tied on the minimum are all kept.
    buys <- eve.data %>%
      filter(buy.sell == "b") %>%
      group_by(buy.sell, item.id) %>%
      filter(min_rank(price) == 1) %>%
      ungroup() %>%
      select(item.id, buy.region = region, low.buy.price = price)

    # Highest-priced sell row(s) per item; rows tied on the maximum are all kept.
    sells <- eve.data %>%
      filter(buy.sell == "s") %>%
      group_by(buy.sell, item.id) %>%
      filter(min_rank(desc(price)) == 1) %>%
      ungroup() %>%
      select(item.id, sell.region = region, high.sell.price = price)

    # Full join so items present in only one of the two tables are kept.
    eve.merged <- full_join(buys, sells, by = "item.id")
    

    我们可以像这样预览:

    # Show only the rows for item ids 37, 38 and 39.
    filter(eve.merged, item.id %in% 37:39)
    
    # A tibble: 3 × 5
      item.id buy.region low.buy.price sell.region high.sell.price
        <int>      <chr>         <dbl>       <chr>           <dbl>
    1      37      Amarr        82.020       Amarr        99.94956
    2      38      Amarr       395.000       Amarr       449.24791
    3      39       Jita      1012.569       Amarr      1389.18975
    

答案 1 :(得分:0)

似乎我无法从网上获取相同的数据。我下载的表格中有很多价格为0。

但是,以下代码可以一步获取所有信息:

# One row per item.id: the lowest buy price and the highest sell price, each
# with the region(s) quoting it. Regions tied on a price are comma-joined.
# The *.region columns refer to the *.price columns computed just before them.
x <- summarise(
    group_by(eve.data, item.id),
    low.buy.price = min(price[buy.sell == "b"]),
    low.buy.region = paste(
        region[buy.sell == "b" & price == low.buy.price],
        collapse = ","
    ),
    high.sell.price = max(price[buy.sell == "s"]),
    high.sell.region = paste(
        region[buy.sell == "s" & price == high.sell.price],
        collapse = ","
    )
)

答案 2 :(得分:0)

也可以将所有东西连在一起......

> dput(test)
  structure(list(buy.sell = structure(c(2L, 1L, 1L, 2L, 2L, 1L, 
  1L, 2L, 2L, 1L, 1L), .Label = c("b", "s"), class = "factor"), 
  id = c(37L, 37L, 37L, 38L, 38L, 38L, 38L, 39L, 39L, 39L, 
  39L), region = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 
  1L, 2L, 1L), .Label = c("Amarr", "Jita"), class = "factor"), 
  item.price = c(99.94956, 83.08134, 82.02, 434.37451, 449.24791, 
  421, 395, 1036.06204, 1389.18975, 1009.97722, 1063.52062), 
  date = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
  1L), .Label = "5/10/2016", class = "factor"), time = structure(c(1L, 
  2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L), .Label = c("22:30:42", 
  "23:05:27"), class = "factor"), item.name = structure(c(1L, 
  1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L), .Label = c("Isogen", 
  "Nocxium", "Zydrine"), class = "factor")), .Names = c("buy.sell", 
  "id", "region", "item.price", "date", "time", "item.name"), class ="data.frame", row.names = c(NA, 
  -11L))

代码

# Reshape once so the buy ("b") and sell ("s") prices sit in separate columns;
# both halves of the join below reuse this wide table instead of spreading twice.
test.wide <- tidyr::spread(test, buy.sell, item.price)

# Per item: the row(s) holding the lowest buy price. Rows without a buy price
# have b == NA and are dropped by the filter.
low.buys <- test.wide %>%
  group_by(item.name) %>%
  dplyr::filter(b == min(b, na.rm = TRUE)) %>%
  select(item.name, buy.region = region, buy.low.price = b)

# Per item: the row(s) holding the highest sell price.
high.sells <- test.wide %>%
  group_by(item.name) %>%
  dplyr::filter(s == max(s, na.rm = TRUE)) %>%
  select(item.name, sell.region = region, sell.high.price = s)

# `by` is left out on purpose: full_join() then joins on the shared column
# (item.name) and prints the "Joining, by" message.
full_join(low.buys, high.sells)

输出

Joining, by = "item.name"
Source: local data frame [3 x 5]
Groups: item.name [?]

  item.name buy.region buy.low.price sell.region sell.high.price
     <fctr>     <fctr>         <dbl>      <fctr>           <dbl>
1    Isogen      Amarr        82.020       Amarr        99.94956
2   Nocxium      Amarr       395.000       Amarr       449.24791
3   Zydrine       Jita      1009.977       Amarr      1389.18975