用 rbind 合并只含一列字符的数据框列表

时间:2015-10-22 05:25:14

标签: r rbind

我正在学习如何获取、清理和合并数据。我不明白,为什么在循环中使用 rbind 只返回 10 条数据,而不是预期的 30 条——逐页(逐个 i)手动合并时得到的是 30 条。

library(XML)
# Scrape the wanted entries from every results page and stack them row-wise
# into `mergeal` (one row per page, 10 values per row).
#
# `tabnums` holds the page numbers to fetch. It was previously `3`, a
# length-1 vector, so `1:length(tabnums)` iterated exactly once and only
# page 3 was downloaded -- hence 10 values instead of the expected 30.
tabnums <- 1:3

# Download results page `page` and return the wanted entries taken from the
# first column of the 8th element of the parsed-table list.
scrape_page <- function(page) {
  bnn <- paste0("http://www.ngchanmau.com/listing_browse.php?cur_page=",
                page, "&&coming=22-Oct-2015&coming=22-Oct-2015")
  tem <- readHTMLTable(bnn, header = TRUE, stringsAsFactors = FALSE)
  # data cleaning
  ff1 <- as.data.frame(tem[8])   # wanted data
  ff3 <- unique(ff1[, 1])        # 1st column only, duplicates removed
  ff3[c(2, 5:13)]                # wanted list only
}

# merging dataset: build every page first, then bind once instead of growing
# the result with rbind() inside a loop.
mergeal <- do.call(rbind, lapply(tabnums, scrape_page))

我已尝试过「rbind list of data frames with one column of characters and numerics」这个问题中的方法,但结果仍与上面相同。如果有人能指出我遗漏了什么,不胜感激。

1 个答案:

答案 0 :(得分:0)

我清理了数据,因为我很无聊。

library(plyr)
library(XML)
library(dplyr)
library(magrittr)
library(stringi)
library(tidyr)
library(lubridate)

# Build a tidy table of listings from result pages 1-3: one row per listing
# (keyed by ID) and one column per field label found in the scraped text.
answer <-
  data_frame(tabnums = 1:3) %>%
  group_by(tabnums) %>%
  # For each page number, download the page and keep the 8th parsed table.
  do(readHTMLTable(
    paste0("http://www.ngchanmau.com/listing_browse.php?cur_page=",
           .$tabnums, "&&coming=22-Oct-2015&coming=22-Oct-2015"),
    header = TRUE, stringsAsFactors = FALSE
  )[[8]]) %>%
  ungroup() %>%
  select(V1) %>%
  distinct() %>%
  # Insert a line break before each field marker (replacements are applied
  # one after another), then split every cell into its individual lines.
  mutate(V1 = stri_split_fixed(
    stri_replace_all_fixed(
      V1,
      c("Â", "Type:", "Time:", "Area:"),
      c("\n", "\nType:", "\nTime:", "\nArea:"),
      vectorize_all = FALSE
    ),
    "\n"
  )) %>%
  unnest(V1) %>%
  mutate(V1 = stri_trim(V1)) %>%
  # Drop the "There are currently ..." lines and empty lines.
  filter(!stri_detect_regex(V1, "^There are currently"), V1 != "") %>%
  # Split "label:value"; a line without ":" gets NA as its label because
  # fill = "left" pads on the left.
  # NOTE(review): a value that itself contains ":" produces extra pieces,
  # which separate() drops with a warning by default -- confirm the Time
  # values never contain ":".
  separate(V1, c("variable", "value"), sep = ":", fill = "left") %>%
  # Unlabelled lines become "Description"; ID counts the Description rows
  # seen so far, so each Description row starts a new ID.
  mutate(variable = mapvalues(variable, NA, "Description"),
         ID = cumsum(variable == "Description")) %>%
  spread(variable, value) %>%
  mutate(Area = extract_numeric(Area),
         Price = extract_numeric(Price),
         # Rewrite "a.m."/"p.m." as "am"/"pm", then parse "<Date> <Time>".
         Datetime = dmy_hm(paste(
           Date,
           stri_replace_all_fixed(Time, c("a.m.", "p.m."), c("am", "pm"),
                                  vectorize_all = FALSE)
         ))) %>%
  select(-Date, -Time)