这段代码从网站抓取股票数据,并为每只股票返回一个1x18的数据框。我想把数据框转换为向量,同时避免数值列被转换为因子(factor)——而这正是目前发生的情况。我也尝试过将数据框转换为矩阵,但数值列仍然会被转换为因子。总之,我希望字符列保持为字符,数值列保持为数值。谢谢。
#get.dates is a function I created to scrape
data = get.dates("AAPL")
class(data)
[1] "data.frame"
class(data$surprise)
[1] "numeric"
dput(data)
structure(list(date = "2019-05-07T00:00:00", company = "Apple",
ticker = "AAPL", periodEnding = "Mar 2019", eps = "2.37",
reportedEPS = NA_character_, lastEps = "2.73", consensus = 4L,
bpConsensus = 4L, ratingsAndPT = structure(list(priceTarget = 177.34,
numBuys = 17L, numHolds = 18L, numSells = 0L), class = "data.frame", row.names = c(NA,
-1L)), bpRatingsAndPT = structure(list(priceTarget = 176.88,
numBuys = 14L, numHolds = 14L, numSells = 0L), class = "data.frame", row.names = c(NA,
-1L)), marketCap = 827573630900, sector = 18731L, stockId = 7624L,
stockTypeId = 1L, surprise = NA_real_, timeOfDay = 4L, isConfirmed = FALSE), class = "data.frame", row.names = c(NA,
-1L))
data = unlist(data)
class(data)
[1] "character"
最终目标是用rbind把每只股票的结果合并到单个data.frame中。我认为必须先把每个1x18数据框转换为向量才能进行rbind,因为在foreach包中用rbind合并各个结果时出现了错误。
tickers = c("AAPL", "PEP", "KO")
system.time({
data = foreach(r = tickers, .packages = c("jsonlite", "dplyr"), .combine = rbind) %dopar% {get.dates(r)}
})
error calling combine function:
<simpleError in `.rowNamesDF<-`(x, value = value): duplicate 'row.names' are not allowed>
user system elapsed
0.02 0.00 0.56
Warning message:
non-unique value when setting 'row.names': ‘1’
print(data)
NULL
#I will do the same thing outside of the foreach loop to give some more context
data = lapply(tickers, get.dates)
do.call(rbind, data)
Error in `.rowNamesDF<-`(x, value = value) :
duplicate 'row.names' are not allowed
In addition: Warning message:
non-unique value when setting 'row.names': ‘1’
dput(data)
list(structure(list(date = "2019-05-07T00:00:00", company = "Apple",
ticker = "AAPL", periodEnding = "Mar 2019", eps = "2.37",
reportedEPS = NA_character_, lastEps = "2.73", consensus = 4L,
bpConsensus = 4L, ratingsAndPT = structure(list(priceTarget = 177.34,
numBuys = 17L, numHolds = 18L, numSells = 0L), class = "data.frame", row.names = c(NA,
-1L)), bpRatingsAndPT = structure(list(priceTarget = 176.88,
numBuys = 14L, numHolds = 14L, numSells = 0L), class = "data.frame", row.names = c(NA,
-1L)), marketCap = 827573630900, sector = 18731L, stockId = 7624L,
stockTypeId = 1L, surprise = NA_real_, timeOfDay = 4L, isConfirmed = FALSE), class = "data.frame", row.names = c(NA,
-1L)), structure(list(date = "2019-04-23T00:00:00", company = "Coca-Cola",
ticker = "KO", periodEnding = "Mar 2019", eps = "0.46", reportedEPS = NA_character_,
lastEps = "0.47", consensus = 4L, bpConsensus = 5L, ratingsAndPT = structure(list(
priceTarget = 50.89, numBuys = 4L, numHolds = 5L, numSells = 0L), class = "data.frame", row.names = c(NA,
-1L)), bpRatingsAndPT = structure(list(priceTarget = 51.25,
numBuys = 3L, numHolds = 1L, numSells = 0L), class = "data.frame", row.names = c(NA,
-1L)), marketCap = 193681840000, sector = 18731L, stockId = 8359L,
stockTypeId = 1L, surprise = NA_real_, timeOfDay = 4L, isConfirmed = FALSE), class = "data.frame", row.names = c(NA,
-1L)), structure(list(date = "2019-04-25T00:00:00", company = "PepsiCo",
ticker = "PEP", periodEnding = "Mar 2019", eps = "0.92",
reportedEPS = NA_character_, lastEps = "0.96", consensus = 4L,
bpConsensus = 4L, ratingsAndPT = structure(list(priceTarget = 123.67,
numBuys = 4L, numHolds = 3L, numSells = 0L), class = "data.frame", row.names = c(NA,
-1L)), bpRatingsAndPT = structure(list(priceTarget = 126,
numBuys = 1L, numHolds = 1L, numSells = 0L), class = "data.frame", row.names = c(NA,
-1L)), marketCap = 163697620000, sector = 18731L, stockId = 10962L,
stockTypeId = 1L, surprise = NA_real_, timeOfDay = 4L, isConfirmed = FALSE), class = "data.frame", row.names = c(NA,
-1L)))
答案 0 :(得分:1)
基本上,您必须在这里自己展平(flatten)嵌套的数据框列,这并不理想。在最初获取json数据时就进行展平会更容易(参见jsonlite的flatten函数)。 https://rdrr.io/cran/jsonlite/man/flatten.html
以下解决方案使用purrr,但您也可以根据需要改用for循环或apply系列函数。这里有两个主要想法:
1. 将数据框类型的列与数据框中不含嵌套列的部分按列绑定在一起。在您的示例中,我们绑定3个部分:去掉嵌套列(df_cols)后的原始数据框,以及另外两个数据框类型的列。您可以使用bind_cols进行此操作。在嵌套列内部的列名前加上外层列名作为前缀,有助于避免列名重复。
2. 用rbind(或类似函数)将所有行合并成一个数据框。
#' Flatten data-frame-typed columns into ordinary columns
#'
#' Every column of `df` that is itself a data frame is replaced by its inner
#' columns, each renamed to `<outer name>.<inner name>` so that inner columns
#' from different nested data frames cannot collide.
#'
#' @param df A data frame whose columns may themselves be data frames.
#' @return A data frame holding the non-nested columns of `df` first (in
#'   their original order), followed by the prefixed inner columns of each
#'   nested data frame.
flatten_df_cols <- function(df) {
  is_nested <- vapply(df, is.data.frame, logical(1))

  # List-style `[` always returns a data frame. Matrix-style `df[, j]` drops
  # to the bare column when exactly one column is selected, which breaks the
  # single-nested-column and single-plain-column cases.
  plain <- df[!is_nested]
  nested <- df[is_nested]

  prefixed <- Map(
    function(inner, outer_name) {
      setNames(inner, paste0(outer_name, ".", names(inner)))
    },
    nested,
    names(nested)
  )

  # unname() keeps cbind()/data.frame() from applying its own name prefixing
  # on top of the prefixes built above.
  do.call(cbind, c(list(plain), unname(prefixed)))
}
map_dfr(data, flatten_df_cols)
Observations: 3
Variables: 24
$ date <chr> "2019-05-07T00:00:00", "2019-04...
$ company <chr> "Apple", "Coca-Cola", "PepsiCo"
$ ticker <chr> "AAPL", "KO", "PEP"
$ periodEnding <chr> "Mar 2019", "Mar 2019", "Mar 2019"
$ eps <chr> "2.37", "0.46", "0.92"
$ reportedEPS <chr> NA, NA, NA
$ lastEps <chr> "2.73", "0.47", "0.96"
$ consensus <int> 4, 4, 4
$ bpConsensus <int> 4, 5, 4
$ marketCap <dbl> 827573630900, 193681840000, 163...
$ sector <int> 18731, 18731, 18731
$ stockId <int> 7624, 8359, 10962
$ stockTypeId <int> 1, 1, 1
$ surprise <dbl> NA, NA, NA
$ timeOfDay <int> 4, 4, 4
$ isConfirmed <lgl> FALSE, FALSE, FALSE
$ ratingsAndPT.priceTarget <dbl> 177.34, 50.89, 123.67
$ ratingsAndPT.numBuys <int> 17, 4, 4
$ ratingsAndPT.numHolds <int> 18, 5, 3
$ ratingsAndPT.numSells <int> 0, 0, 0
$ bpRatingsAndPT.priceTarget <dbl> 176.88, 51.25, 126.00
$ bpRatingsAndPT.numBuys <int> 14, 3, 1
$ bpRatingsAndPT.numHolds <int> 14, 1, 1
$ bpRatingsAndPT.numSells <int> 0, 0, 0