Why do I keep getting an error when I try to create a table in R and then export the file to Excel? The error I get is:
Error in write.table(data1, "data1.csv", col.names = NA, sep = ",", dec = ".", : unimplemented type 'list' in 'EncodeElement'
I know I need to flatten the list, but I'm not sure how to do that.
My code:
library(httr)
library(jsonlite)
library(xml2)
library("rio")
library("magrittr")
query <- "http://api.erg.kcl.ac.uk/AirQuality/Information/MonitoringSiteSpecies/GroupName=London/Json"
out <- GET(url=query)
http_status(out)
data1 <- content(out)
data1 <- fromJSON( "http://api.erg.kcl.ac.uk/AirQuality/Information/MonitoringSiteSpecies/GroupName=London/Json")
data1 <- as.data.frame(data1)
write.table(data1, file="data1.csv", row.names=FALSE, na="", col.names = FALSE, sep=",")
View(data1)
setwd("F:/")
write.csv(data1,'data1.csv')
Answer 0 (score: 1)
Try this:
data1<-data.frame(lapply(data1, as.character), stringsAsFactors=FALSE)
write.table(data1, file="data1.csv", row.names=FALSE, na="", col.names = FALSE, sep=",")
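Note that converting every column with as.character() turns a list column into its deparsed "list(...)" text. If you would rather keep the nested values readable, here is a minimal sketch of an alternative (flatten_list_cols is a hypothetical helper, assuming each list element can be coerced to character) that collapses each element into a single delimited string before writing:
# collapse every list column into a "; "-separated character column
flatten_list_cols <- function( df ){
  list.cols <- vapply( df, is.list, logical( 1 ) )
  df[ list.cols ] <- lapply( df[ list.cols ], function( col ){
    vapply( col, function( x ) paste( unlist( x ), collapse = "; " ), character( 1 ) )
  })
  df
}
data1 <- flatten_list_cols( data1 )
write.table( data1, file = "data1.csv", row.names = FALSE, na = "", col.names = FALSE, sep = "," )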
Answer 1 (score: 1)
Converting the content of an httr::GET() request from JSON into an R object takes a few data manipulation steps before it can be exported as a CSV file, because httr::GET() returns several objects rather than a ready-to-write data frame.
# install necessary packages
install.packages( pkgs = c( "httr", "jsonlite", "magrittr" ))
# load necessary packages
library( httr )
library( jsonlite )
library( magrittr )
# store query
query <- "http://api.erg.kcl.ac.uk/AirQuality/Information/MonitoringSiteSpecies/GroupName=London/Json"
# GET the query
out <- httr::GET( url = query )
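# optional quick check: the response object returned by httr::GET()
# bundles several components, including the HTTP status and the raw
# bytes of the body in out$content
names( out )
httr::http_status( out )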
# base method
# Convert content from raw bytes to character
contents.out.base <- base::rawToChar( x = out$content )
# examine the first 30 characters
# from the contents in JSON form
base::substr( x = contents.out.base
            , start = 1
            , stop = 30
            )
# [1] "{\"Sites\":{\"Site\":[{\"@LocalAuth"
# transform from JSON string
# into a data frame
# set 'flatten' equal to TRUE
# to break out lists into individual columns
contents.out.base.df <-
jsonlite::fromJSON( txt = contents.out.base
, flatten = TRUE
)
# view the data
class( contents.out.base.df ) # [1] "list"
# Interesting! It didn't return a data frame
names( contents.out.base.df ) # [1] "Sites"
names( contents.out.base.df$Sites ) # [1] "Site"
names( contents.out.base.df$Sites$Site )
# [1] "@LocalAuthorityCode" "@LocalAuthorityName" "@SiteCode"
# [4] "@SiteName" "@SiteType" "@DateClosed"
# [7] "@DateOpened" "@Latitude" "@Longitude"
# [10] "@LatitudeWGS84" "@LongitudeWGS84" "@DataOwner"
# [13] "@DataManager" "@SiteLink" "Species"
# Note that 'Species' doesn't contain an '@' in front of it
# Why?
lapply( X = contents.out.base.df$Sites$Site, FUN = class )
# $`@LocalAuthorityCode`
# [1] "character"
#
# $`@LocalAuthorityName`
# [1] "character"
#
# $`@SiteCode`
# [1] "character"
#
# $`@SiteName`
# [1] "character"
#
# $`@SiteType`
# [1] "character"
#
# $`@DateClosed`
# [1] "character"
#
# $`@DateOpened`
# [1] "character"
#
# $`@Latitude`
# [1] "character"
#
# $`@Longitude`
# [1] "character"
#
# $`@LatitudeWGS84`
# [1] "character"
#
# $`@LongitudeWGS84`
# [1] "character"
#
# $`@DataOwner`
# [1] "character"
#
# $`@DataManager`
# [1] "character"
#
# $`@SiteLink`
# [1] "character"
#
# $Species
# [1] "list"
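# optional quick peek: str() shows what each element of the
# Species list column holds, here for the first site
str( object = contents.out.base.df$Sites$Site$Species[[ 1 ]] )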
# save contents.out.base.df$Sites$Site as its own data frame
# without $Species
website.df <-
contents.out.base.df$Sites$Site[
, which( colnames( contents.out.base.df$Sites$Site ) != "Species" )
]
# check dim
dim( website.df ) # [1] 212 14
# view the first six rows
head( x = website.df )
#' @LocalAuthorityCode @LocalAuthorityName @SiteCode
#' 1 1 Barking and Dagenham BG3
#' 2 1 Barking and Dagenham BG1
#' 3 1 Barking and Dagenham BG2
#' 4 2 Barnet BN2
#' 5 2 Barnet BN3
#' 6 2 Barnet BN1
#' @SiteName @SiteType
#' 1 Barking and Dagenham - North Street Kerbside
#' 2 Barking and Dagenham - Rush Green Suburban
#' 3 Barking and Dagenham - Scrattons Farm Suburban
#' 4 Barnet - Finchley Urban Background
#' 5 Barnet - Strawberry Vale Urban Background
#' 6 Barnet - Tally Ho Corner Kerbside
#' @DateClosed @DateOpened @Latitude
#' 1 2011-05-25 00:00:00 2007-03-16 00:00:00 51.540444
#' 2 1999-11-02 00:00:00 51.563752
#' 3 1999-10-17 00:00:00 51.529389
#' 4 2012-04-20 00:00:00 2000-08-09 13:00:00 51.591901
#' 5 2002-05-15 00:00:00 2000-08-14 14:00:00 51.6008848453589
#' 6 2012-04-20 00:00:00 1998-12-20 12:00:00 51.614675
#' @Longitude @LatitudeWGS84 @LongitudeWGS84
#' 1 0.074418 6717454.5833 8284.17386585
#' 2 0.177891 6721627.34498 19802.7355367
#' 3 0.132857 6715476.18683 14789.5735883
#' 4 -0.205992 6726669.62886 -22930.9245475
#' 5 -0.172297542087178 6728279.54795 -19180.0746501
#' 6 -0.176607 6730751.38494 -19659.8013105
#' @DataOwner @DataManager
#' 1 Barking and Dagenham King's College London
#' 2 Barking and Dagenham King's College London
#' 3 Barking and Dagenham King's College London
#' 4 Barnet King's College London
#' 5 Barnet King's College London
#' 6 Barnet King's College London
#' @SiteLink
#' 1 http://www.londonair.org.uk/london/asp/publicdetails.asp?site=BG3
#' 2 http://www.londonair.org.uk/london/asp/publicdetails.asp?site=BG1
#' 3 http://www.londonair.org.uk/london/asp/publicdetails.asp?site=BG2
#' 4 http://www.londonair.org.uk/london/asp/publicdetails.asp?site=BN2
#' 5 http://www.londonair.org.uk/london/asp/publicdetails.asp?site=BN3
#' 6 http://www.londonair.org.uk/london/asp/publicdetails.asp?site=BN1
Reshaping website.df into a Long Data Frame
TL;DR: the final data frame will contain more than 212 rows. A single local authority can identify more than one species at each of its sites. This one-to-many relationship reshapes the final version of website.df into 'long' format, where a local authority's information may be repeated because more than one type of species is identified at a given site.
To combine each object in contents.out.base.df$Sites$Site$Species with its corresponding row in website.df, I used a counter object. Before calling lapply(), counter is set to zero. Using <<- - the super assignment operator - lets me pull the matching row of website.df each time lapply() moves on to a new object in contents.out.base.df$Sites$Site$Species. Reading Using a counter inside an apply structured loop in R was very helpful for learning how to do this correctly.
Note: combining the objects in contents.out.base.df$Sites$Site$Species with cbind() produces several warnings(). The SO post cbind warnings : row names were found from a short variable and have been discarded shows that cbind() would otherwise create duplicate row names; to prevent duplicate row names, it discards them.
# create counter
# and set its value to zero
counter <- 0
# construct the column binding
# and replace the objects within the list
# with the concatenated version of
# that particular object's row in website.df
contents.out.base.df$Sites$Site$Species <-
lapply( X = contents.out.base.df$Sites$Site$Species
, FUN = function( i ){
# add to counter
counter <<- counter + 1
# add columns from
# the counter row in website.df
# onto the i object in X
cbind(
website.df[ counter , ]
, i
, stringsAsFactors = FALSE
)
} # end of anonymous function
)
# There were 50 or more warnings (use warnings() to see the first 50)
warnings()
# Warning messages:
# 1: In data.frame(..., check.names = FALSE) :
# row names were found from a short variable and have been discarded
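As a side note, the same row-wise pairing can be written without a counter or the super assignment operator by iterating over indices; here is a minimal sketch of that alternative, meant to be used in place of the counter version above rather than after it:
# alternative to the counter approach: pair each Species object with its
# site row by position using seq_along()
contents.out.base.df$Sites$Site$Species <-
  lapply( X = seq_along( contents.out.base.df$Sites$Site$Species )
        , FUN = function( i ){
            cbind( website.df[ i , ]
                 , contents.out.base.df$Sites$Site$Species[[ i ]]
                 , stringsAsFactors = FALSE
                 )
          }
        )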
After collapsing the objects in contents.out.base.df$Sites$Site$Species into one data frame, I cleaned up the row and column names of website.df. Finally, website.df is ready to be exported into your working directory with the write.csv() function.
# collapse the individual objects
# in the list into one data frame
website.df <-
data.frame(
do.call( what = rbind
, args = contents.out.base.df$Sites$Site$Species
)
, stringsAsFactors = FALSE
)
# check dim
dim( website.df ) # [1] 524 18
# rename the rows
rownames( x = website.df ) <-
as.character( x = 1:nrow( x = website.df ) )
# Make syntactically valid column names
colnames( x = website.df ) <-
base::gsub( pattern = "X."
, replacement = ""
, x = colnames( website.df )
)
# view the first six rows
head( x = website.df )
# LocalAuthorityCode LocalAuthorityName SiteCode
# 1 1 Barking and Dagenham BG3
# 2 1 Barking and Dagenham BG1
# 3 1 Barking and Dagenham BG1
# 4 1 Barking and Dagenham BG2
# 5 1 Barking and Dagenham BG2
# 6 2 Barnet BN2
# SiteName SiteType
# 1 Barking and Dagenham - North Street Kerbside
# 2 Barking and Dagenham - Rush Green Suburban
# 3 Barking and Dagenham - Rush Green Suburban
# 4 Barking and Dagenham - Scrattons Farm Suburban
# 5 Barking and Dagenham - Scrattons Farm Suburban
# 6 Barnet - Finchley Urban Background
# DateClosed DateOpened Latitude Longitude
# 1 2011-05-25 00:00:00 2007-03-16 00:00:00 51.540444 0.074418
# 2 1999-11-02 00:00:00 51.563752 0.177891
# 3 1999-11-02 00:00:00 51.563752 0.177891
# 4 1999-10-17 00:00:00 51.529389 0.132857
# 5 1999-10-17 00:00:00 51.529389 0.132857
# 6 2012-04-20 00:00:00 2000-08-09 13:00:00 51.591901 -0.205992
# LatitudeWGS84 LongitudeWGS84 DataOwner
# 1 6717454.5833 8284.17386585 Barking and Dagenham
# 2 6721627.34498 19802.7355367 Barking and Dagenham
# 3 6721627.34498 19802.7355367 Barking and Dagenham
# 4 6715476.18683 14789.5735883 Barking and Dagenham
# 5 6715476.18683 14789.5735883 Barking and Dagenham
# 6 6726669.62886 -22930.9245475 Barnet
# DataManager
# 1 King's College London
# 2 King's College London
# 3 King's College London
# 4 King's College London
# 5 King's College London
# 6 King's College London
# SiteLink
# 1 http://www.londonair.org.uk/london/asp/publicdetails.asp?site=BG3
# 2 http://www.londonair.org.uk/london/asp/publicdetails.asp?site=BG1
# 3 http://www.londonair.org.uk/london/asp/publicdetails.asp?site=BG1
# 4 http://www.londonair.org.uk/london/asp/publicdetails.asp?site=BG2
# 5 http://www.londonair.org.uk/london/asp/publicdetails.asp?site=BG2
# 6 http://www.londonair.org.uk/london/asp/publicdetails.asp?site=BN2
# SpeciesCode SpeciesDescription DateMeasurementStarted
# 1 NO2 Nitrogen Dioxide 2008-01-01 00:00:00
# 2 NO2 Nitrogen Dioxide 2008-01-01 00:00:00
# 3 SO2 Sulphur Dioxide 1999-10-23 00:00:00
# 4 NO2 Nitrogen Dioxide 2007-11-21 00:00:00
# 5 PM10 PM10 Particulate 1999-10-17 00:00:00
# 6 NO2 Nitrogen Dioxide 2008-01-01 00:00:00
# DateMeasurementFinished
# 1 2011-05-25 00:00:00
# 2
# 3
# 4
# 5
# 6 2012-04-20 00:00:00
# Export as CSV
write.csv( x = website.df
, file = "web_scrape.csv"
, row.names = FALSE
)
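# optional: the question loads the rio package and asks about Excel;
# rio::export() can write an .xlsx file instead of a CSV
# (this assumes an xlsx writer backend such as openxlsx is installed)
rio::export( x = website.df
           , file = "web_scrape.xlsx"
           )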
# end of script #
R version 3.4.3 (2017-11-30)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13.2
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods
[7] base
other attached packages:
[1] magrittr_1.5 jsonlite_1.5 httr_1.3.1
loaded via a namespace (and not attached):
[1] compiler_3.4.3 R6_2.2.2 rgdal_1.2-16 tools_3.4.3
[5] sp_1.2-7 curl_3.1 yaml_2.1.16 grid_3.4.3
[9] lattice_0.20-35