为什么我在R中创建一个csv文件时一直出错

时间:2018-02-09 15:33:24

标签: r export-to-csv

当我尝试在R中创建表格,然后将其导出为Excel可以打开的CSV文件时,为什么我一直收到错误?我收到的错误如下:

  

Error in write.table(data1, "data1.csv", col.names = NA, sep = ",", dec = ".", : unimplemented type 'list' in 'EncodeElement'(write.table 出错:'EncodeElement' 中未实现的类型 'list')

我知道需要把列表展平(flatten),但我不确定该如何展平。

我的代码:

# Attach the packages used by this script
library(httr)
library(jsonlite)
library(xml2)
library("rio")    
library("magrittr")

# URL of the JSON endpoint requested below
query <- "http://api.erg.kcl.ac.uk/AirQuality/Information/MonitoringSiteSpecies/GroupName=London/Json"

# Request the endpoint with httr and report the HTTP status of the response
out <- GET(url=query)
http_status(out)
# Extract the response body; this value is replaced by the next assignment
data1 <- content(out)

# Fetch and parse the same URL a second time, this time with jsonlite
data1 <- fromJSON( "http://api.erg.kcl.ac.uk/AirQuality/Information/MonitoringSiteSpecies/GroupName=London/Json")
# Coerce the parsed result to a data frame
# NOTE(review): judging by the reported 'EncodeElement' error, the result
# presumably still holds a list column at this point -- confirm with str(data1)
data1 <- as.data.frame(data1)


# Write the data frame as comma-separated text without row or column names
write.table(data1, file="data1.csv", row.names=FALSE, na="", col.names = FALSE, sep=",")

# Open the data frame in the interactive viewer
View(data1)

# Change the working directory, then write the same data frame again;
# this second file is therefore created relative to F:/
setwd("F:/")    
write.csv(data1,'data1.csv')

2 个答案:

答案 0 :(得分:1)

试试这个:

# Convert every column of data1 to character (list columns included)
# and rebuild the data frame without creating factors
data1 <- data.frame(
  lapply(X = data1, FUN = as.character),
  stringsAsFactors = FALSE
)

# Export as comma-separated text without row names or a header row;
# missing values are written as empty strings
write.table(
  x = data1,
  file = "data1.csv",
  sep = ",",
  na = "",
  row.names = FALSE,
  col.names = FALSE
)

答案 1 :(得分:1)

概述

httr::GET()请求返回的内容需要先经过几个数据处理步骤,从列表对象转换为数据框,然后才能导出为文件。

可重复的示例

初始httr::GET()返回多个对象

# install only those required packages that are not yet available
# (avoids re-installing everything on each run of the script)
required.pkgs <- c( "httr", "jsonlite", "magrittr" )
missing.pkgs <-
  required.pkgs[
    !vapply( X = required.pkgs
             , FUN = requireNamespace
             , FUN.VALUE = logical( 1 )
             , quietly = TRUE
    )
  ]
if( length( missing.pkgs ) > 0 ){
  install.packages( pkgs = missing.pkgs )
}

# load necessary packages
library( httr )
library( jsonlite )
library( magrittr )

# store query
query <- "http://api.erg.kcl.ac.uk/AirQuality/Information/MonitoringSiteSpecies/GroupName=London/Json"

# GET the query
out <- httr::GET( url = query )

# stop with an informative error if the request failed
# instead of trying to parse an error response as JSON
httr::stop_for_status( x = out )

# base method
# Convert content from raw bytes to character
contents.out.base <- base::rawToChar( x = out$content )

# examine the first 30 characters
# from the contents in JSON form
# (R strings are 1-indexed, so start at 1)
base::substr( x = contents.out.base
              , start = 1
              , stop = 30
              )
# [1] "{\"Sites\":{\"Site\":[{\"@LocalAuth"

# transform from JSON string
# into an R object
# set 'flatten' equal to TRUE
# to break out nested data frames into individual columns
contents.out.base.df <- 
  jsonlite::fromJSON( txt = contents.out.base
                      , flatten = TRUE
  )

# inspect the parsed object
class( contents.out.base.df ) # [1] "list"

# Interesting! It didn't return a data frame
# walk down the nesting one level at a time
names( contents.out.base.df )                # [1] "Sites"
names( contents.out.base.df[[ "Sites" ]] )   # [1] "Site"
names( contents.out.base.df[[ "Sites" ]][[ "Site" ]] )
# [1] "@LocalAuthorityCode" "@LocalAuthorityName" "@SiteCode"          
# [4] "@SiteName"           "@SiteType"           "@DateClosed"        
# [7] "@DateOpened"         "@Latitude"           "@Longitude"         
# [10] "@LatitudeWGS84"      "@LongitudeWGS84"     "@DataOwner"         
# [13] "@DataManager"        "@SiteLink"           "Species" 

# 'Species' is the only name without a leading '@'
# check the class of every column to see how it differs
lapply( X = contents.out.base.df[[ "Sites" ]][[ "Site" ]]
        , FUN = class
)
# $`@LocalAuthorityCode`
# [1] "character"
# 
# $`@LocalAuthorityName`
# [1] "character"
# 
# $`@SiteCode`
# [1] "character"
# 
# $`@SiteName`
# [1] "character"
# 
# $`@SiteType`
# [1] "character"
# 
# $`@DateClosed`
# [1] "character"
# 
# $`@DateOpened`
# [1] "character"
# 
# $`@Latitude`
# [1] "character"
# 
# $`@Longitude`
# [1] "character"
# 
# $`@LatitudeWGS84`
# [1] "character"
# 
# $`@LongitudeWGS84`
# [1] "character"
# 
# $`@DataOwner`
# [1] "character"
# 
# $`@DataManager`
# [1] "character"
# 
# $`@SiteLink`
# [1] "character"
# 
# $Species
# [1] "list"

# save contents.out.base.df$Sites$Site as its own data frame
# without $Species
# a logical index selects every column not named "Species";
# 'drop = FALSE' guarantees a data frame is returned
# even if only a single column were to remain
website.df <-
  contents.out.base.df$Sites$Site[
    , colnames( contents.out.base.df$Sites$Site ) != "Species"
    , drop = FALSE
  ]

# check dim
dim( website.df ) # [1] 212  14

# view the first six rows
head( x = website.df )
#' @LocalAuthorityCode  @LocalAuthorityName @SiteCode
#' 1                   1 Barking and Dagenham       BG3
#' 2                   1 Barking and Dagenham       BG1
#' 3                   1 Barking and Dagenham       BG2
#' 4                   2               Barnet       BN2
#' 5                   2               Barnet       BN3
#' 6                   2               Barnet       BN1
#' @SiteName        @SiteType
#' 1   Barking and Dagenham - North Street         Kerbside
#' 2     Barking and Dagenham - Rush Green         Suburban
#' 3 Barking and Dagenham - Scrattons Farm         Suburban
#' 4                     Barnet - Finchley Urban Background
#' 5             Barnet - Strawberry Vale  Urban Background
#' 6              Barnet - Tally Ho Corner         Kerbside
#' @DateClosed         @DateOpened        @Latitude
#' 1 2011-05-25 00:00:00 2007-03-16 00:00:00        51.540444
#' 2                     1999-11-02 00:00:00        51.563752
#' 3                     1999-10-17 00:00:00        51.529389
#' 4 2012-04-20 00:00:00 2000-08-09 13:00:00        51.591901
#' 5 2002-05-15 00:00:00 2000-08-14 14:00:00 51.6008848453589
#' 6 2012-04-20 00:00:00 1998-12-20 12:00:00        51.614675
#' @Longitude @LatitudeWGS84 @LongitudeWGS84
#' 1           0.074418   6717454.5833   8284.17386585
#' 2           0.177891  6721627.34498   19802.7355367
#' 3           0.132857  6715476.18683   14789.5735883
#' 4          -0.205992  6726669.62886  -22930.9245475
#' 5 -0.172297542087178  6728279.54795  -19180.0746501
#' 6          -0.176607  6730751.38494  -19659.8013105
#' @DataOwner          @DataManager
#' 1 Barking and Dagenham King's College London
#' 2 Barking and Dagenham King's College London
#' 3 Barking and Dagenham King's College London
#' 4               Barnet King's College London
#' 5               Barnet King's College London
#' 6               Barnet King's College London
#' @SiteLink
#' 1 http://www.londonair.org.uk/london/asp/publicdetails.asp?site=BG3
#' 2 http://www.londonair.org.uk/london/asp/publicdetails.asp?site=BG1
#' 3 http://www.londonair.org.uk/london/asp/publicdetails.asp?site=BG2
#' 4 http://www.londonair.org.uk/london/asp/publicdetails.asp?site=BN2
#' 5 http://www.londonair.org.uk/london/asp/publicdetails.asp?site=BN3
#' 6 http://www.londonair.org.uk/london/asp/publicdetails.asp?site=BN1

一对多关系(one-to-many relationship)要求将website.df重塑为长格式数据框(long data frame)

TL; DR最终数据框将超过212行。

每个站点所属的地方当局可以监测一种以上的污染物种类(species)。

这种一对多关系(one-to-many relationship)会将website.df的最终版本重塑为“长”格式('long' format):由于某个站点监测到多种污染物种类(species),同一地方当局和站点的信息可能会在多行中重复。

为了将contents.out.base.df$Sites$Site$Species中的每个对象与website.df中的相应行合并,我使用了一个名为counter的对象。在调用lapply()之前,counter被设置为零。

使用<<-(超级赋值运算符,super assignment operator)可以在匿名函数内部修改函数外部的counter,这样每处理contents.out.base.df$Sites$Site$Species中的一个新对象,就能从website.df中提取相应的那一行。阅读“Using a counter inside an apply structured loop in R”这篇帖子对学习如何正确地做到这一点很有帮助。

注意:使用cbind()合并contents.out.base.df$Sites$Site$Species中的对象会产生多个警告(可用warnings()查看)。SO帖子“cbind warnings : row names were found from a short variable and have been discarded”说明,这种cbind()会导致行名重复;为了避免重复的行名,这些行名被丢弃了。

# start the row counter at zero; the anonymous function below
# increases it by one for every element of the Species list
counter <- 0

# for each element of the Species list, prepend the matching
# row of website.df and store the combined data frame
# back into the Species list
contents.out.base.df$Sites$Site$Species <-
  lapply(
    X = contents.out.base.df$Sites$Site$Species,
    FUN = function( species ) {
      # '<<-' updates the counter defined outside this function,
      # so it keeps its value between calls
      counter <<- counter + 1

      # the row of website.df at the current counter position
      site.row <- website.df[ counter , ]

      # column-bind that row onto the species records
      cbind( site.row, species, stringsAsFactors = FALSE )
    }
  )

# There were 50 or more warnings (use warnings() to see the first 50)
warnings()
# Warning messages:
#   1: In data.frame(..., check.names = FALSE) :
#   row names were found from a short variable and have been discarded

将折叠列表导出为CSV

将contents.out.base.df$Sites$Site$Species中的对象折叠(合并)到一个数据框之后,我清理了website.df的行名和列名。最后,website.df就可以用write.csv()函数导出到您的工作目录中了。

# collapse the individual objects
# in the list into one data frame
website.df <-
  data.frame( 
    do.call( what = rbind
             , args = contents.out.base.df$Sites$Site$Species
             )
    , stringsAsFactors = FALSE
    )

# check dim
dim( website.df ) # [1] 524  18

# rename the rows
# seq_len() stays correct even for a zero-row data frame,
# where 1:nrow() would yield c( 1, 0 )
rownames( x = website.df ) <-
  as.character( x = seq_len( nrow( x = website.df ) ) )

# data.frame() made the column names syntactically valid,
# which turned the leading '@' into the prefix 'X.'
# remove only that leading prefix: the pattern is anchored
# and the dot escaped, so an 'X' elsewhere in a name is untouched
colnames( x = website.df ) <-
  base::sub( pattern = "^X\\."
             , replacement = ""
             , x = colnames( website.df )
  )

# view the first six rows
head( x = website.df )
# LocalAuthorityCode   LocalAuthorityName SiteCode
# 1                  1 Barking and Dagenham      BG3
# 2                  1 Barking and Dagenham      BG1
# 3                  1 Barking and Dagenham      BG1
# 4                  1 Barking and Dagenham      BG2
# 5                  1 Barking and Dagenham      BG2
# 6                  2               Barnet      BN2
# SiteName         SiteType
# 1   Barking and Dagenham - North Street         Kerbside
# 2     Barking and Dagenham - Rush Green         Suburban
# 3     Barking and Dagenham - Rush Green         Suburban
# 4 Barking and Dagenham - Scrattons Farm         Suburban
# 5 Barking and Dagenham - Scrattons Farm         Suburban
# 6                     Barnet - Finchley Urban Background
# DateClosed          DateOpened  Latitude Longitude
# 1 2011-05-25 00:00:00 2007-03-16 00:00:00 51.540444  0.074418
# 2                     1999-11-02 00:00:00 51.563752  0.177891
# 3                     1999-11-02 00:00:00 51.563752  0.177891
# 4                     1999-10-17 00:00:00 51.529389  0.132857
# 5                     1999-10-17 00:00:00 51.529389  0.132857
# 6 2012-04-20 00:00:00 2000-08-09 13:00:00 51.591901 -0.205992
# LatitudeWGS84 LongitudeWGS84            DataOwner
# 1  6717454.5833  8284.17386585 Barking and Dagenham
# 2 6721627.34498  19802.7355367 Barking and Dagenham
# 3 6721627.34498  19802.7355367 Barking and Dagenham
# 4 6715476.18683  14789.5735883 Barking and Dagenham
# 5 6715476.18683  14789.5735883 Barking and Dagenham
# 6 6726669.62886 -22930.9245475               Barnet
# DataManager
# 1 King's College London
# 2 King's College London
# 3 King's College London
# 4 King's College London
# 5 King's College London
# 6 King's College London
# SiteLink
# 1 http://www.londonair.org.uk/london/asp/publicdetails.asp?site=BG3
# 2 http://www.londonair.org.uk/london/asp/publicdetails.asp?site=BG1
# 3 http://www.londonair.org.uk/london/asp/publicdetails.asp?site=BG1
# 4 http://www.londonair.org.uk/london/asp/publicdetails.asp?site=BG2
# 5 http://www.londonair.org.uk/london/asp/publicdetails.asp?site=BG2
# 6 http://www.londonair.org.uk/london/asp/publicdetails.asp?site=BN2
# SpeciesCode SpeciesDescription DateMeasurementStarted
# 1         NO2   Nitrogen Dioxide    2008-01-01 00:00:00
# 2         NO2   Nitrogen Dioxide    2008-01-01 00:00:00
# 3         SO2    Sulphur Dioxide    1999-10-23 00:00:00
# 4         NO2   Nitrogen Dioxide    2007-11-21 00:00:00
# 5        PM10   PM10 Particulate    1999-10-17 00:00:00
# 6         NO2   Nitrogen Dioxide    2008-01-01 00:00:00
# DateMeasurementFinished
# 1     2011-05-25 00:00:00
# 2                        
# 3                        
# 4                        
# 5                        
# 6     2012-04-20 00:00:00

# Write the final data frame to web_scrape.csv
# in the working directory, omitting the row names
utils::write.csv( website.df
                  , file = "web_scrape.csv"
                  , row.names = FALSE
)

# end of script #

会话信息

使用sessionInfo()

R version 3.4.3 (2017-11-30)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13.2

Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods  
[7] base     

other attached packages:
[1] magrittr_1.5 jsonlite_1.5 httr_1.3.1  

loaded via a namespace (and not attached):
[1] compiler_3.4.3  R6_2.2.2        rgdal_1.2-16    tools_3.4.3    
[5] sp_1.2-7        curl_3.1        yaml_2.1.16     grid_3.4.3     
[9] lattice_0.20-35