我有一个为我准备好的数据框,显然有些列是通过一些基础机制组合在一起的。如何以这种方式对列名称进行分组以及如何再次将它们分开?
例如,写 y.1$Address 就可以访问所有以 “Address.XXX” 开头的列。
> y.1
Address.streetAddress Address.position.latitude Address.position.longitude Address.namedAreas Address.region.municipalityName Address.region.countyName Address.ocean nothing rent floor livingArea
19 Västmannagatan 85C 59.34500 18.04370 Vasastan Stockholm Stockholms län 2325 4100000 1586 1.0 40.0
29 Redargatan 3 59.30279 18.09048 Hammarby Sjöstad Stockholm Stockholms län 1570 2800000 2829 4.0 43.5
18 Doktor Abelins gata 6 59.31596 18.05454 Södermalm Stockholm Stockholms län 1223 4875000 3092 NA 70.0
75 Sibeliusgången 34 59.41581 17.91272 Akalla Stockholm Stockholms län NA 1800000 4876 4.0 80.9
16 Standarvägen 1 59.27604 18.00459 Gamla Älvsjö Stockholm Stockholms län 6360 2950000 3983 1.0 91.0
32 Kungsbro Strand 17 59.33027 18.05143 Kungsholmen Stockholm Stockholms län 1086 1995000 2017 1.0 25.5
54 Pipersgatan 16 59.33057 18.04588 Kungsholmen Stockholm Stockholms län 1405 2195000 2105 3.0 27.0
22 Alva Myrdals gata 4 59.28650 17.95199 Fruängen-Hägersten Stockholm Stockholms län NA 1995000 2587 3.0 37.0
35 Norr Mälarstrand 24 59.32687 18.04522 Kungsholmen Stockholm Stockholms län 1437 2195000 910 4.0 23.0
4 Beckbrännarbacken 7 59.31487 18.08901 Södermalm Stockholm Stockholms län 329 1395000 520 0.5 11.0
> colnames(y.1)[1] <- "nothing"
> y.1
nothing.streetAddress nothing.position.latitude nothing.position.longitude nothing.namedAreas nothing.region.municipalityName nothing.region.countyName nothing.ocean listPrice rent floor livingArea
19 Västmannagatan 85C 59.34500 18.04370 Vasastan Stockholm Stockholms län 2325 4100000 1586 1.0 40.0
29 Redargatan 3 59.30279 18.09048 Hammarby Sjöstad Stockholm Stockholms län 1570 2800000 2829 4.0 43.5
18 Doktor Abelins gata 6 59.31596 18.05454 Södermalm Stockholm Stockholms län 1223 4875000 3092 NA 70.0
75 Sibeliusgången 34 59.41581 17.91272 Akalla Stockholm Stockholms län NA 1800000 4876 4.0 80.9
16 Standarvägen 1 59.27604 18.00459 Gamla Älvsjö Stockholm Stockholms län 6360 2950000 3983 1.0 91.0
32 Kungsbro Strand 17 59.33027 18.05143 Kungsholmen Stockholm Stockholms län 1086 1995000 2017 1.0 25.5
54 Pipersgatan 16 59.33057 18.04588 Kungsholmen Stockholm Stockholms län 1405 2195000 2105 3.0 27.0
22 Alva Myrdals gata 4 59.28650 17.95199 Fruängen-Hägersten Stockholm Stockholms län NA 1995000 2587 3.0 37.0
35 Norr Mälarstrand 24 59.32687 18.04522 Kungsholmen Stockholm Stockholms län 1437 2195000 910 4.0 23.0
4 Beckbrännarbacken 7 59.31487 18.08901 Södermalm Stockholm Stockholms län 329 1395000 520 0.5 11.0
> dput(y.1)
structure(list(Address = structure(list(address = structure(list(
streetAddress = c("Västmannagatan 85C", "Redargatan 3", "Doktor Abelins gata 6",
"Sibeliusgången 34", "Standarvägen 1", "Kungsbro Strand 17",
"Pipersgatan 16", "Alva Myrdals gata 4", "Norr Mälarstrand 24",
"Beckbrännarbacken 7")), .Names = "streetAddress", row.names = c(19L,
29L, 18L, 75L, 16L, 32L, 54L, 22L, 35L, 4L), class = "data.frame"),
position = structure(list(latitude = c(59.3449965, 59.3027897,
59.3159556, 59.4158109, 59.27603539, 59.33027358, 59.330567,
59.28649604, 59.326869, 59.314867), longitude = c(18.0437004,
18.0904824, 18.054536, 17.91271847, 18.00459327, 18.05143325,
18.045882, 17.95199275, 18.045217, 18.089009)), .Names = c("latitude",
"longitude"), row.names = c(19L, 29L, 18L, 75L, 16L, 32L,
54L, 22L, 35L, 4L), class = "data.frame"), namedAreas = list(
"Vasastan", "Hammarby Sjöstad", "Södermalm", "Akalla",
"Gamla Älvsjö", "Kungsholmen", "Kungsholmen", "Fruängen-Hägersten",
"Kungsholmen", "Södermalm"), region = structure(list(
municipalityName = c("Stockholm", "Stockholm", "Stockholm",
"Stockholm", "Stockholm", "Stockholm", "Stockholm", "Stockholm",
"Stockholm", "Stockholm"), countyName = c("Stockholms län",
"Stockholms län", "Stockholms län", "Stockholms län",
"Stockholms län", "Stockholms län", "Stockholms län",
"Stockholms län", "Stockholms län", "Stockholms län")), .Names = c("municipalityName",
"countyName"), row.names = c(19L, 29L, 18L, 75L, 16L, 32L,
54L, 22L, 35L, 4L), class = "data.frame"), distance = structure(list(
ocean = c(2325L, 1570L, 1223L, NA, 6360L, 1086L, 1405L,
NA, 1437L, 329L)), .Names = "ocean", row.names = c(19L,
29L, 18L, 75L, 16L, 32L, 54L, 22L, 35L, 4L), class = "data.frame")), .Names = c("address",
"position", "namedAreas", "region", "distance"), row.names = c(19L,
29L, 18L, 75L, 16L, 32L, 54L, 22L, 35L, 4L), class = "data.frame"),
nothing = c(4100000L, 2800000L, 4875000L, 1800000L, 2950000L,
1995000L, 2195000L, 1995000L, 2195000L, 1395000L), rent = c(1586L,
2829L, 3092L, 4876L, 3983L, 2017L, 2105L, 2587L, 910L, 520L
), floor = c(1, 4, NA, 4, 1, 1, 3, 3, 4, 0.5), livingArea = c(40,
43.5, 70, 80.9, 91, 25.5, 27, 37, 23, 11), source = structure(list(
name = c("BOSTHLM", "Fastighetsbyrån", "Gripsholms Fastighetsförmedling",
"Fastighetsbyrån", "Fastighetsbyrån", "Mäklarhuset",
"SkandiaMäklarna", "Svenska Mäklarhuset", "Svensk Fastighetsförmedling",
"Svensk Fastighetsförmedling"), id = c(1499L, 1573L,
9895524L, 1573L, 1573L, 204L, 1570L, 58L, 713L, 713L),
type = c("Broker", "Broker", "Broker", "Broker", "Broker",
"Broker", "Broker", "Broker", "Broker", "Broker"), url = c("http://www.bosthlm.se/",
"http://www.fastighetsbyran.se/", "http://gripsholms.se/",
"http://www.fastighetsbyran.se/", "http://www.fastighetsbyran.se/",
"http://www.maklarhuset.se/", "http://www.skandiamaklarna.se/",
"http://www.svenskamaklarhuset.se/", "http://www.svenskfast.se/",
"http://www.svenskfast.se/")), .Names = c("name", "id",
"type", "url"), row.names = c(19L, 29L, 18L, 75L, 16L, 32L,
54L, 22L, 35L, 4L), class = "data.frame"), rooms = c(2, 1.5,
2.5, 3, 3.5, 1, 1, 2, 1, 1), published = structure(c(16632,
16631, 16631, 16629, 16626, 16626, 16626, 16626, 16626, 16626
), class = "Date"), constructionYear = c(NA, 2008L, 1929L,
1977L, 1937L, 1934L, 1934L, NA, 1907L, 1929L), objectType = c("Lägenhet",
"Lägenhet", "Lägenhet", "Lägenhet", "Lägenhet", "Lägenhet",
"Lägenhet", "Lägenhet", "Lägenhet", "Lägenhet"), booliId = c(1919949L,
1893141L, 1896584L, 1898347L, 1917520L, 1918305L, 1918270L,
1918145L, 1918063L, 1918049L), soldDate = structure(c(16635,
16633, 16636, 16630, 16636, 16632, 16632, 16635, 16632, 16636
), class = "Date"), soldPrice = c(4100000L, 2950000L, 5175000L,
1800000L, 4200000L, 2510000L, 2610000L, 2500000L, 2950000L,
1850000L), url = c("https://www.booli.se/bostad/lagenhet/vasastan/vastmannagatan+85c/1919949",
"https://www.booli.se/bostad/lagenhet/hammarby+sjostad/redargatan+3/1893141",
"https://www.booli.se/bostad/lagenhet/sodermalm/doktor+abelins+gata+6/1896584",
"https://www.booli.se/bostad/lagenhet/akalla/sibeliusgangen+34/1898347",
"https://www.booli.se/bostad/lagenhet/gamla+alvsjo/standarvagen+1/1917520",
"https://www.booli.se/bostad/lagenhet/kungsholmen/kungsbro+strand+17/1918305",
"https://www.booli.se/bostad/lagenhet/kungsholmen/pipersgatan+16/1918270",
"https://www.booli.se/bostad/lagenhet/fruangen-hagersten/alva+myrdals+gata+4/1918145",
"https://www.booli.se/bostad/lagenhet/kungsholmen/norr+malarstrand+24/1918063",
"https://www.booli.se/bostad/lagenhet/sodermalm/beckbrannarbacken+7/1918049"
), isNewConstruction = c(NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_), plotArea = c(NA, NA, NA, NA, NA,
0L, NA, 0L, NA, NA), additionalArea = c(NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_), AreaSize = structure(c(4L,
4L, 7L, 8L, 9L, 2L, 2L, 3L, 2L, 1L), .Label = c("10", "20",
"30", "40", "50", "60", "70", "80", "90", "100", "110", "120",
"130"), class = "factor"), PriceDiff = c(0L, 150000L, 300000L,
0L, 1250000L, 515000L, 415000L, 505000L, 755000L, 455000L
)), .Names = c("Address", "nothing", "rent", "floor", "livingArea",
"source", "rooms", "published", "constructionYear", "objectType",
"booliId", "soldDate", "soldPrice", "url", "isNewConstruction",
"plotArea", "additionalArea", "AreaSize", "PriceDiff"), row.names = c(19L,
29L, 18L, 75L, 16L, 32L, 54L, 22L, 35L, 4L), class = "data.frame")
答案 0 :(得分:0)
看起来这个数据结构来自解析 JSON。您可以使用 jsonlite 包中的 flatten 函数,把它转换为常规的 data.frame。
library(jsonlite)
# flatten() turns the nested data.frame columns of y.1 into ordinary
# top-level columns with dot-separated names.
flat <- flatten(y.1)
# Show the structure of the flattened result.
str(flat)
# 'data.frame': 10 obs. of 28 variables:
# $ nothing : int 4100000 2800000 4875000 1800000 2950000 1995000 2195000 1995000 2195000 1395000
# $ rent : int 1586 2829 3092 4876 3983 2017 2105 2587 910 520
# $ floor : num 1 4 NA 4 1 1 3 3 4 0.5
# ...
因此,嵌套的 data.frame 已经被展开成各自独立的列。反方向的转换似乎更难,jsonlite 包看起来没有提供这个功能。您可以用一个递归函数生成嵌套列表来得到类似的结构,其中的基本元素可以是 data.frame。data.frame 本质上就是列表,所以这种结构与原结构相差不大。下面有两个函数:一个生成嵌套的 data.frame,另一个生成嵌套列表(我认为后者是更标准的结构)。
## Rebuild nested data.frames from a flattened data.frame.
##
## Columns whose names share the same prefix (the text before the first ".")
## are gathered into a nested data.frame stored under that prefix, and the
## same regrouping is applied recursively inside each nested data.frame.
## A prefix that occurs only once is left as an ordinary flat column.
##
## dat: a data.frame whose column names may be dot-separated ("a.b.c").
## Returns a data.frame with the same rows: the ungrouped columns first,
## followed by one nested data.frame column per shared prefix.
collapse <- function(dat) {
  nms <- names(dat)

  # Prefix = everything before the first dot. Names without a usable prefix
  # (no dot at all, or a leading dot) stand for themselves.
  dot_pos <- regexpr(".", nms, fixed = TRUE)
  has_prefix <- dot_pos > 1L
  prefixes <- nms
  prefixes[has_prefix] <- substr(nms[has_prefix], 1L, dot_pos[has_prefix] - 1L)

  ns <- table(prefixes)
  # Nest only prefixes shared by several columns, at least one of which is
  # dotted; each recursion step then shortens a name, so it terminates.
  grouped <- intersect(names(ns)[ns > 1], prefixes[has_prefix])

  # drop = FALSE keeps a data.frame even when a single column is left over.
  out <- dat[, !(prefixes %in% grouped), drop = FALSE]
  for (n in grouped) {
    # Exact prefix comparison: a pattern match on the whole name would also
    # pick up unrelated columns such as "source.url" for the prefix "url".
    members <- prefixes == n
    sub_dat <- dat[, members, drop = FALSE]

    # Strip the leading "<prefix>." literally (no regex interpolation).
    sub_names <- names(sub_dat)
    strip <- startsWith(sub_names, paste0(n, "."))
    sub_names[strip] <- substring(sub_names[strip], nchar(n) + 2L)
    names(sub_dat) <- sub_names

    out[[n]] <- collapse(sub_dat)
  }
  out
}
# Regroup the flattened columns into nested data.frames and inspect the result.
out <- collapse(flat)
str(out)
# 'data.frame': 10 obs. of 19 variables:
# $ nothing : int 4100000 2800000 4875000 1800000 2950000 1995000 2195000 1995000 2195000 1395000
# $ rent : int 1586 2829 3092 4876 3983 2017 2105 2587 910 520
# $ floor : num 1 4 NA 4 1 1 3 3 4 0.5
# ...
# $ source :'data.frame': 10 obs. of 4 variables:
# ..$ name: chr "BOSTHLM" "Fastighetsbyrån" "Gripsholms Fastighetsförmedling" "Fastighetsbyrån" ...
# ..$ id : int 1499 1573 9895524 1573 1573 204 1570 58 713 713
# ..$ type: chr "Broker" "Broker" "Broker" "Broker" ...
# ...
## Regroup a flattened data.frame into a nested list.
##
## Columns that do not share a prefix (the text before the first ".") become
## top-level list elements. Columns that do share a prefix are regrouped with
## collapse() and stored under that prefix as a list, whose elements may in
## turn be nested data.frames.
##
## dat: a data.frame whose column names may be dot-separated ("a.b.c").
## Returns a named list: the ungrouped columns first, followed by one list
## per shared prefix.
collapseNestedLists <- function(dat) {
  nms <- names(dat)

  # Prefix = everything before the first dot. Names without a usable prefix
  # (no dot at all, or a leading dot) stand for themselves.
  dot_pos <- regexpr(".", nms, fixed = TRUE)
  has_prefix <- dot_pos > 1L
  prefixes <- nms
  prefixes[has_prefix] <- substr(nms[has_prefix], 1L, dot_pos[has_prefix] - 1L)

  ns <- table(prefixes)
  # Nest only prefixes shared by several columns, at least one of them dotted.
  grouped <- intersect(names(ns)[ns > 1], prefixes[has_prefix])

  # drop = FALSE matters here: with a single ungrouped column a dropped
  # vector would be split by as.list() into one element per row.
  out <- as.list(dat[, !(prefixes %in% grouped), drop = FALSE])
  for (n in grouped) {
    # Exact prefix comparison, so that e.g. the prefix "url" does not also
    # capture a column named "source.url".
    members <- prefixes == n
    sub_dat <- dat[, members, drop = FALSE]

    # Strip the leading "<prefix>." literally (no regex interpolation).
    sub_names <- names(sub_dat)
    strip <- startsWith(sub_names, paste0(n, "."))
    sub_names[strip] <- substring(sub_names[strip], nchar(n) + 2L)
    names(sub_dat) <- sub_names

    # Deeper levels stay data.frames; only this level is turned into a list.
    out[[n]] <- as.list(collapse(sub_dat))
  }
  out
}
# Same regrouping of the flattened columns, returned as a nested list.
outList <- collapseNestedLists(flat)
这两种数据结构基本上可以用相同的方式访问。如果您想取出某个地址的纬度(也就是原始结构中嵌套在 data.frame 内的 data.frame 里的一个元素),下面几种写法都可以:
# all.equal() compares only its first two arguments (target and current);
# further positional arguments are taken as options such as the tolerance,
# not as additional values. Compare every access path against the first one.
latitudes <- list(
  outList$Address$position$latitude[1],
  out$Address$position$latitude[1],
  y.1$Address$position$latitude[1],
  y.1[["Address"]][["position"]][["latitude"]][1]
)
all(vapply(
  latitudes[-1],
  function(value) isTRUE(all.equal(latitudes[[1]], value)),
  logical(1)
))
# TRUE