我正在尝试将数据框从长格式转换为宽格式。当前有一个 InputCode 列,其中包含输入 A 和 B,它们需要各自成为单独的列,并以 “DataValue” 中的值填充。我一直在尝试使用 spread 和 dcast:
# Turn each InputCode level into its own column, filled from DataValue.
data_wide <- spread(data = oldData_long, key = InputCode, value = DataValue)
或
# Cast to wide format: the variables left of `~` identify the output rows,
# each InputCode level becomes a column, and cells are filled from DataValue.
# The `+` between InputName and DataYear was missing, which made the formula
# a syntax error.
# NOTE(review): the dput sample below names the unit column `UnitCode`, not
# `InputUnit` -- confirm which name the real data uses.
data_wide2 <- dcast(
  oldData_long,
  Indicator + IndicatorID + InputName + DataYear + Country + Division +
    InputUnit ~ InputCode,
  value.var = "DataValue"
)
但是,尽管创建了输入A和输入B列,但数据框中的行数却保持不变(84)而不是变为42。只要输入A有一个值,输入B的列中就会有NA,反之亦然。
此外,理想情况下,每个输入代码都应有自己的 InputUnit 列,例如 “InputAUnit”,因为在展开(spread)数据时该值也会各不相同,并且可能正是导致上述问题的原因。InputName 也是如此,但我不知道如何把这些信息整齐地拆分出来。
任何帮助将不胜感激!
dput:
structure(list(ID = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Indicator = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = "Waste Generated", class = "factor"), IndicatorID = c(11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L), InputCode = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("InputA", "InputB"), class = "factor"), InputName = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("Waste Generated - Waste incinerated",
"Waste Generated - Waste sent to landfill"), class = "factor"),
DataValue = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 5L, 1L, 7L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
4L, 6L, 8L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 9L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 10L,
3L), .Label = c("0", "155", "19", "2,898.00", "20,462.34",
"22.317", "4.368", "40", "6,695.65", "8.998"), class = "factor"),
UnitCode = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = "t", class = "factor"), DataYear = c(2009L, 2009L,
2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L,
2009L, 2009L, 2009L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L,
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2011L,
2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L,
2011L, 2011L, 2011L, 2011L, 2009L, 2009L, 2009L, 2009L, 2009L,
2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L,
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L,
2010L, 2010L, 2010L, 2010L, 2010L, 2011L, 2011L, 2011L, 2011L,
2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L, 2011L,
2011L), Country = structure(c(4L, 1L, 2L, 3L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 4L, 1L, 2L, 3L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 4L, 1L, 2L, 3L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 4L, 1L, 2L, 3L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 4L, 1L, 2L,
3L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 4L, 1L,
2L, 3L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L), .Label = c("Afghanistan",
"Albania", "Algeria", "All", "American Samoa", "Andorra",
"Angola", "Antigua and Barbuda", "Argentina", "Armenia",
"Aruba", "Australia", "Austria", "Azerbaijan"), class = "factor"),
ISO = structure(c(5L, 2L, 4L, 14L, 9L, 6L, 3L, 10L, 7L, 8L,
1L, 11L, 12L, 13L, 5L, 2L, 4L, 14L, 9L, 6L, 3L, 10L, 7L,
8L, 1L, 11L, 12L, 13L, 5L, 2L, 4L, 14L, 9L, 6L, 3L, 10L,
7L, 8L, 1L, 11L, 12L, 13L, 5L, 2L, 4L, 14L, 9L, 6L, 3L, 10L,
7L, 8L, 1L, 11L, 12L, 13L, 5L, 2L, 4L, 14L, 9L, 6L, 3L, 10L,
7L, 8L, 1L, 11L, 12L, 13L, 5L, 2L, 4L, 14L, 9L, 6L, 3L, 10L,
7L, 8L, 1L, 11L, 12L, 13L), .Label = c("ABW", "AFG", "AGO",
"ALB", "ALL", "AND", "ARG", "ARM", "ASM", "ATG", "AUS", "AUT",
"AZE", "DZA"), class = "factor"), Division = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Test", class = "factor"),
FurtherDetails1 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = "Test1", class = "factor"), FurtherDetails2 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Test2", class = "factor")), class = "data.frame", row.names = c(NA,
-84L))
这将是理想的输出:
structure(list(ID = c(NA, NA, NA, NA, NA, NA), Indicator = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "Waste Generated", class = "factor"),
IndicatorID = c(11L, 11L, 11L, 11L, 11L, 11L), DataYear = c(2009L,
2009L, 2009L, 2009L, 2009L, 2009L), Country = structure(c(4L,
1L, 2L, 3L, 5L, 6L), .Label = c("Afghanistan", "Albania",
"Algeria", "All", "American Samoa", "Andorra", "Angola",
"Antigua and Barbuda", "Argentina", "Armenia", "Aruba", "Australia",
"Austria", "Azerbaijan"), class = "factor"), ISO = structure(c(5L,
2L, 4L, 14L, 9L, 6L), .Label = c("ABW", "AFG", "AGO", "ALB",
"ALL", "AND", "ARG", "ARM", "ASM", "ATG", "AUS", "AUT", "AZE",
"DZA"), class = "factor"), Division = structure(c(1L, 1L,
1L, 1L, 1L, 1L), .Label = "Test", class = "factor"), FurtherDetails1 = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "Test1", class = "factor"),
FurtherDetails2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = "Test2", class = "factor"),
InputA = c(0L, 0L, 0L, 0L, 0L, 0L), InputAUnit = structure(c(2L,
2L, 2L, 2L, 2L, 2L), .Label = c("", "t"), class = "factor"),
InputAName = structure(c(2L, 2L, 2L, 2L, 2L, 2L), .Label = c("",
"Waste Generated - Waste sent to landfill"), class = "factor"),
InputB = c(0L, 0L, 0L, 0L, 0L, 0L), InputBUnit = structure(c(2L,
2L, 2L, 2L, 2L, 2L), .Label = c("", "t"), class = "factor"),
InputBName = structure(c(2L, 2L, 2L, 2L, 2L, 2L), .Label = c("",
"Waste Generated - Waste incinerated"), class = "factor")), row.names = c(NA,
6L), class = "data.frame")
谢谢!
答案 0 :(得分:2)
可能的tidyr解决方案。
library(tidyr)
# Widen the table: every InputCode level gets its own DataValue_*, UnitCode_*
# and InputName_* column; all remaining columns identify the output rows.
out <- pivot_wider(
  data = oldData_long,
  names_from = InputCode,
  values_from = c(DataValue, UnitCode, InputName)
)
out
# A tibble: 42 x 15
ID Indicator IndicatorID DataYear Country ISO Division FurtherDetails1 FurtherDetails2 DataValue_InputA DataValue_InputB
<lgl> <fct> <int> <int> <fct> <fct> <fct> <fct> <fct> <fct> <fct>
1 NA Waste Ge… 11 2009 All ALL Test Test1 Test2 0 0
2 NA Waste Ge… 11 2009 Afghan… AFG Test Test1 Test2 0 0
3 NA Waste Ge… 11 2009 Albania ALB Test Test1 Test2 0 0
4 NA Waste Ge… 11 2009 Algeria DZA Test Test1 Test2 0 0
5 NA Waste Ge… 11 2009 Americ… ASM Test Test1 Test2 0 0
6 NA Waste Ge… 11 2009 Andorra AND Test Test1 Test2 0 0
7 NA Waste Ge… 11 2009 Angola AGO Test Test1 Test2 0 0
8 NA Waste Ge… 11 2009 Antigu… ATG Test Test1 Test2 0 0
9 NA Waste Ge… 11 2009 Argent… ARG Test Test1 Test2 0 0
10 NA Waste Ge… 11 2009 Armenia ARM Test Test1 Test2 0 0
# … with 32 more rows, and 4 more variables: UnitCode_InputA <fct>, UnitCode_InputB <fct>, InputName_InputA <fct>, InputName_InputB <fct>
str(out)
Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 42 obs. of 15 variables:
$ ID : logi NA NA NA NA NA NA ...
$ Indicator : Factor w/ 1 level "Waste Generated": 1 1 1 1 1 1 1 1 1 1 ...
$ IndicatorID : int 11 11 11 11 11 11 11 11 11 11 ...
$ DataYear : int 2009 2009 2009 2009 2009 2009 2009 2009 2009 2009 ...
$ Country : Factor w/ 14 levels "Afghanistan",..: 4 1 2 3 5 6 7 8 9 10 ...
$ ISO : Factor w/ 14 levels "ABW","AFG","AGO",..: 5 2 4 14 9 6 3 10 7 8 ...
$ Division : Factor w/ 1 level "Test": 1 1 1 1 1 1 1 1 1 1 ...
$ FurtherDetails1 : Factor w/ 1 level "Test1": 1 1 1 1 1 1 1 1 1 1 ...
$ FurtherDetails2 : Factor w/ 1 level "Test2": 1 1 1 1 1 1 1 1 1 1 ...
$ DataValue_InputA: Factor w/ 10 levels "0","155","19",..: 1 1 1 1 1 1 1 1 1 1 ...
$ DataValue_InputB: Factor w/ 10 levels "0","155","19",..: 1 1 1 1 1 1 1 1 1 1 ...
$ UnitCode_InputA : Factor w/ 1 level "t": 1 1 1 1 1 1 1 1 1 1 ...
$ UnitCode_InputB : Factor w/ 1 level "t": 1 1 1 1 1 1 1 1 1 1 ...
$ InputName_InputA: Factor w/ 2 levels "Waste Generated - Waste incinerated",..: 2 2 2 2 2 2 2 2 2 2 ...
$ InputName_InputB: Factor w/ 2 levels "Waste Generated - Waste incinerated",..: 1 1 1 1 1 1 1 1 1 1 ...