我拿到了一个格式奇怪的数据文件,读入 R 之后得到如下数据框:
# Sample of the oddly shaped input as it was read in. Column X1 holds the row
# labels; X2..X4 each hold one series: an ID, a parameter name and a year in
# the first three rows, followed by one reading per label in X1. Every cell is
# a character string that still carries its literal double quotes.
df <- structure(
  list(
    X1 = c(
      '"ID"', '"Parameter"', '"Year"',
      '"800"', '"799"', '"798"', '"797"', '"796"', '"795"'
    ),
    X2 = c(
      '"001"', '"ap"', '"2016"',
      '"-0.000978013465745161"',
      '"-0.000853574674147712"',
      '"-0.000770681767403833"',
      '"-0.000762506834996983"',
      '"-0.000963651463931642"',
      '"-0.000839241421637097"'
    ),
    X3 = c(
      '"002"', '"ap"', '"2016"',
      '"-0.000583552718375254"',
      '"-0.000495471744663315"',
      '"-0.000502488351223215"',
      '"-0.000589039598146738"',
      '"-0.000599887975678647"',
      '"-0.000471434015603837"'
    ),
    X4 = c(
      '"003"', '"ap"', '"2016"',
      '"-0.000568187733836333"',
      '"-0.000527059984394067"',
      '"-0.000615318057111717"',
      '"-0.000592280468265934"',
      '"-0.000577707032763554"',
      '"-0.000569167407032334"'
    )
  ),
  # The list above is already named, so only the data-frame attributes are
  # attached here: nine automatic row names and the tibble class vector.
  row.names = c(NA, -9L),
  class = c("tbl_df", "tbl", "data.frame")
)
df
#> X1 X2 X3
#> 1 "ID" "001" "002"
#> 2 "Parameter" "ap" "ap"
#> 3 "Year" "2016" "2016"
#> 4 "800" "-0.000978013465745161" "-0.000583552718375254"
#> 5 "799" "-0.000853574674147712" "-0.000495471744663315"
#> 6 "798" "-0.000770681767403833" "-0.000502488351223215"
#> 7 "797" "-0.000762506834996983" "-0.000589039598146738"
#> 8 "796" "-0.000963651463931642" "-0.000599887975678647"
#> 9 "795" "-0.000839241421637097" "-0.000471434015603837"
#> X4
#> 1 "003"
#> 2 "ap"
#> 3 "2016"
#> 4 "-0.000568187733836333"
#> 5 "-0.000527059984394067"
#> 6 "-0.000615318057111717"
#> 7 "-0.000592280468265934"
#> 8 "-0.000577707032763554"
#> 9 "-0.000569167407032334"
我写了一些代码来整理这些数据,但代码不够优雅。预期输出如下。
# Expected tidy (long) result: one row per wavelength/id combination, with the
# rows grouped by id. All five columns are character vectors whose values
# still carry their literal double quotes, exactly as in the raw input.
res <- data.frame(
  stringsAsFactors = FALSE,
  # Wavelengths 800..795, repeated once for each of the three ids.
  wavelength = rep(sprintf('"%d"', 800:795), times = 3L),
  # Ids "001".."003", each covering six consecutive rows.
  id = rep(sprintf('"%03d"', 1:3), each = 6L),
  parameter = rep('"ap"', 18L),
  year = rep('"2016"', 18L),
  # Readings listed id by id (six per id), in the same wavelength order.
  value = sprintf(
    '"%s"',
    c(
      "-0.000978013465745161", "-0.000853574674147712",
      "-0.000770681767403833", "-0.000762506834996983",
      "-0.000963651463931642", "-0.000839241421637097",
      "-0.000583552718375254", "-0.000495471744663315",
      "-0.000502488351223215", "-0.000589039598146738",
      "-0.000599887975678647", "-0.000471434015603837",
      "-0.000568187733836333", "-0.000527059984394067",
      "-0.000615318057111717", "-0.000592280468265934",
      "-0.000577707032763554", "-0.000569167407032334"
    )
  )
)
res
#> wavelength id parameter year value
#> 1 "800" "001" "ap" "2016" "-0.000978013465745161"
#> 2 "799" "001" "ap" "2016" "-0.000853574674147712"
#> 3 "798" "001" "ap" "2016" "-0.000770681767403833"
#> 4 "797" "001" "ap" "2016" "-0.000762506834996983"
#> 5 "796" "001" "ap" "2016" "-0.000963651463931642"
#> 6 "795" "001" "ap" "2016" "-0.000839241421637097"
#> 7 "800" "002" "ap" "2016" "-0.000583552718375254"
#> 8 "799" "002" "ap" "2016" "-0.000495471744663315"
#> 9 "798" "002" "ap" "2016" "-0.000502488351223215"
#> 10 "797" "002" "ap" "2016" "-0.000589039598146738"
#> 11 "796" "002" "ap" "2016" "-0.000599887975678647"
#> 12 "795" "002" "ap" "2016" "-0.000471434015603837"
#> 13 "800" "003" "ap" "2016" "-0.000568187733836333"
#> 14 "799" "003" "ap" "2016" "-0.000527059984394067"
#> 15 "798" "003" "ap" "2016" "-0.000615318057111717"
#> 16 "797" "003" "ap" "2016" "-0.000592280468265934"
#> 17 "796" "003" "ap" "2016" "-0.000577707032763554"
#> 18 "795" "003" "ap" "2016" "-0.000569167407032334"
如何快速地把这些数据整理成上述长格式(使用基础 R、tidyr 或 data.table)?
答案 0 :(得分:2)
方法:
library(tidyverse)

# Transpose so that every series column (X2, X3, ...) becomes one row. The
# first transposed row holds the field labels, so use it (with the embedded
# quotes removed) as the header and keep only the remaining rows. Naming the
# columns before the conversion avoids tibble's warning about matrices that
# have no column names.
test <- t(df)
colnames(test) <- str_replace_all(test[1, ], '\\"', "")
test <- as_tibble(test[-1, , drop = FALSE])

# Strip the embedded quotes from every cell, then convert every column except
# Parameter and Year to numeric. Selecting by name instead of by position
# keeps this working when the number of wavelength columns changes.
# Note: ID becomes numeric as well, so "001" turns into 1.
test2 <- test %>%
  mutate(across(everything(), ~ str_replace_all(.x, '\\"', ""))) %>%
  mutate(across(!c(Parameter, Year), as.numeric))

# Reshape to long format: one row per ID / wavelength pair. pivot_longer()
# emits the rows ID by ID, so sort afterwards to keep the wavelength-major
# order (highest wavelength first) shown in the output below.
answer <- test2 %>%
  pivot_longer(
    cols = !c(ID, Parameter, Year),
    names_to = "Wavelength",
    values_to = "value"
  ) %>%
  mutate(Wavelength = as.numeric(Wavelength)) %>%
  arrange(desc(Wavelength), ID)
输出:
> answer
# A tibble: 18 x 5
ID Parameter Year Wavelength value
<dbl> <chr> <chr> <dbl> <dbl>
1 1. ap 2016 800. -0.000978
2 2. ap 2016 800. -0.000584
3 3. ap 2016 800. -0.000568
4 1. ap 2016 799. -0.000854
5 2. ap 2016 799. -0.000495
6 3. ap 2016 799. -0.000527
7 1. ap 2016 798. -0.000771
8 2. ap 2016 798. -0.000502
9 3. ap 2016 798. -0.000615
10 1. ap 2016 797. -0.000763
11 2. ap 2016 797. -0.000589
12 3. ap 2016 797. -0.000592
13 1. ap 2016 796. -0.000964
14 2. ap 2016 796. -0.000600
15 3. ap 2016 796. -0.000578
16 1. ap 2016 795. -0.000839
17 2. ap 2016 795. -0.000471
18 3. ap 2016 795. -0.000569
答案 1 :(得分:1)
借助 data.table,您可以执行以下操作:
library(data.table)

# Rebuild the table as text and let read.table() parse it: transpose() turns
# each original row into one column, and paste() glues those columns together
# into one space-separated line per original column (the first line being the
# header). read.table() drops the embedded quotes and converts the column
# types; the wavelength headers get an "X" prefix because they are not
# syntactic R names.
#
# The parsed table is converted to a data.table so that melt() uses its
# data.table method directly instead of the deprecated redirect to reshape2
# for plain data frames. Arguments are spelled out in full (header = TRUE,
# variable.name) rather than relying on partial matching and on `T`.
# as.data.frame() returns a plain data frame, as shown in the output below.
as.data.frame(
  melt(
    as.data.table(
      read.table(header = TRUE, text = do.call(paste, transpose(df)))
    ),
    id.vars = 1:3,
    variable.name = "wavelength"
  )
)
ID Parameter Year wavelength value
1 1 ap 2016 X800 -0.0009780135
2 2 ap 2016 X800 -0.0005835527
3 3 ap 2016 X800 -0.0005681877
4 1 ap 2016 X799 -0.0008535747
5 2 ap 2016 X799 -0.0004954717
6 3 ap 2016 X799 -0.0005270600
7 1 ap 2016 X798 -0.0007706818
8 2 ap 2016 X798 -0.0005024884
9 3 ap 2016 X798 -0.0006153181
10 1 ap 2016 X797 -0.0007625068
11 2 ap 2016 X797 -0.0005890396
12 3 ap 2016 X797 -0.0005922805
13 1 ap 2016 X796 -0.0009636515
14 2 ap 2016 X796 -0.0005998880
15 3 ap 2016 X796 -0.0005777070
16 1 ap 2016 X795 -0.0008392414
17 2 ap 2016 X795 -0.0004714340
18 3 ap 2016 X795 -0.0005691674