我有一个大的.xlsx数据集,记录了一年中多个日期的花椰菜,西兰花和其他一些物种的密度,覆盖率和高度。
数据是在Excel中创建的,因此它很好地构建了人眼"。 列表示绘图(Plot,Frame,PlotCrop)和蔬菜特征(Cauliflower,Brocolli& Unknown),行是字段中的单个子图。
但是:每个素食都有3个特征(密度,封面,高度),但只有一个列名!
我的问题是,如何在R?
中有效地阅读此表在R中加载表格如下所示:
setwd("C:/fieldData")
# Read csv files
tab<-read.csv("format_question.csv", header = TRUE)
结果:
> tab
Date..4.16.16 X X.1 Cauliflower X.2 X.3 Broccoli X.4 X.5 Unknown..1 X.6 X.7
1 Area Brassica oleracea var. botrytis Brassica oleracea var. italica
2 # plants in a row TRAE UNK
3 Plot Frame Crop Density Cover Height Density Cover Height Density Cover Height
4 114 1N SO 1 5 0 1 <NA> 0 0 0 0
5 114 2N SO 1 5 0 0 0 0 0 0 0
6 114 4N SO 2 2 0 0 0 0 0 0 0
7 214 1N SO 8 5 0 0 0 0 0 0 0
8 214 2N SO 7 5 0 0 0 0 0 0 0
9 214 3N SO 9 5 0 0 0 0 0 0
相反,我希望看到这样的东西,即以某种方式记录所记录的素食。
> tab
Plot Frame Crop Cauli.Density Cauli.Cover Cauli.Height Broc.Density Broc.Cover Broc.Height UNK.Density UNK.Cover UNK.Height
4 114 1N SO 1 5 0 1 <NA> 0 0 0 0
5 114 2N SO 1 5 0 0 0 0 0 0 0
6 114 4N SO 2 2 0 0 0 0 0 0 0
7 214 1N SO 8 5 0 0 0 0 0 0 0
8 214 2N SO 7 5 0 0 0 0 0 0 0
9 214 3N SO 9 5 0 0 0 0 0 0 0
由于我有大约40个Excel .csvs,我真的想避免手动复制每个素食的列名,并至少自动制作一部分。但我不知道怎么做?
感谢您的任何建议!
这里有虚拟表格: https://www.dropbox.com/s/ac4dbahddmsomqp/format_question.csv?dl=0
答案 0 :(得分:2)
unpivotr
可能会成为你的朋友 - 我不是很习惯,但是想给你一个想法:
修改2018-08-01:使用unpivotr简化原始答案,类似于Spreadsheet Munging Strategies中的食谱,由作者@nacnudus友情提供包装和食谱书:
library(unpivotr)
library(tidyverse)
# from the OP's dropbox https://www.dropbox.com/s/ac4dbahddmsomqp/format_question.csv?dl=1
csv_text <-
"Date 4/16/18,,,Cauliflower,,,Broccoli,,,Unknown #1,,
Area,,,Brassica oleracea var. botrytis,,,Brassica oleracea var. italica,,,,,
,,,# plants in a row,,,BRAS,,,UNK ,,
Plot,Frame,PlotCrop,Density,Cover,Height,Density,Cover,Height,Density,Cover,Height
114,1N,SO,1,5,0,1,NA,0,0,0,0
114,2N,SO,1,5,0,0,0,0,0,0,0
114,4N,SO,2,2,0,0,0,0,0,0,0
214,1N,SO,8,5,0,0,0,0,0,0,0
214,2N,SO,7,5,0,0,0,0,0,0,0
214,3N,SO,9,5,0,0,,0,0,0,0 "
csv_text %>%
read_csv(col_names = FALSE) %>%
as_cells() %>%
dplyr::filter(!between(row, 2L, 3L)) %>%
behead("W", "Plot") %>%
behead("W", "Frame") %>%
behead("W", "PlotCrop") %>%
behead("NNW", "Name") %>%
behead("N", "metric") %>%
select(-data_type, -col) %>%
spread(metric, chr) %>%
select(-row)
#> # A tibble: 18 x 7
#> Plot Frame PlotCrop Name Cover Density Height
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 114 1N SO Broccoli <NA> 1 0
#> 2 114 1N SO Cauliflower 5 1 0
#> 3 114 1N SO Unknown #1 0 0 0
#> 4 114 2N SO Broccoli 0 0 0
#> 5 114 2N SO Cauliflower 5 1 0
#> 6 114 2N SO Unknown #1 0 0 0
#> 7 114 4N SO Broccoli 0 0 0
#> 8 114 4N SO Cauliflower 2 2 0
#> 9 114 4N SO Unknown #1 0 0 0
#> 10 214 1N SO Broccoli 0 0 0
#> 11 214 1N SO Cauliflower 5 8 0
#> 12 214 1N SO Unknown #1 0 0 0
#> 13 214 2N SO Broccoli 0 0 0
#> 14 214 2N SO Cauliflower 5 7 0
#> 15 214 2N SO Unknown #1 0 0 0
#> 16 214 3N SO Broccoli <NA> 0 0
#> 17 214 3N SO Cauliflower 5 9 0
#> 18 214 3N SO Unknown #1 0 0 0
原始回答:
library(unpivotr)
library(tidyverse)
download.file("https://www.dropbox.com/s/ac4dbahddmsomqp/format_question.csv?dl=1", tf<-tempfile(fileext = ".csv"))
df <- tf %>%
read_csv(col_names = FALSE) %>%
tidy_table() %>%
filter(!row %in% 2:3) %>%
behead("NNW", "a") %>%
behead("N", "b") %>%
unite("header", a, b) %>%
select(-data_type, -col) %>%
spread(header, chr) %>%
rename_at(vars(starts_with("Date")), ~sub("[^_]+_(.*)", "\\1", .x)) %>%
select(-row) %>% select(Frame:PlotCrop, everything())
glimpse(df)
# Observations: 6
# Variables: 12
# $ Frame <chr> "1N", "2N", "4N", "1N", "2N", "3N"
# $ Plot <chr> "114", "114", "114", "214", "214", "214"
# $ PlotCrop <chr> "SO", "SO", "SO", "SO", "SO", "SO"
# $ Broccoli_Cover <chr> NA, "0", "0", "0", "0", NA
# $ Broccoli_Density <chr> "1", "0", "0", "0", "0", "0"
# $ Broccoli_Height <chr> "0", "0", "0", "0", "0", "0"
# $ Cauliflower_Cover <chr> "5", "5", "2", "5", "5", "5"
# $ Cauliflower_Density <chr> "1", "1", "2", "8", "7", "9"
# $ Cauliflower_Height <chr> "0", "0", "0", "0", "0", "0"
# $ `Unknown #1_Cover` <chr> "0", "0", "0", "0", "0", "0"
# $ `Unknown #1_Density` <chr> "0", "0", "0", "0", "0", "0"
# $ `Unknown #1_Height` <chr> "0", "0", "0", "0", "0", "0"