我有一份宽格式(wide)的数据,想把它转换为长格式(long)。
library(tidyverse)
library(data.table)
#>
#> Attaching package: 'data.table'
#> The following objects are masked from 'package:dplyr':
#>
#> between, first, last
#> The following object is masked from 'package:purrr':
#>
#> transpose
# Example data: a 28-row tibble with 14 columns, laid out as repeated groups of
# a julian-day column followed by measurement/sd columns. Shorter series are
# padded with NA so that every column has 28 entries.
# Column order: julian_days, sea_ice_algae_last_cm, sd,
#               julian_days_2, water_1_5_m_depth, sd_2, water_10_m_depth, sd_3,
#               julian_days_3, water_40_m_depth, sd_4,
#               julian_days_4, water_60_m_depth, sd_5.
# Note that julian_days_2 is followed by two measurement/sd pairs.
df <- structure(list(
julian_days = c(
127, 130, 132, 134, 137, 139,
141, 144, 148, 151, 153, 155, 158, 160, 162, 165, 167, 169, 172,
NA, NA, NA, NA, NA, NA, NA, NA, NA
), sea_ice_algae_last_cm = c(
0.636,
0.698, 0.666666666666667, 0.685384615384615, 0.713, 0.6375, 0.58375,
0.637272727272727, 0.6575, 0.691666666666667, 0.629166666666667,
0.637142857142857, 0.589166666666667, 0.56, 0.571818181818182,
0.492, 0.31, 0.312, 0.203076923076923, NA, NA, NA, NA, NA, NA,
NA, NA, NA
), sd = c(
0.0227058484879019, 0.0369684550213647, 0.0533853912601565,
0.0525381424324881, 0.0413790070231539, 0.0381682876458741, 0.0277788888666675,
0.0410099766132362, 0.0222076972732838, 0.0194079021706795, 0.0299873710792131,
0.0363841933236059, 0.0253908835942542, 0.055746679790749, 0.0604678727620178,
0.0294957624075053, 0.10770329614269, 0.0657267069006199, 0.0693282789084673,
NA, NA, NA, NA, NA, NA, NA, NA, NA
), julian_days_2 = c(
127, 130,
132, 134, 137, 139, 141, 144, 146, 148, 151, 153, 155, 158, 160,
162, 165, 167, 169, 172, 174, 176, 179, 181, 183, 186, 188, 190
), water_1_5_m_depth = c(
0.69, 0.5475, 0.596, 0.512, 0.598, 0.488333333333333,
0.27, 0.41, 0.568, 0.503333333333333, 0.668333333333333, 0.71,
0.636666666666667, 0.623333333333333, 0.66, 0.541666666666667,
0.57, 0.545, 0.501666666666667, 0.526666666666667, 0.566666666666667,
0.493333333333333, 0.59, 0.518333333333333, 0.443333333333333,
0.605, 0.58, 0.478333333333333
), sd_2 = c(
0.121655250605964,
0.0718215380880506, 0.0736885337077625, 0.0376828873628335, 0.084380092438916,
0.0636919670497516, 0.054037024344425, 0.0540370243444251, 0.0370135110466435,
0.0571547606649408, 0.0702614166286638, 0.0442718872423573, 0.0799166232186176,
0.0480277697448743, 0.0409878030638384, 0.0462240918425302, 0.0920869154657709,
0.0706399320497981, 0.0511533641774093, 0.100531918646103, 0.0186189867250252,
0.0588784057755188, 0.0841427358718512, 0.0934701378337842, 0.0492612085384298,
0.0653452370108182, 0.0878635305459549, 0.0851860708488579
),
water_10_m_depth = c(
0.66, 0.732, 0.595, 0.712, 0.514, 0.48,
0.35, 0.44, 0.535, 0.403333333333333, 0.728, 0.746, 0.625,
0.698333333333333, 0.705, 0.555, 0.585, 0.651666666666667,
0.603333333333333, 0.595, 0.615, 0.615, 0.658333333333333,
0.641666666666667, 0.623333333333333, 0.628333333333333,
0.661666666666667, 0.631666666666667
), sd_3 = c(
0, 0.0342052627529742,
0.0387298334620742, 0.0327108544675923, 0.0610737259384104,
0.0700000000000001, 0.127279220613579, 0.0972111104761177,
0.0564800849857717, 0.0504645089807343, 0.0540370243444252,
0.0415932686861709, 0.0809320702811933, 0.0475043857624395,
0.0398748040747538, 0.0568330889535313, 0.0388587184554509,
0.0204124145231932, 0.058878405775519, 0.0896102672688791,
0.0535723809439155, 0.0488876262463212, 0.043089055068157,
0.0306050104830347, 0.0527888877195444, 0.0708284312029193,
0.0426223728418147, 0.0348807492274272
), julian_days_3 = c(
134,
137, 139, 141, 146, 148, 153, 155, 160, 162, 165, 169, 172,
174, 176, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA
), water_40_m_depth = c(
0.523166666666667, 0.360833333333333,
0.279, 0.228, 0.551166666666667, 0.358666666666667, 0.593,
0.6225, 0.6665, 0.5468, 0.334714285714286, 0.654, 0.567666666666667,
0.664166666666667, 0.6345, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA
), sd_4 = c(
0.0793937445058905, 0.0346145441493408,
0.0834625664594612, 0.105740247777277, 0.0437008771841786,
0.0810719844747042, 0.0849529281425892, 0.0539620236833275,
0.0689514321823702, 0.0344992753547085, 0.0889713704621029,
0.064221491729794, 0.0166933120340652, 0.0545982295195244,
0.0578472125516865, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA
), julian_days_4 = c(
181, 183, 186, 188, 190, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA
), water_60_m_depth = c(
0.617833333333333,
0.492333333333333, 0.642166666666667, 0.7265, 0.686166666666667,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA
), sd_5 = c(
0.0574818812032684,
0.049766119666563, 0.0704540039079871, 0.0286618212959331,
0.0382225936674458, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
)
), row.names = c(
NA,
-28L
), class = c("tbl_df", "tbl", "data.frame"))
# Sort by julian_days_4 in descending order so the row for day 190 comes first.
df %>%
  arrange(desc(julian_days_4))
#> # A tibble: 28 x 14
#> julian_days sea_ice_algae_l… sd julian_days_2 water_1_5_m_dep…
#> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 137 0.713 0.0414 137 0.598
#> 2 134 0.685 0.0525 134 0.512
#> 3 132 0.667 0.0534 132 0.596
#> 4 130 0.698 0.0370 130 0.548
#> 5 127 0.636 0.0227 127 0.69
#> 6 139 0.638 0.0382 139 0.488
#> 7 141 0.584 0.0278 141 0.27
#> 8 144 0.637 0.0410 144 0.41
#> 9 148 0.658 0.0222 146 0.568
#> 10 151 0.692 0.0194 148 0.503
#> # … with 18 more rows, and 9 more variables: sd_2 <dbl>,
#> # water_10_m_depth <dbl>, sd_3 <dbl>, julian_days_3 <dbl>,
#> # water_40_m_depth <dbl>, sd_4 <dbl>, julian_days_4 <dbl>,
#> # water_60_m_depth <dbl>, sd_5 <dbl>
我想将所有这些列“堆叠”成 3 列:
julian:所有以 julian 开头的列
measure:所有以 water 或 sea 开头的列
sd:所有以 sd 开头的列
请注意,在以 water 开头的列名中,数字表示深度(例如:water_1_5_m_depth 表示 1.5 m)。
以每组数据的第一行为例,期望的输出如下:
# Expected result, written row by row: one row per measurement type with its
# julian day, rounded measurement and rounded standard deviation.
tribble(
  ~julian, ~type,       ~measure, ~sd,
  127,     "sea",       0.64,     0.02,
  127,     "water_1.5", 0.69,     0.12,
  127,     "water_10",  0.66,     0,
  134,     "water_40",  0.52,     0.08,
  181,     "water_60",  0.62,     0.06
)
#> # A tibble: 5 x 4
#> julian type measure sd
#> <dbl> <chr> <dbl> <dbl>
#> 1 127 sea 0.64 0.02
#> 2 127 water_1.5 0.69 0.12
#> 3 127 water_10 0.66 0
#> 4 134 water_40 0.52 0.08
#> 5 181 water_60 0.62 0.06
到目前为止,我尝试过使用 data.table:
# Attempt with data.table's melt(): every regex passed to patterns() becomes
# its own value column, named by the matching entry of `value.name`.
# setDT() converts `df` to a data.table by reference.
melt(
  setDT(df),
  # Spell out `measure.vars` rather than relying on partial matching of
  # the abbreviated argument name `measure`.
  measure.vars = patterns(
    "^julian", "^sea", "^water_1_5", "^water_10", "^water_40", "^water_60",
    "^sd"
  ),
  value.name = c(
    "julian", "sea", "water_1.5", "water_10", "water_40", "water_60", "sd"
  )
)
#> variable julian sea water_1.5 water_10 water_40 water_60
#> 1: 1 127 0.6360000 0.6900 0.660 0.5231667 0.6178333
#> 2: 1 130 0.6980000 0.5475 0.732 0.3608333 0.4923333
#> 3: 1 132 0.6666667 0.5960 0.595 0.2790000 0.6421667
#> 4: 1 134 0.6853846 0.5120 0.712 0.2280000 0.7265000
#> 5: 1 137 0.7130000 0.5980 0.514 0.5511667 0.6861667
#> ---
#> 136: 5 NA NA NA NA NA NA
#> 137: 5 NA NA NA NA NA NA
#> 138: 5 NA NA NA NA NA NA
#> 139: 5 NA NA NA NA NA NA
#> 140: 5 NA NA NA NA NA NA
#> sd
#> 1: 0.02270585
#> 2: 0.03696846
#> 3: 0.05338539
#> 4: 0.05253814
#> 5: 0.04137901
#> ---
#> 136: NA
#> 137: NA
#> 138: NA
#> 139: NA
#> 140: NA
非常感谢任何帮助。
更新:
这是我收到的文件。
由reprex package(v0.2.1)于2019-04-12创建
答案 0 :(得分:1)
如果您共享所需的输出,那就太好了。我想这就是你想要的:
# Stack each family of columns into long form with gather() and glue the
# pieces together side by side. bind_cols() pairs rows purely by position, so
# every piece must contain the same number of stacked rows.
# NOTE(review): because the pairing is positional, the first julian column is
# matched with the first water column, the second with the second, and so on.
# Confirm that this is the intended correspondence between columns.
df %>%
  select(starts_with("julian")) %>%
  gather(key = col, julian) %>%
  bind_cols(df %>%
    select(starts_with("water")) %>%
    gather(col_water, measure)) %>%
  # The "sea" column is deliberately left out for now:
  # bind_cols(df %>%
  #   select(starts_with("sea")) %>%
  #   gather(col_sea, measure2)) %>%
  bind_cols(df %>%
    # "sd_" (with the underscore) skips the plain `sd` column, leaving four sd
    # columns to line up with the four julian and four water columns.
    # Selecting all five sd columns would give a piece with more rows than the
    # others and bind_cols() would fail.
    select(starts_with("sd_")) %>%
    gather(col_sd, sd)) %>%
  select(julian, measure, sd)
julian measure sd
<dbl> <dbl> <dbl>
1 127 0.69 0.122
2 130 0.548 0.0718
3 132 0.596 0.0737
4 134 0.512 0.0377
5 137 0.598 0.0844
6 139 0.488 0.0637
7 141 0.27 0.0540
8 144 0.41 0.0540
9 148 0.568 0.0370
10 151 0.503 0.0572
# ... with 102 more rows
在这次尝试中,我没有包含以 sea 开头的变量,因为这会导致一对多合并。如果我的思路是对的,请告诉我,我再把它加进来。
答案 1 :(得分:1)
library(tidyverse)
# Cut the 14 columns into consecutive groups of 3, 5, 3 and 3 columns.
list_of_dfs <- split.default(df, rep(seq_len(4), times = c(3, 5, 3, 3)))
# The second group holds one julian column followed by two measure/sd pairs:
# move the second pair (columns 4 and 5) into a fifth group of its own, then
# trim the second group down to its first three columns.
list_of_dfs[[5]] <- select(list_of_dfs[[2]], 1, 4, 5)
list_of_dfs[[2]] <- select(list_of_dfs[[2]], 1:3)
list_of_dfs %>%
  map(function(d) {
    # Keep only the rows without any NA.
    complete <- d[complete.cases(d), ]
    # Remember the original measurement column name as the series label.
    label <- grep("^sea|^water", names(complete), value = TRUE)
    labelled <- mutate(complete, type = label)
    # Give every group the same column names so the groups can be stacked.
    setNames(labelled, c("julian", "measure", "sd", "type"))
  }) %>%
  bind_rows()
# # A tibble: 95 x 4
# julian measure sd type
# <dbl> <dbl> <dbl> <chr>
# 1 127 0.636 0.0227 sea_ice_algae_last_cm
# 2 130 0.698 0.0370 sea_ice_algae_last_cm
# 3 132 0.667 0.0534 sea_ice_algae_last_cm
# 4 134 0.685 0.0525 sea_ice_algae_last_cm
# 5 137 0.713 0.0414 sea_ice_algae_last_cm
# 6 139 0.638 0.0382 sea_ice_algae_last_cm
# 7 141 0.584 0.0278 sea_ice_algae_last_cm
# 8 144 0.637 0.0410 sea_ice_algae_last_cm
# 9 148 0.658 0.0222 sea_ice_algae_last_cm
# 10 151 0.692 0.0194 sea_ice_algae_last_cm
# # … with 85 more rows
答案 2 :(得分:0)
# Melt the three column families one after another. Each melt() keeps all
# remaining columns as id columns, so the result is the full cross product of
# julian, measure and sd columns for every original row.
data.table::melt(
  # Convert explicitly so the call does not depend on `df` having been turned
  # into a data.table earlier.
  data.table::as.data.table(df),
  measure.vars = patterns("^julian"),
  variable.name = "julian_variable",
  value.name = "julian_value"
) %>%
  data.table::melt(
    # The pattern is passed unnamed so that `value.name` alone decides the
    # name of the value column.
    measure.vars = patterns("^sea|^water"),
    variable.name = "measure_variable",
    value.name = "measure_value"
  ) %>%
  data.table::melt(
    measure.vars = patterns("^sd"),
    variable.name = "sd_variable",
    value.name = "sd_value"
  )
# julian_variable julian_value measure_variable measure_value sd_variable sd_value
# 1: julian_days 127 sea_ice_algae_last_cm 0.6360000 sd 0.02270585
# 2: julian_days 130 sea_ice_algae_last_cm 0.6980000 sd 0.03696846
# 3: julian_days 132 sea_ice_algae_last_cm 0.6666667 sd 0.05338539
# 4: julian_days 134 sea_ice_algae_last_cm 0.6853846 sd 0.05253814
# 5: julian_days 137 sea_ice_algae_last_cm 0.7130000 sd 0.04137901
# ---
# 2796: julian_days_4 NA water_60_m_depth NA sd_5 NA
# 2797: julian_days_4 NA water_60_m_depth NA sd_5 NA
# 2798: julian_days_4 NA water_60_m_depth NA sd_5 NA
# 2799: julian_days_4 NA water_60_m_depth NA sd_5 NA
# 2800: julian_days_4 NA water_60_m_depth NA sd_5 NA
不过,目前还不清楚所需的输出究竟是什么。这种解决方案显然会产生大量重复(基本上,每个值都重复 100 次:4 个 “julian” 列 × 5 个 “measure” 列 × 5 个 “sd” 列)。