我正在尝试把一个数据帧中的数据重新排列到另一个数据帧中。我认为所需的解决方案可以用 plyr 包实现,但我一直没能拼出完整的方案。
概要
我有一系列 Transects,每个 Transect 由数量不等的 Points 组成。每个 Transect 可以划分为若干个由三个 Points 组成的非独立组(相邻的组共用端点),每组构成一个 Leg。
输入数据
我有每个 Transect 上每个 Point 的坐标:
# Subset of Points data (dput() output): a 39-row data frame, one row per Point.
#   Transect - factor; the integer codes index into .Label (e.g. 73 -> "T076")
#   Point    - position of the point along its transect, starting at 1
#   x, y     - coordinates of the point
# The expression is not assigned here; the snippets further down read it from a
# variable called `Points`.
structure(list(Transect = structure(c(73L, 73L, 73L, 73L, 73L, 73L, 72L, 72L, 72L, 72L, 72L, 72L, 23L, 23L, 23L, 14L, 14L, 14L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L),
.Label = c("B", "D", "E", "F", "G", "L1", "L2", "L3", "L4", "L5", "L9", "S101", "S105", "S109", "S116", "S117", "S118", "S119", "S121", "S122", "S123", "S124", "S125", "S126", "T001", "T002", "T003", "T004", "T006", "T007", "T008", "T009", "T010", "T011", "T012", "T013", "T014", "T015", "T016", "T017", "T018", "T019", "T022", "T023", "T024", "T026", "T028", "T029", "T030", "T031", "T032", "T033", "T035", "T039", "T040", "T043", "T049", "T050", "T051", "T056", "T060", "T061", "T062", "T063", "T065", "T066", "T067", "T068", "T072", "T073", "T074", "T075", "T076", "T077", "T078", "T079", "T082N", "T083", "T087", "T088", "T092", "T093", "T095", "T096", "T097"),
class = "factor"),
Point = c(1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 1, 2, 3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21),
x = c(38.53, 38.53409, 38.53818, 38.53396, 38.52984, 38.53006, 38.45, 38.44936, 38.44942, 38.45324, 38.45743, 38.45382, 38.29102, 38.29013, 38.28935, 37.7798, 37.7803, 37.78109, 38.08238, 38.07932, 38.07534, 38.07143, 38.06737, 38.06339, 38.0596, 38.05605, 38.05261, 38.0489, 38.0444, 38.04113, 38.03668, 38.03237, 38.02786, 38.0234, 38.01895, 38.01524, 38.01481, 38.01465, 38.013),
y = c(4.23, 4.22811, 4.22622, 4.22465, 4.22281, 4.22553, 4.22, 4.22445, 4.22897, 4.22659, 4.22481, 4.22239, 5.37832, 5.37391, 5.36949, 5.0068, 5.01126, 5.0157, 4.95384, 4.95693, 4.95914, 4.96122, 4.96315, 4.96527, 4.96772, 4.97052, 4.97344, 4.97601, 4.97695, 4.97998, 4.98097, 4.98002, 4.97972, 4.98019, 4.98, 4.98272, 4.98715, 4.99165, 4.9958)),
.Names = c("Transect", "Point", "x", "y"),
row.names = c(NA, -39L),
class = "data.frame")
以及每个 Transect 中每个 Leg 的标识(ID):
# Subset of Leg IDs (dput() output): an 18-row data frame, one row per Leg.
#   Transect - factor; the integer codes index into .Label (e.g. 73 -> "T076")
#   Leg      - factor naming the leg by its end points ("A-B", "B-C", ...)
# The expression is not assigned here; the snippets further down read it from a
# variable called `Leg`.
structure(list(Transect = structure(c(73L, 73L, 73L, 72L, 72L, 72L, 23L, 14L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L),
.Label = c("B", "D", "E", "F", "G", "L1", "L2", "L3", "L4", "L5", "L9", "S101", "S105", "S109", "S116", "S117", "S118", "S119", "S121", "S122", "S123", "S124", "S125", "S126", "T001", "T002", "T003", "T004", "T006", "T007", "T008", "T009", "T010", "T011", "T012", "T013", "T014", "T015", "T016", "T017", "T018", "T019", "T022", "T023", "T024", "T026", "T028", "T029", "T030", "T031", "T032", "T033", "T035", "T039", "T040", "T043", "T049", "T050", "T051", "T056", "T060", "T061", "T062", "T063", "T065", "T066", "T067", "T068", "T072", "T073", "T074", "T075", "T076", "T077", "T078", "T079", "T082N", "T083", "T087", "T088", "T092", "T093", "T095", "T096", "T097"),
class = "factor"),
Leg = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 1L, 1L, 2L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L),
.Label = c("A-B", "B-C", "C-A", "C-D", "D-E", "E-F", "F-G", "G-H", "H-I", "I-J", "J-K"),
class = "factor")),
.Names = c("Transect", "Leg"),
row.names = c(NA, -18L),
class = "data.frame")
Transect 类型
所有含 6 个点的 Transects 都是三角形的(每个顶点以及每条边的中点各有一个 Point),即:
"Leg" == "A-B" contains "Points" == c(1,2,3)
"Leg" == "B-C" contains "Points" == c(3,4,5)
"Leg" == "C-A" contains "Points" == c(5,6,1)
所有其他 Transects 都是线性的,例如:
"Leg" == "A-B" contains "Points" == c(1,2,3)
"Leg" == "B-C" contains "Points" == c(3,4,5)
"Leg" == "C-D" contains "Points" == c(5,6,7)
"Leg" == "D-E" contains "Points" == c(7,8,9) etc.
针对部分数据的解法(所需结果示例)
通过把两个输入数据帧分别取子集到单个 Transect,我已经能够针对每种类型的 Transect 得到所需的输出:
# Triangular transect (exactly 6 Points): three legs, the last one closing the
# loop back on Point 1.
tmp <- Points[Points$Transect == "T076", ]
tmp2 <- Leg[Leg$Transect == "T076", ]
for (i in seq_len(3)) {
  # Leg i starts on Point 2i - 1 and has Point 2i as its middle
  tmp2$Start_x[i] <- tmp$x[2 * i - 1]
  tmp2$Start_y[i] <- tmp$y[2 * i - 1]
  tmp2$Mid_x[i] <- tmp$x[2 * i]
  tmp2$Mid_y[i] <- tmp$y[2 * i]
  # Legs 1 and 2 end on Point 2i + 1; leg 3 (C-A) wraps round to Point 1
  tmp2$End_x[i] <- if (i == 3) tmp$x[1] else tmp$x[2 * i + 1]
  tmp2$End_y[i] <- if (i == 3) tmp$y[1] else tmp$y[2 * i + 1]
}
# Linear transect (any Point count other than 6): leg i runs over Points
# 2i - 1 (start), 2i (middle) and 2i + 1 (end); nothing wraps round.
tmp <- Points[Points$Transect == "L2", ]
tmp2 <- Leg[Leg$Transect == "L2", ]
# seq_len() yields an empty loop when the transect has fewer than two Points,
# whereas 1:0 would run the body for i = 1 and then i = 0.
for (i in seq_len(round((length(tmp$Point) - 1) / 2))) {
  tmp2$Start_x[i] <- tmp$x[2 * i - 1]
  tmp2$Start_y[i] <- tmp$y[2 * i - 1]
  tmp2$Mid_x[i] <- tmp$x[2 * i]
  tmp2$Mid_y[i] <- tmp$y[2 * i]
  tmp2$End_x[i] <- tmp$x[2 * i + 1]
  tmp2$End_y[i] <- tmp$y[2 * i + 1]
}
在我看来,应该可以组合使用 ddply 和 d_ply 等函数,按 Transect 拆分完整的数据帧并应用相应的代码,最终返回 Leg 数据帧,其中每个 Leg 都带有 Start、Mid、End 的 x 和 y 新列。
但是我这样做时会报错,部分原因是:
a)我无法用 ifelse 把三角形 Transects(有 6 个点)与线性 Transects(点数任意)区分开;
b)我无法正确地组合使用 plyr 的函数。
返回错误的代码示例
# Attempt that fails; kept exactly as posted because it is what the question
# asks about. Why it cannot work as written:
#   * d_ply() discards whatever its function returns, so nothing is collected.
#   * ddply() is nested inside d_ply(), so every Transect group `a` is paired
#     with every Transect group `b`, not only the group of the same Transect.
#   * ifelse() selects between two values, but both branches here are `for`
#     loops, and a `for` loop evaluates to NULL. Choosing between two code
#     paths needs a plain if / else.
#   * `b` is modified inside the loops but never returned.
# BTVs and Leg.points are not defined in this post - presumably they are the
# Points and Leg data frames shown above (TODO confirm).
library(plyr)
d_ply(BTVs, "Transect", function(a)
ddply(Leg.points, "Transect", function(b)
ifelse(length(a$Point)==6,
# when == 6 (i.e. triangular sites)
for (i in 1:3) {
b$Start_x[i] <- a$x[i+i-1]
b$Start_y[i] <- a$y[i+i-1]
b$Mid_x[i] <- a$x[i+i]
b$Mid_y[i] <- a$y[i+i]
b$End_x[i] <- ifelse(i==3,
a$x[1],
a$x[i+i+1])
# NOTE(review): a$x[1] below looks like it should be a$y[1] (compare End_x
# above and the stand-alone triangular snippet) - confirm.
b$End_y[i] <- ifelse(i==3,
a$x[1],
a$y[i+i+1])},
# when != 6 (i.e. straight line sites)
for (i in 1:round((length(a$Point)-1)/2)) {
b$Start_x[i] <- a$x[i+i-1]
b$Start_y[i] <- a$y[i+i-1]
b$Mid_x[i] <- a$x[i+i]
b$Mid_y[i] <- a$y[i+i]
b$End_x[i] <- a$x[i+i+1]
b$End_y[i] <- a$y[i+i+1]
})))
有人可以帮忙吗?提前谢谢!
答案 0 :(得分:1)
Start from the work that you have already done and wrap the loops in a function.
#' Start, middle and end coordinates for every leg of one transect
#'
#' Looks up the points and legs that belong to transect `point` and appends
#' six columns to the leg rows: `Start_x`, `Start_y`, `Mid_x`, `Mid_y`,
#' `End_x` and `End_y`. Leg `i` runs over points `2i - 1`, `2i` and `2i + 1`,
#' so consecutive legs share an end point. A transect with exactly six points
#' is treated as a closed triangle: its third leg ends back on point 1.
#'
#' This is the vectorised equivalent of the two per-leg loops (triangular and
#' linear). Points are taken in row order, so `points_df` is assumed to be
#' sorted by point number within each transect.
#'
#' @param point Transect identifier, compared against the `Transect` column.
#' @param points_df Data frame with columns `Transect`, `x` and `y`, one row
#'   per point. Defaults to the global `Points`.
#' @param legs_df Data frame with a `Transect` column and one row per leg.
#'   Defaults to the global `Leg`.
#' @return The rows of `legs_df` for `point` with the six coordinate columns
#'   appended. A coordinate is `NA` where the transect has no such point. A
#'   warning is raised when the number of leg rows does not match the number
#'   of legs implied by the point count.
start_mid_end <- function(point, points_df = Points, legs_df = Leg) {
  pts <- points_df[points_df$Transect == point, ]
  legs <- legs_df[legs_df$Transect == point, ]

  n_pts <- nrow(pts)
  n_legs <- nrow(legs)
  is_triangle <- n_pts == 6

  # Three legs for a triangle, otherwise one leg per pair of extra points
  expected_legs <- if (is_triangle) 3L else max(0L, (n_pts - 1L) %/% 2L)
  if (n_legs != expected_legs) {
    warning(
      "Transect ", point, ": ", n_pts, " point(s) imply ", expected_legs,
      " leg(s), but ", n_legs, " leg row(s) were found",
      call. = FALSE
    )
  }

  # seq_len() keeps this safe when the transect has no legs at all
  leg_no <- seq_len(n_legs)
  start_idx <- 2L * leg_no - 1L
  mid_idx <- 2L * leg_no
  end_idx <- 2L * leg_no + 1L
  if (is_triangle) {
    # The closing leg (C-A) returns to the first point
    end_idx[leg_no == 3L] <- 1L
  }

  # Indexing past the last point yields NA rather than an error
  legs$Start_x <- pts$x[start_idx]
  legs$Start_y <- pts$y[start_idx]
  legs$Mid_x <- pts$x[mid_idx]
  legs$Mid_y <- pts$y[mid_idx]
  legs$End_x <- pts$x[end_idx]
  # End_y must come from y; reading x here was the bug in the closing leg
  legs$End_y <- pts$y[end_idx]
  legs
}
Now you can pass in a Transect ID and it will give you the per-leg breakdown for that transect:
# Per-leg coordinates for the single transect "T076"
start_mid_end("T076")
# Transect Leg Start_x Start_y Mid_x Mid_y End_x End_y
#1 T076 A-B 38.53000 4.23000 38.53409 4.22811 38.53818 4.22622
#2 T076 B-C 38.53818 4.22622 38.53396 4.22465 38.52984 4.22281
#3 T076 C-A 38.52984 4.22281 38.53006 4.22553 38.53000 4.23000
# NOTE: End_y of the closing C-A leg is the y of Point 1 (4.23000); a result of
# 38.53000 in that cell would mean End_y was read from x instead of y.
If you would like all of them at once:
# Every transect ID as a plain string, in order of first appearance
points <- unique(as.character(Points$Transect))
# One table per transect, stacked row-wise into a single data frame
do.call("rbind", lapply(X = points, FUN = start_mid_end))
# Transect Leg Start_x Start_y Mid_x Mid_y End_x End_y
# 1 T076 A-B 38.53000 4.23000 38.53409 4.22811 38.53818 4.22622
# 2 T076 B-C 38.53818 4.22622 38.53396 4.22465 38.52984 4.22281
# 3 T076 C-A 38.52984 4.22281 38.53006 4.22553 38.53000 4.23000
# 4 T075 A-B 38.45000 4.22000 38.44936 4.22445 38.44942 4.22897
# 5 T075 B-C 38.44942 4.22897 38.45324 4.22659 38.45743 4.22481
# 6 T075 C-A 38.45743 4.22481 38.45382 4.22239 38.45000 4.22000
# 7 S125 A-B 38.29102 5.37832 38.29013 5.37391 38.28935 5.36949
# 8 S109 A-B 37.77980 5.00680 37.78030 5.01126 37.78109 5.01570
# 9 L2 A-B 38.08238 4.95384 38.07932 4.95693 38.07534 4.95914
# 10 L2 B-C 38.07534 4.95914 38.07143 4.96122 38.06737 4.96315