如何在R中根据图中相近的坐标对数据框进行子集化?

时间:2014-07-26 20:24:18

标签: r

问题

下图表示一个振荡回路,其中至少有2处的坐标(x = relative.v,y = gap.dist)近似相同: enter image description here

该图基于以下数据框:

> dput(df)
structure(list(Vehicle.ID = c(3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L
), Frame.ID = 8931:9277, gap.dist = c(14.26, 14.25, 14.18, 14.38, 
14.73, 15.34, 15.75, 16.07, 16.14, 16.28, 16.42, 16.58, 16.71, 
16.77, 16.75, 16.73, 16.78, 16.83, 16.77, 16.56, 16.28, 16.09, 
16.02, 16.03, 16.06, 16.08, 16.02, 15.8, 15.48, 15.08, 14.79, 
14.66, 14.76, 15, 15.29, 15.58, 15.86, 16.13, 16.4, 16.69, 17.01, 
17.39, 17.77, 18.18, 18.63, 19.1, 19.55, 19.87, 20.06, 20.14, 
20.06, 19.83, 19.52, 19.47, 19.57, 19.85, 20.24, 20.56, 20.76, 
20.8, 20.78, 20.82, 21.01, 21.31, 21.76, 22.27, 22.78, 23.24, 
23.81, 24.54, 25.36, 25.93, 26.35, 26.58, 26.63, 26.63, 26.61, 
26.6, 26.61, 26.63, 26.68, 26.76, 26.84, 26.88, 26.88, 26.86, 
26.86, 26.87, 26.84, 26.69, 26.39, 25.98, 25.55, 25.22, 25.05, 
24.96, 24.83, 24.61, 24.37, 24.19, 24.09, 24.03, 23.94, 23.81, 
23.6, 23.36, 23.14, 23, 22.96, 22.93, 22.85, 22.89, 23.09, 23.42, 
23.96, 24.47, 24.98, 25.48, 25.99, 26.49, 26.93, 27.29, 27.58, 
27.84, 28.12, 28.35, 28.5, 28.52, 28.49, 28.49, 28.49, 28.51, 
28.51, 28.46, 28.26, 27.94, 27.49, 27.04, 26.69, 26.46, 26.29, 
26.15, 26.05, 25.95, 25.77, 25.46, 25.01, 24.51, 23.99, 23.48, 
22.97, 22.46, 22.01, 21.71, 21.52, 21.46, 21.47, 21.5, 21.5, 
21.46, 21.35, 21.2, 21, 20.82, 20.73, 20.72, 20.77, 20.81, 20.83, 
20.84, 20.87, 20.93, 20.99, 20.98, 20.81, 20.51, 20.09, 19.5, 
18.84, 18.22, 17.67, 17.2, 16.76, 16.31, 15.85, 15.37, 14.87, 
14.4, 14.09, 13.92, 13.89, 14.04, 14.27, 14.44, 14.52, 14.53, 
14.51, 14.49, 14.48, 14.54, 14.73, 15.03, 15.48, 15.99, 16.51, 
17.01, 17.51, 18.01, 18.51, 19, 19.49, 19.97, 20.49, 21, 21.42, 
21.8, 22.2, 22.68, 23.22, 23.72, 24.16, 24.46, 24.64, 24.7, 24.71, 
24.69, 24.7, 24.79, 24.91, 24.99, 25, 25.04, 25.23, 25.54, 25.98, 
26.47, 26.96, 27.42, 27.88, 28.31, 28.71, 29.13, 29.6, 30.1, 
30.67, 31.22, 31.74, 32.22, 32.7, 33.18, 33.65, 34.14, 34.63, 
35.1, 35.48, 35.77, 35.93, 36.01, 36.02, 36.01, 35.99, 35.98, 
35.97, 36.03, 36.22, 36.53, 36.97, 37.48, 38, 38.5, 39, 39.5, 
40, 40.5, 41, 41.5, 42.02, 42.52, 42.95, 43.27, 43.55, 43.9, 
44.32, 44.71, 45.04, 45.32, 45.68, 46.13, 46.64, 47.15, 47.66, 
48.16, 48.66, 49.14, 49.59, 50.11, 50.76, 51.53, 52.39, 53.23, 
53.89, 54.33, 54.59, 54.69, 54.67, 54.65, 54.67, 54.63, 54.41, 
53.99, 53.32, 52.45, 51.37, 50.34, 49.58, 49.04, 48.75, 48.59, 
48.45, 48.33, 48.29, 48.35, 48.44, 48.5, 48.51, 48.5, 48.5, 48.5, 
48.5, 48.48, 48.5, 48.59, 48.7, 48.79, 48.81, 48.79, 48.79, 48.81, 
48.81, 48.76, 48.66, 48.48, 48.2, 47.9, 47.66, 47.53, 47.45), 
    relative.v = c(-0.231958711994036, -0.358786482018626, -0.493043858010829, 
    -0.603994747669997, -0.6834873406704, -0.704230725717888, 
    -0.683630108352705, -0.641990970504217, -0.605163689204243, 
    -0.567161947335347, -0.529429778626142, -0.482298368556108, 
    -0.42647331081773, -0.369173963973488, -0.317864145436122, 
    -0.27215092988564, -0.222271462250546, -0.165391992820446, 
    -0.108707037952307, -0.0675612213625278, -0.0477147206681607, 
    -0.0414454276080818, -0.0476618292889057, -0.067647250925063, 
    -0.0952135763957465, -0.132233210872627, -0.169032614951778, 
    -0.224558129705247, -0.305892089806985, -0.433192920594433, 
    -0.597058648138646, -0.784350388317396, -0.973527586046128, 
    -1.14462631936783, -1.29678622625557, -1.42761898722476, 
    -1.54906644175957, -1.6650595720809, -1.78209545480779, -1.88940628918218, 
    -1.97722871863032, -2.04225189672011, -2.08546695327304, 
    -2.1044860500428, -2.09796964248165, -2.06190787822558, -1.99844917265652, 
    -1.92193672217891, -1.84742373905972, -1.78780164244319, 
    -1.75565255090781, -1.7629350542013, -1.81635437869936, -1.89059599593372, 
    -1.97208540466579, -2.04481870206861, -2.09849716157203, 
    -2.14227073017713, -2.18512621990102, -2.24263101599094, 
    -2.31718949145358, -2.40514550573653, -2.49340510959946, 
    -2.57436989675751, -2.63544848011734, -2.6689901513396, -2.67350192589669, 
    -2.65699261834573, -2.608778757795, -2.51116460703307, -2.35312028472961, 
    -2.15792890969266, -1.93944582008588, -1.71401090565721, 
    -1.49897671478181, -1.30013098178194, -1.11987964932729, 
    -0.958163046295219, -0.809121425603362, -0.667803469072645, 
    -0.534495948291298, -0.405846360762691, -0.278032061203845, 
    -0.147624784135722, -0.0159661005255529, 0.116484810737678, 
    0.250849865326245, 0.384569960495014, 0.513832535556414, 
    0.628179368957984, 0.717238699454732, 0.772469483794143, 
    0.794081521859322, 0.790585433247973, 0.779778716786936, 
    0.769281747503701, 0.75462248893114, 0.723104131773244, 0.674524493111942, 
    0.616240015693645, 0.553447905749508, 0.481734241451385, 
    0.402808385044558, 0.311768591287493, 0.201557809654801, 
    0.070410636092415, -0.0823411571070167, -0.252235867028997, 
    -0.430192410178485, -0.616230547265296, -0.816932548764832, 
    -1.02128173816849, -1.21482132000639, -1.38472075536161, 
    -1.50943429334364, -1.59116024170469, -1.63280226450591, 
    -1.63605837201801, -1.60185887314092, -1.5328995122303, -1.43474942568578, 
    -1.31516263316973, -1.1799595548971, -1.02883334458002, -0.857296537717993, 
    -0.668656397373574, -0.472193386647078, -0.279835637875358, 
    -0.092896649784354, 0.097145191412725, 0.292469758233477, 
    0.495218389282222, 0.703194412307631, 0.909770485345668, 
    1.10146639181104, 1.26832925378148, 1.40180248390486, 1.50499863779708, 
    1.58976612056803, 1.66550728596222, 1.73760118083855, 1.81589554303689, 
    1.90721508935673, 2.01376994777457, 2.12983243693232, 2.23652373309107, 
    2.32254626761092, 2.38054357596528, 2.40810045535242, 2.40394926714677, 
    2.36686870916879, 2.29766870686011, 2.20318990716965, 2.09813260968445, 
    1.99570702750249, 1.90736988262587, 1.83720289917554, 1.78161187758373, 
    1.73983055853244, 1.70927792570621, 1.67988794712524, 1.64888910606142, 
    1.61361662041016, 1.57650953468424, 1.54372787913957, 1.5244314606538, 
    1.52330649732676, 1.54270508686956, 1.58143977064381, 1.636473487204, 
    1.70950429045165, 1.8033824724133, 1.92033405775614, 2.05384342976532, 
    2.18750215114725, 2.30790921788468, 2.40321870289172, 2.45883788394123, 
    2.47015967697281, 2.44077592149473, 2.37892289121748, 2.29041335774065, 
    2.18002829826845, 2.04966058535416, 1.89526484626084, 1.71171242741373, 
    1.49132081196869, 1.23396310133448, 0.95500954396001, 0.667632997103173, 
    0.38512077599491, 0.123847240601854, -0.110820120876213, 
    -0.325945037178951, -0.532885924719711, -0.742469835797991, 
    -0.962562120790832, -1.19596460888606, -1.44178959331716, 
    -1.69506549562133, -1.94534305483521, -2.18614411237883, 
    -2.40568264717123, -2.60218120721895, -2.77649129664741, 
    -2.92915604423611, -3.06183666455997, -3.17633945440348, 
    -3.27075283510883, -3.34427994841041, -3.39786295278141, 
    -3.43425773685353, -3.45015093218111, -3.44982594478056, 
    -3.43952908619572, -3.42325877405017, -3.3990465071641, -3.35731024625162, 
    -3.28947270437373, -3.19835930400911, -3.09107293257786, 
    -2.98055307462963, -2.88021428074092, -2.80276150187654, 
    -2.75309025614095, -2.73325794972305, -2.7383769416982, -2.75955864571761, 
    -2.79454299208424, -2.84780817145616, -2.92526145550618, 
    -3.02606844521819, -3.13610028523395, -3.24823094670719, 
    -3.35088783781275, -3.44130572873659, -3.51962222582804, 
    -3.58702568846841, -3.64477003478028, -3.69595013747865, 
    -3.74444364567029, -3.78928382057047, -3.82385598846015, 
    -3.84705287468454, -3.8533823819126, -3.84413871661519, -3.8208728402198, 
    -3.78328712769685, -3.73161338369486, -3.66873492116964, 
    -3.59810850476349, -3.51953651265736, -3.42816920472332, 
    -3.32424549358071, -3.21433654038894, -3.1106716969322, -3.02284837678458, 
    -2.95792586222043, -2.92245560276176, -2.92011630212311, 
    -2.95123964026695, -3.01418271871464, -3.10488014168443, 
    -3.21901482408042, -3.34577526567369, -3.47824389913844, 
    -3.60475459912302, -3.72016390758787, -3.82157566853097, 
    -3.90720269237743, -3.97714463557551, -4.0317774412849, -4.070071700155, 
    -4.09450454867099, -4.10700019098849, -4.11024913380821, 
    -4.10123489846076, -4.07776077086166, -4.04575609527759, 
    -4.0141144656488, -3.98773564407337, -3.95746943231781, -3.92304814843235, 
    -3.89232130775218, -3.87372958711487, -3.87265757794753, 
    -3.87838887453369, -3.87841365989132, -3.86578511177274, 
    -3.84037936211251, -3.8029346932891, -3.75199765045956, -3.6847316629871, 
    -3.60155934204821, -3.50339422489437, -3.38352798832636, 
    -3.22655437552623, -3.02125418814082, -2.75647914196206, 
    -2.43327481949344, -2.06824701900987, -1.68047086696324, 
    -1.28213769516684, -0.886240220676704, -0.500970059658314, 
    -0.12444000017868, 0.250991728630396, 0.623774616743511, 
    0.977709952219271, 1.29524721749485, 1.55606175102286, 1.74531832376017, 
    1.83966118036697, 1.84292431839432, 1.77940743062681, 1.67393618451536, 
    1.55755710494071, 1.44205554045755, 1.32670507088405, 1.20789781376152, 
    1.09171590771403, 0.987997471256861, 0.901897811783634, 0.829621803641423, 
    0.762741121859925, 0.699109063115515, 0.639246070171836, 
    0.583762561480121, 0.534201502943503, 0.487081766112105, 
    0.442650388764974, 0.407905770403289, 0.388964336776255, 
    0.381774987131507, 0.378025230385383, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA)), .Names = c("Vehicle.ID", "Frame.ID", 
"gap.dist", "relative.v"), class = "data.frame", row.names = 421135:421481)

用于绘图的代码:

# Scatter plot of gap distance (y) against relative velocity (x)
ggplot(df, aes(x = relative.v, y = gap.dist)) +
  geom_point()

此图只对应1辆车,而总共大约有1300辆车。我想找到相似的坐标,并把这些位置对应的所有行标记为"oss",即振荡的一部分。此外,我想对这个数据框进行子集化,把所有没有相似坐标(即没有振荡回路)的车辆过滤掉。

样本数据框

我在下面的示例数据上测试了一些代码:

# Toy data set: two vehicles (ids 2 and 3) with ten frames each.
# Vehicle 2 passes through (relative.v = 50, gap.dist = 10) twice (frames 1
# and 9); vehicle 3 never revisits a coordinate.
my.df <- data.frame(
  vehid = c(rep(2, 10), rep(3, 10)),
  frameid = rep(1:10, times = 2),
  relative.v = c(50, 52, 53, 55, 48, 45, 40, 47, 50, 51, 50:59),
  gap.dist = c(10, 12, 13, 15, 9, 7, 6, 9, 10, 10, 15:24)
)

我尝试了什么

我通过逐步解决问题达到了最终结果,但我确信这不是最有效的方法,必须有更快的方法来做到这一点。

# Label rows whose (relative.v, gap.dist) coordinate occurs more than once
# within the same vehicle, then drop vehicles that have no such rows.
#
# Input : my.df with columns vehid, frameid, relative.v, gap.dist.
# Output: my.df ordered by vehid and frameid, with added columns
#   dup  - TRUE when an earlier row of the vehicle has the same relative.v
#   dup2 - TRUE when an earlier row of the vehicle has the same coordinate
#   oss  - "oss" for every row of a repeated coordinate, "." otherwise

# Duplicated relative velocity / duplicated coordinate (earlier-row flags)
my.df$dup <- duplicated(my.df[c("vehid", "relative.v")])
my.df$dup2 <- duplicated(my.df[c("vehid", "relative.v", "gap.dist")])

# Labelling the similar coordinates. Scanning from both ends flags the first
# occurrence as well as the later repeats, so no "row above" offset is needed
# (an offset breaks when an unrelated row sits between the two occurrences).
my.df$oss <- local({
  coord <- my.df[c("vehid", "relative.v", "gap.dist")]
  repeated <- duplicated(coord) | duplicated(coord, fromLast = TRUE)
  # A row with a missing coordinate is never counted as a repeat
  ifelse(repeated & complete.cases(coord), "oss", ".")
})

# Ordering by frameid within each vehicle
my.df <- my.df[order(my.df$vehid, my.df$frameid), ]
rownames(my.df) <- NULL

# Filtering out the vehicles with no oscillation
my.df <- my.df[my.df$vehid %in% my.df$vehid[my.df$oss == "oss"], ]

问题

我的代码适用于示例数据框,但对df不起作用。显然,最后一步对df不适用,但这不是主要问题。我想在df中创建一个类似于my.df中的oss列,用来标记relative.v和gap.dist大致相同的2行。请帮忙!

1 个答案:

答案 0 :(得分:2)

如果你追求速度,我认为data.table是最好的选择,尤其是因为你有这么多分组需要处理。

# Flag, per vehicle, the whole stretch of frames between the first and the
# last exactly-repeated (relative.v, gap.dist) coordinate, and drop vehicles
# that have no repeated coordinate at all.
# Adds the columns oss (1/0), keep, min.frameid and max.frameid to my.df.

# Bring in data.table and set my.df as data.table.
# library() stops with an error when the package is missing, whereas
# require() would only return FALSE and let the script fail later.
library(data.table)
setDT(my.df)

# Label is oss or not if there is a duplicated coordinate within the vehicle
my.df[, oss := 1 * (.N > 1), by = .(vehid, relative.v, gap.dist)]

# Only keep vehicle ids with a duplicate
my.df[, keep := max(oss), by = vehid]
my.df <- my.df[keep != 0]

# Find the starting and stopping points: first and last frame of each vehicle
# that carries a repeated coordinate. After the filter above every vehicle
# has at least one oss == 1 row, so range() always gets a non-empty vector.
# Subsetting by oss (instead of multiplying by it) also stays correct for
# zero or negative frame ids.
my.df[
  ,
  c("min.frameid", "max.frameid") := as.list(range(frameid[oss == 1])),
  by = vehid
]

# Make oss 1 in between all stopping points
my.df[, oss := 1 * between(frameid, min.frameid, max.frameid)]

由于OP的变化而编辑:

您后来补充的数据表明,relative.v和gap.dist并没有完全相同的重复值,而只有近似重复的值。您可以尝试对数据排序,查看相邻点之间的差异,然后确定一个阈值:差异小于该阈值的点就视为相同。

# Order rows by relative.v (ties broken by gap.dist) within each vehicle so
# that the closest points end up next to each other
my.df <- my.df[order(Vehicle.ID, relative.v, gap.dist)]

# Absolute step from the preceding row of the same vehicle, for both
# coordinates; the first row of each vehicle has no predecessor and gets NA
my.df[
  ,
  c("gap.dist.diff", "relative.v.diff") := lapply(
    .SD, function(col) c(NA, abs(diff(col)))
  ),
  by = Vehicle.ID,
  .SDcols = c("gap.dist", "relative.v")
]

后续步骤:

# Snap near-duplicate coordinates onto the neighbour they were compared with,
# so that approximate repeats become exact repeats.
# Expects my.df sorted ascending by (Vehicle.ID, relative.v, gap.dist), with
# gap.dist.diff / relative.v.diff holding the absolute step from the previous
# row of the same vehicle. Thresholds: 0.5 for gap.dist, 0.2 for relative.v.

# Give rank order to data (row position in the ascending sort)
my.df[, newID := .I]

# Sort data other way: the row each *.diff was measured against (the previous
# row in ascending order) now sits directly below the current row
my.df <- my.df[order(Vehicle.ID, relative.v, gap.dist, decreasing = TRUE)]

# Change values if within threshold
my.df[, c("relative.v2", "gap.dist2") := {
  # A missing difference (first row of a vehicle, or an NA coordinate) counts
  # as "not close"; otherwise ifelse() would return NA and wipe the value.
  near_dup <- !is.na(gap.dist.diff) & !is.na(relative.v.diff) &
    gap.dist.diff < 0.5 & relative.v.diff < 0.2
  # The neighbour is the next row in the current (descending) order. The old
  # code indexed with an undefined `ID`, and newID (the ascending rank) would
  # point at the wrong row after the re-sort.
  list(
    ifelse(near_dup, shift(relative.v, type = "lead"), relative.v),
    ifelse(near_dup, shift(gap.dist, type = "lead"), gap.dist)
  )
}]