下图展示了一个振荡回路:图中至少有 2 处,坐标(x = relative.v,y = gap.dist)近似相同:
该图基于以下数据框:
> dput(df)
structure(list(Vehicle.ID = c(3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L,
3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L, 3002L
), Frame.ID = 8931:9277, gap.dist = c(14.26, 14.25, 14.18, 14.38,
14.73, 15.34, 15.75, 16.07, 16.14, 16.28, 16.42, 16.58, 16.71,
16.77, 16.75, 16.73, 16.78, 16.83, 16.77, 16.56, 16.28, 16.09,
16.02, 16.03, 16.06, 16.08, 16.02, 15.8, 15.48, 15.08, 14.79,
14.66, 14.76, 15, 15.29, 15.58, 15.86, 16.13, 16.4, 16.69, 17.01,
17.39, 17.77, 18.18, 18.63, 19.1, 19.55, 19.87, 20.06, 20.14,
20.06, 19.83, 19.52, 19.47, 19.57, 19.85, 20.24, 20.56, 20.76,
20.8, 20.78, 20.82, 21.01, 21.31, 21.76, 22.27, 22.78, 23.24,
23.81, 24.54, 25.36, 25.93, 26.35, 26.58, 26.63, 26.63, 26.61,
26.6, 26.61, 26.63, 26.68, 26.76, 26.84, 26.88, 26.88, 26.86,
26.86, 26.87, 26.84, 26.69, 26.39, 25.98, 25.55, 25.22, 25.05,
24.96, 24.83, 24.61, 24.37, 24.19, 24.09, 24.03, 23.94, 23.81,
23.6, 23.36, 23.14, 23, 22.96, 22.93, 22.85, 22.89, 23.09, 23.42,
23.96, 24.47, 24.98, 25.48, 25.99, 26.49, 26.93, 27.29, 27.58,
27.84, 28.12, 28.35, 28.5, 28.52, 28.49, 28.49, 28.49, 28.51,
28.51, 28.46, 28.26, 27.94, 27.49, 27.04, 26.69, 26.46, 26.29,
26.15, 26.05, 25.95, 25.77, 25.46, 25.01, 24.51, 23.99, 23.48,
22.97, 22.46, 22.01, 21.71, 21.52, 21.46, 21.47, 21.5, 21.5,
21.46, 21.35, 21.2, 21, 20.82, 20.73, 20.72, 20.77, 20.81, 20.83,
20.84, 20.87, 20.93, 20.99, 20.98, 20.81, 20.51, 20.09, 19.5,
18.84, 18.22, 17.67, 17.2, 16.76, 16.31, 15.85, 15.37, 14.87,
14.4, 14.09, 13.92, 13.89, 14.04, 14.27, 14.44, 14.52, 14.53,
14.51, 14.49, 14.48, 14.54, 14.73, 15.03, 15.48, 15.99, 16.51,
17.01, 17.51, 18.01, 18.51, 19, 19.49, 19.97, 20.49, 21, 21.42,
21.8, 22.2, 22.68, 23.22, 23.72, 24.16, 24.46, 24.64, 24.7, 24.71,
24.69, 24.7, 24.79, 24.91, 24.99, 25, 25.04, 25.23, 25.54, 25.98,
26.47, 26.96, 27.42, 27.88, 28.31, 28.71, 29.13, 29.6, 30.1,
30.67, 31.22, 31.74, 32.22, 32.7, 33.18, 33.65, 34.14, 34.63,
35.1, 35.48, 35.77, 35.93, 36.01, 36.02, 36.01, 35.99, 35.98,
35.97, 36.03, 36.22, 36.53, 36.97, 37.48, 38, 38.5, 39, 39.5,
40, 40.5, 41, 41.5, 42.02, 42.52, 42.95, 43.27, 43.55, 43.9,
44.32, 44.71, 45.04, 45.32, 45.68, 46.13, 46.64, 47.15, 47.66,
48.16, 48.66, 49.14, 49.59, 50.11, 50.76, 51.53, 52.39, 53.23,
53.89, 54.33, 54.59, 54.69, 54.67, 54.65, 54.67, 54.63, 54.41,
53.99, 53.32, 52.45, 51.37, 50.34, 49.58, 49.04, 48.75, 48.59,
48.45, 48.33, 48.29, 48.35, 48.44, 48.5, 48.51, 48.5, 48.5, 48.5,
48.5, 48.48, 48.5, 48.59, 48.7, 48.79, 48.81, 48.79, 48.79, 48.81,
48.81, 48.76, 48.66, 48.48, 48.2, 47.9, 47.66, 47.53, 47.45),
relative.v = c(-0.231958711994036, -0.358786482018626, -0.493043858010829,
-0.603994747669997, -0.6834873406704, -0.704230725717888,
-0.683630108352705, -0.641990970504217, -0.605163689204243,
-0.567161947335347, -0.529429778626142, -0.482298368556108,
-0.42647331081773, -0.369173963973488, -0.317864145436122,
-0.27215092988564, -0.222271462250546, -0.165391992820446,
-0.108707037952307, -0.0675612213625278, -0.0477147206681607,
-0.0414454276080818, -0.0476618292889057, -0.067647250925063,
-0.0952135763957465, -0.132233210872627, -0.169032614951778,
-0.224558129705247, -0.305892089806985, -0.433192920594433,
-0.597058648138646, -0.784350388317396, -0.973527586046128,
-1.14462631936783, -1.29678622625557, -1.42761898722476,
-1.54906644175957, -1.6650595720809, -1.78209545480779, -1.88940628918218,
-1.97722871863032, -2.04225189672011, -2.08546695327304,
-2.1044860500428, -2.09796964248165, -2.06190787822558, -1.99844917265652,
-1.92193672217891, -1.84742373905972, -1.78780164244319,
-1.75565255090781, -1.7629350542013, -1.81635437869936, -1.89059599593372,
-1.97208540466579, -2.04481870206861, -2.09849716157203,
-2.14227073017713, -2.18512621990102, -2.24263101599094,
-2.31718949145358, -2.40514550573653, -2.49340510959946,
-2.57436989675751, -2.63544848011734, -2.6689901513396, -2.67350192589669,
-2.65699261834573, -2.608778757795, -2.51116460703307, -2.35312028472961,
-2.15792890969266, -1.93944582008588, -1.71401090565721,
-1.49897671478181, -1.30013098178194, -1.11987964932729,
-0.958163046295219, -0.809121425603362, -0.667803469072645,
-0.534495948291298, -0.405846360762691, -0.278032061203845,
-0.147624784135722, -0.0159661005255529, 0.116484810737678,
0.250849865326245, 0.384569960495014, 0.513832535556414,
0.628179368957984, 0.717238699454732, 0.772469483794143,
0.794081521859322, 0.790585433247973, 0.779778716786936,
0.769281747503701, 0.75462248893114, 0.723104131773244, 0.674524493111942,
0.616240015693645, 0.553447905749508, 0.481734241451385,
0.402808385044558, 0.311768591287493, 0.201557809654801,
0.070410636092415, -0.0823411571070167, -0.252235867028997,
-0.430192410178485, -0.616230547265296, -0.816932548764832,
-1.02128173816849, -1.21482132000639, -1.38472075536161,
-1.50943429334364, -1.59116024170469, -1.63280226450591,
-1.63605837201801, -1.60185887314092, -1.5328995122303, -1.43474942568578,
-1.31516263316973, -1.1799595548971, -1.02883334458002, -0.857296537717993,
-0.668656397373574, -0.472193386647078, -0.279835637875358,
-0.092896649784354, 0.097145191412725, 0.292469758233477,
0.495218389282222, 0.703194412307631, 0.909770485345668,
1.10146639181104, 1.26832925378148, 1.40180248390486, 1.50499863779708,
1.58976612056803, 1.66550728596222, 1.73760118083855, 1.81589554303689,
1.90721508935673, 2.01376994777457, 2.12983243693232, 2.23652373309107,
2.32254626761092, 2.38054357596528, 2.40810045535242, 2.40394926714677,
2.36686870916879, 2.29766870686011, 2.20318990716965, 2.09813260968445,
1.99570702750249, 1.90736988262587, 1.83720289917554, 1.78161187758373,
1.73983055853244, 1.70927792570621, 1.67988794712524, 1.64888910606142,
1.61361662041016, 1.57650953468424, 1.54372787913957, 1.5244314606538,
1.52330649732676, 1.54270508686956, 1.58143977064381, 1.636473487204,
1.70950429045165, 1.8033824724133, 1.92033405775614, 2.05384342976532,
2.18750215114725, 2.30790921788468, 2.40321870289172, 2.45883788394123,
2.47015967697281, 2.44077592149473, 2.37892289121748, 2.29041335774065,
2.18002829826845, 2.04966058535416, 1.89526484626084, 1.71171242741373,
1.49132081196869, 1.23396310133448, 0.95500954396001, 0.667632997103173,
0.38512077599491, 0.123847240601854, -0.110820120876213,
-0.325945037178951, -0.532885924719711, -0.742469835797991,
-0.962562120790832, -1.19596460888606, -1.44178959331716,
-1.69506549562133, -1.94534305483521, -2.18614411237883,
-2.40568264717123, -2.60218120721895, -2.77649129664741,
-2.92915604423611, -3.06183666455997, -3.17633945440348,
-3.27075283510883, -3.34427994841041, -3.39786295278141,
-3.43425773685353, -3.45015093218111, -3.44982594478056,
-3.43952908619572, -3.42325877405017, -3.3990465071641, -3.35731024625162,
-3.28947270437373, -3.19835930400911, -3.09107293257786,
-2.98055307462963, -2.88021428074092, -2.80276150187654,
-2.75309025614095, -2.73325794972305, -2.7383769416982, -2.75955864571761,
-2.79454299208424, -2.84780817145616, -2.92526145550618,
-3.02606844521819, -3.13610028523395, -3.24823094670719,
-3.35088783781275, -3.44130572873659, -3.51962222582804,
-3.58702568846841, -3.64477003478028, -3.69595013747865,
-3.74444364567029, -3.78928382057047, -3.82385598846015,
-3.84705287468454, -3.8533823819126, -3.84413871661519, -3.8208728402198,
-3.78328712769685, -3.73161338369486, -3.66873492116964,
-3.59810850476349, -3.51953651265736, -3.42816920472332,
-3.32424549358071, -3.21433654038894, -3.1106716969322, -3.02284837678458,
-2.95792586222043, -2.92245560276176, -2.92011630212311,
-2.95123964026695, -3.01418271871464, -3.10488014168443,
-3.21901482408042, -3.34577526567369, -3.47824389913844,
-3.60475459912302, -3.72016390758787, -3.82157566853097,
-3.90720269237743, -3.97714463557551, -4.0317774412849, -4.070071700155,
-4.09450454867099, -4.10700019098849, -4.11024913380821,
-4.10123489846076, -4.07776077086166, -4.04575609527759,
-4.0141144656488, -3.98773564407337, -3.95746943231781, -3.92304814843235,
-3.89232130775218, -3.87372958711487, -3.87265757794753,
-3.87838887453369, -3.87841365989132, -3.86578511177274,
-3.84037936211251, -3.8029346932891, -3.75199765045956, -3.6847316629871,
-3.60155934204821, -3.50339422489437, -3.38352798832636,
-3.22655437552623, -3.02125418814082, -2.75647914196206,
-2.43327481949344, -2.06824701900987, -1.68047086696324,
-1.28213769516684, -0.886240220676704, -0.500970059658314,
-0.12444000017868, 0.250991728630396, 0.623774616743511,
0.977709952219271, 1.29524721749485, 1.55606175102286, 1.74531832376017,
1.83966118036697, 1.84292431839432, 1.77940743062681, 1.67393618451536,
1.55755710494071, 1.44205554045755, 1.32670507088405, 1.20789781376152,
1.09171590771403, 0.987997471256861, 0.901897811783634, 0.829621803641423,
0.762741121859925, 0.699109063115515, 0.639246070171836,
0.583762561480121, 0.534201502943503, 0.487081766112105,
0.442650388764974, 0.407905770403289, 0.388964336776255,
0.381774987131507, 0.378025230385383, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA)), .Names = c("Vehicle.ID", "Frame.ID",
"gap.dist", "relative.v"), class = "data.frame", row.names = 421135:421481)
# Scatter plot of gap distance against relative velocity for the data frame above
ggplot(df, aes(x = relative.v, y = gap.dist)) +
  geom_point()
此图仅针对 1 辆车,而数据中总共约有 1300 辆车。我想找出相似的坐标,并将它们所在的位置及相应的所有行标记为 "oss",即属于振荡的一部分。此外,我还想对这个数据框取子集,把所有没有相似坐标(即没有振荡回路)的车辆过滤掉。
我在下面的示例数据上测试了一些代码:
# Toy data: vehicle 2 revisits the point (relative.v = 50, gap.dist = 10)
# at frames 1 and 9; vehicle 3 never repeats a point.
my.df <- data.frame(
  vehid      = rep(c(2, 3), each = 10),
  frameid    = rep(1:10, times = 2),
  relative.v = c(50, 52, 53, 55, 48, 45, 40, 47, 50, 51, 50:59),
  gap.dist   = c(10, 12, 13, 15, 9, 7, 6, 9, 10, 10, 15:24)
)
我通过逐步解决问题达到了最终结果,但我确信这不是最有效的方法,必须有更快的方法来做到这一点。
# ddply() and .() come from plyr
library(plyr)
# Duplicated relative velocity / gap distance flags (within each vehicle and
# relative.v value). Kept so the resulting columns match the original script.
my.df <- ddply(my.df, .(vehid, relative.v), transform, dup = duplicated(relative.v), dup2 = duplicated(gap.dist))
# Label the similar coordinates: every row whose exact (relative.v, gap.dist)
# pair occurs more than once for the same vehicle is part of an oscillation.
# Grouping on the full pair labels the first occurrence as well, so there is
# no need to assume it sits on the row directly above a duplicate.
# Rows with a missing coordinate are never treated as a match.
my.df <- ddply(my.df, .(vehid, relative.v, gap.dist), transform,
               oss = rep(if (length(frameid) > 1 && !is.na(relative.v[1]) && !is.na(gap.dist[1])) 'oss' else '.',
                         length(frameid)))
# Ordering by frameid within each vehicle
my.df <- ddply(my.df, .(vehid), function(x) x[order(x$frameid), ])
# Filtering out the vehicles with no oscillation (no row labelled 'oss')
my.df <- subset(my.df, ave(oss == 'oss', vehid, FUN = any))
我的代码适用于示例数据框 my.df,但对于 df 它不起作用。显然,最后一步不适用于 df,不过这不是主要问题。我想在 df 中创建一个类似于 my.df 中的 oss 列,用来标记 relative.v 和 gap.dist 大致相同的 2 行。请帮忙!
答案 0 :(得分:2)
如果你追求速度,我认为 data.table 是最合适的选择,尤其是因为你有这么多分组要处理。
# Bring in data.table (library() fails loudly if the package is missing)
# and convert my.df to a data.table by reference
library(data.table)
setDT(my.df)
# Flag (1/0) rows whose exact (relative.v, gap.dist) pair occurs more than
# once within the same vehicle
my.df[ , oss := 1*(.N > 1), by=list(vehid,relative.v,gap.dist)]
# Only keep vehicle ids with at least one duplicated pair
my.df[ , keep := max(oss), by=vehid]
my.df <- my.df[keep!=0]
# Find the starting and stopping points: first and last frame among the
# flagged rows of each vehicle. Every remaining vehicle has at least one
# flagged row, so the subsets are never empty. Subsetting on oss (rather than
# multiplying frameid by oss) stays correct for zero or negative frame ids.
my.df[ , min.frameid := min(frameid[oss==1]), by=vehid]
my.df[ , max.frameid := max(frameid[oss==1]), by=vehid]
# Make oss 1 for every frame between the starting and stopping points
my.df[ , oss := 1*(between(frameid,min.frameid,max.frameid)), by=vehid]
由于OP的变化而编辑:
您补充发布的数据似乎表明,relative.v 和 gap.dist 并没有真正的重复值,只有近似重复值。您可以尝试先对数据排序,计算相邻点之间的差值,再确定一个差值阈值:差值低于该阈值时,就认为这些点是相同的。
# Order rows within each vehicle by relative.v, then gap.dist
my.df <- my.df[order(Vehicle.ID, relative.v, gap.dist)]
# Absolute difference to the preceding row in that order, per vehicle;
# the first row of each vehicle has no predecessor and gets NA
my.df[ , c("gap.dist.diff", "relative.v.diff") :=
         list(c(NA, abs(diff(gap.dist))),
              c(NA, abs(diff(relative.v)))),
       by = Vehicle.ID]
后续步骤:
# Record each row's position in the ascending sort (kept for reference;
# it is not used for indexing below)
my.df[ , newID := .I]
# Sort data the other way. Each *.diff value was taken against the preceding
# row of the ascending order; after reversing, that neighbour is the NEXT row.
my.df <- my.df[order(Vehicle.ID, relative.v, gap.dist, decreasing = TRUE)]
# Change values if within threshold: take the neighbour's coordinates when both
# differences are below the thresholds, otherwise keep the row's own values.
# shift(type = "lead") fetches the next row inside the same vehicle; the
# original indexed with a non-existent column `ID`.
# `%in% TRUE` treats NA differences (first row per vehicle, missing relative.v)
# as "not similar" instead of propagating NA into the result.
my.df[ , relative.v2 := ifelse((gap.dist.diff < 0.5 & relative.v.diff < 0.2) %in% TRUE,
                               shift(relative.v, type = "lead"), relative.v),
       by = Vehicle.ID]
my.df[ , gap.dist2 := ifelse((gap.dist.diff < 0.5 & relative.v.diff < 0.2) %in% TRUE,
                             shift(gap.dist, type = "lead"), gap.dist),
       by = Vehicle.ID]