我有一个像这样的数据集。
ID EQP_ID DATE ENTRY EXIT
10 1232 10/01/2018 0058 NA
10 8123 10/01/2018 NA 0059
11 8231 10/02/2018 0063 NA
11 233 10/03/2018 0064 NA
11 2512 10/04/2018 NA 0099
11 2111 10/05/2018 NA 1000
我想折叠观察值,以便对每个ID,将带有“ENTRY”值的最早一行与带有“EXIT”值的最新一行合并,同时保留与退出记录相关联的EQP_ID:
ID EQP_ID ENTRY EXIT
10 8123 0058 0059
11 2111 0063 1000
我对R还很陌生,这个问题对我来说比较复杂,我想不出不借助循环的好办法,而循环的性能又不太好。
编辑
我想到了下面这种做法,但仍然很好奇更有经验的人是否有更好的答案:
# Collapse `dataset` to one row per ID: first ENTRY, last EXIT and the EQP_ID
# of the last row. Rows are sorted by ENTRY first, so rows whose ENTRY is NA
# (the exit records in the sample data) end up last.
# NOTE(review): arrange() ignores grouping by default, so this sorts the whole
# table by ENTRY; the per-group first()/last() still work on the sorted rows.
# NOTE(review): this assumes exit records always have ENTRY == NA - confirm.
dataset %>%
  group_by(ID) %>%
  arrange(ENTRY) %>%
  # R is case-sensitive: the column is EXIT, not exit.
  summarize(ENTRY = first(ENTRY), EXIT = last(EXIT), EQP_ID = last(EQP_ID))
答案 0 :(得分:0)
带有data.table的一个选项:
library(data.table)

# Create example data: one row per event, with either an entry or an exit value.
dt <- data.table(
  id    = c(10, 10, 11, 11, 11, 11),
  date  = seq(as.Date("2018-10-01"), as.Date("2018-10-06"), by = "day"),
  entry = c(58, NA, 63, 64, NA, NA),
  exit  = c(NA, 59, NA, NA, 99, 100)
)

# Number rows within each id in date order (1 = earliest row of the id).
dt[order(id, date), num := seq_len(.N), by = id]

# First non-NA entry and last non-NA exit per id, evaluated in date order so
# the result does not depend on how the rows happen to be stored.
# Indexing with [1] yields NA (instead of a zero-length vector) when an id has
# no entry / no exit at all, so the assignment never fails.
dt[order(id, date), keepentry := entry[!is.na(entry)][1], by = id]
dt[order(id, date), keepexit := rev(exit[!is.na(exit)])[1], by = id]

# Keep one row per id.
dt[num == 1, .(id, keepentry, keepexit)]
这不是我写过的最优雅的代码,但可以完成任务。
答案 1 :(得分:0)
使用PLAY [Print variables defined earlier]
*************************************************************************
TASK [debug]
*************************************************************************
ok: [192.168.1.13] => (item=None) => {
"hostvars[\"192.168.1.12\"]['temp_file']['path']": "/tmp/ansible.DZIGlA"
}
和
Sub AddTriangleShape()
    ' Rebuilds the "@END@" marker on every non-hidden slide of the active
    ' presentation: first removes existing shapes named "@END@", then adds a
    ' small blue right triangle with a dissolve effect appended to the slide's
    ' main animation sequence.
    Dim osld As Slide
    Dim oSh As Shape
    Dim oEffect As Effect
    Dim ShapesToDelete() As Shape
    Dim ShapeCount As Long
    Dim i As Long

    ' Slot 0 is never used; collected shapes live in 1..ShapeCount.
    ReDim ShapesToDelete(0)

    ' Pass 1: collect the existing markers. They are deleted afterwards so the
    ' Shapes collections are not modified while they are being enumerated.
    For Each osld In ActivePresentation.Slides
        If Not osld.SlideShowTransition.Hidden Then
            For Each oSh In osld.Shapes
                If oSh.Name Like "@END@" Then
                    ShapeCount = ShapeCount + 1
                    ReDim Preserve ShapesToDelete(0 To ShapeCount)
                    Set ShapesToDelete(ShapeCount) = oSh
                End If
            Next oSh
        End If
    Next osld

    For i = 1 To ShapeCount
        ShapesToDelete(i).Delete
    Next i

    ' Pass 2: add a fresh marker to every non-hidden slide.
    For Each osld In ActivePresentation.Slides
        If Not osld.SlideShowTransition.Hidden Then
            ' AddShape arguments: type, left, top, width, height.
            Set oSh = osld.Shapes.AddShape(msoShapeRightTriangle, 947, 529, 6, 6)
            With oSh
                .Line.ForeColor.RGB = RGB(0, 0, 255)
                .Fill.Visible = msoTrue
                .Fill.ForeColor.RGB = RGB(0, 0, 255)
                .BlackWhiteMode = msoBlackWhiteDontShow
                .Flip (msoFlipHorizontal)
                .Name = "@END@"
            End With
            Set oEffect = osld.TimeLine.MainSequence.AddEffect _
                (Shape:=oSh, effectid:=msoAnimEffectDissolve, trigger:=msoAnimTriggerAfterPrevious, Index:=-1)
        End If
    Next osld
End Sub
,我们可以执行以下操作;可以使用dplyr::first
和dplyr::last
min
答案 2 :(得分:0)
此解决方案使用dplyr。首先,定义数据框。
# Build the example data frame from inline whitespace-delimited text whose
# first line holds the column names. The ENTRY/EXIT values are parsed as
# integers, so "0058" becomes 58.
df <- local({
  raw_rows <- "ID EQP_ID DATE ENTRY EXIT
10 1232 10/01/2018 0058 NA
10 8123 10/01/2018 NA 0059
11 8231 10/02/2018 0063 NA
11 233 10/03/2018 0064 NA
11 2512 10/04/2018 NA 0099
11 2111 10/05/2018 NA 1000"
  read.table(header = TRUE, text = raw_rows)
})
接下来,按ID分组,并分别使用head或tail取得该组中变量的第一个或最后一个值。
# dplyr supplies %>%, arrange(), group_by(), summarise(), first() and last().
library(dplyr)

# One row per ID: the earliest non-missing ENTRY, the latest non-missing EXIT,
# and the EQP_ID of that latest exit record.
df %>%
  # Sort chronologically so "first"/"last" do not depend on input row order;
  # arrange() is stable, so rows sharing a date keep their original order.
  arrange(ID, as.Date(DATE, format = "%m/%d/%Y")) %>%
  group_by(ID) %>%
  # Filter out NAs before picking, so an ID whose first row is an exit record
  # (or whose last row is an entry record) still yields real values.
  # first()/last() return NA when nothing is left after filtering.
  summarise(EQP_ID = last(EQP_ID[!is.na(EXIT)]),
            ENTRY = first(ENTRY[!is.na(ENTRY)]),
            EXIT = last(EXIT[!is.na(EXIT)]))
结果如下:
# # A tibble: 2 x 4
# ID EQP_ID ENTRY EXIT
# <int> <int> <int> <int>
# 1 10 8123 58 59
# 2 11 2111 63 1000