我有一个包含如下字符串的数据框:
bla bla.\n14:39:51 info: pyku bla .\n14:39:51 info: \n14:39:51 info: \n14:39:57 Sam: <span>pyk pyk</span>\n14:43:15 on and on \n14:43:59 you get an idea
我想把由 \n(number):(number):(number) 序列分隔的各行拆分为数据框中的不同行。我试过
stringr::separate_rows(df3$Transcript[1], Transcript , sep = "\\n")
以及它与 [A-z] 和 [:punct:] 的各种组合,但都无济于事。这样做最直接的方式是什么?
由于
答案 0 :(得分:2)
您希望在后面紧跟时间戳的换行符处拆分字符串。可以使用基础 R 的 strsplit 函数,配合一个基于正向前瞻的 PCRE 正则表达式:
\R+(?=\d{2}:\d{2}:\d{2})
请参阅 regex demo。
模式详情:
- \R+ - 一个或多个换行符序列(\n、\r 或 \r\n);
- (?=\d{2}:\d{2}:\d{2}) - 后面必须紧跟 2 位数字、冒号(:)、2 位数字、冒号(:),再加 2 位数字。
由于 (?=...) 是一个正向前瞻(零宽度断言,不会将匹配的字符放入匹配值),因此与之匹配的文本不会从结果中删除。
R 示例:
s <- "bla bla.\n14:39:51 info: pyku bla .\n14:39:51 info: \n14:39:51 info: \n14:39:57 Sam: <span>pyk pyk</span>\n14:43:15 on and on \n14:43:59 you get an idea"
strsplit(s, "\\R+(?=\\d{2}:\\d{2}:\\d{2})", perl=TRUE)
输出:
[[1]]
[1] "bla bla." "14:39:51 info: pyku bla ."
[3] "14:39:51 info: " "14:39:51 info: "
[5] "14:39:57 Sam: <span>pyk pyk</span>" "14:43:15 on and on "
[7] "14:43:59 you get an idea"
' Compare two lists and report the values that appear only in the second one.
'
' Reads column A (row 2 downwards) of the first and second sheets of the
' workbook. Every value from the second sheet's list that has no match in the
' first sheet's list is appended to column A of the third sheet, below the
' header "Extras in List 2" (the header is written only if A1 is empty).
Sub Compare()
    Dim sh1 As Worksheet, sh2 As Worksheet, sh3 As Worksheet
    Dim lr1 As Long, lr2 As Long
    Dim rng1 As Range, rng2 As Range, c As Range

    Set sh1 = Sheets(1)
    Set sh2 = Sheets(2)
    Set sh3 = Sheets(3)

    ' Last used row in column A of each list. Rows.Count is qualified with the
    ' sheet so the lookup does not depend on which sheet is active.
    lr1 = sh1.Cells(sh1.Rows.Count, 1).End(xlUp).Row
    lr2 = sh2.Cells(sh2.Rows.Count, 1).End(xlUp).Row

    With sh3 ' If the header is not there, put it in
        If .Range("a1") = "" Then
            .Range("a1") = "Extras in List 2"
        End If
    End With

    ' List 2 has no data rows: "A2:A1" would be resolved as A1:A2 and the
    ' header row would be compared as if it were data, so stop here.
    If lr2 < 2 Then Exit Sub

    ' List 1 has no data rows: clamp to A2 so its header (row 1) is not
    ' treated as a list entry; every value in list 2 is then an extra.
    If lr1 < 2 Then lr1 = 2

    Set rng1 = sh1.Range("A2:A" & lr1)
    Set rng2 = sh2.Range("A2:A" & lr2)

    For Each c In rng2
        ' A count of zero means the value does not occur anywhere in list 1.
        If Application.CountIf(rng1, c.Value) = 0 Then
            ' (2) addresses the cell directly below the last used cell in column A.
            sh3.Cells(sh3.Rows.Count, 1).End(xlUp)(2) = c.Value
        End If
    Next c
End Sub