
时间:2017-01-01 17:32:19

标签: python excel text extract


ksjd 234first special 34-37xy kjsbn
sde 89second special 22-23xh ewio
647red special 55fg dsk
uuire another special 98
another special 107r
green special 55-59 ewk
blue special 31-39jkl

我需要提取单词 "special"、紧挨在它左边的那个单词,以及它右边的数字(或数字范围)。换句话说,我想要得到:

enter image description here


enter image description here

3 个答案:

答案 0 :(得分:3)


In [1]: import re

In [2]: text = '''234first special 34-37xy                          
   ...: 89second special 22-23xh
   ...: 647red special 55fg
   ...: another special 98
   ...: another special 107r
   ...: green special 55-59
   ...: blue special 31-39jkl'''

In [3]: [re.findall('\d*\s*(\S+)\s+(special)\s+(\d+(?:-\d+)?)', line)[0] for line in text.splitlines()]
[('first', 'special', '34-37'),
 ('second', 'special', '22-23'),
 ('red', 'special', '55'),
 ('another', 'special', '98'),
 ('another', 'special', '107'),
 ('green', 'special', '55-59'),
 ('blue', 'special', '31-39')]

答案 1 :(得分:3)


  1. 选择一个空白单元格并输入公式 =MID(A1,SEARCH("KTE",A1)+3,SEARCH("feature",A1)-SEARCH("KTE",A1)-4),然后按 Enter 键。

  2. 拖动填充手柄以填充要应用此公式的范围。现在只提取“KTE”和“feature”之间的文本字符串。

  3. 注意:

    1. 在此公式中,A1是您要从中提取文本的单元格。

    2. KTE 和 feature 是您要在其间提取文本的两个词。

    3. 数字3是KTE的字符长度,数字4等于KTE的字符长度加1。

答案 2 :(得分:1)

除了 @RolandSmith 所写的方法之外,这里还有一种在 Excel 中通过 VBA 使用正则表达式的方法:

Option Explicit
Function ExtractSpecial(S As String, Index As Long) As String
    ' Returns one captured piece of the first "<word> special <rest>" occurrence in S.
    '
    ' S     - text to search (matching is case-insensitive).
    ' Index - 1-based number of the capture group to return:
    '           1 = run of letters immediately before "special"
    '           2 = the word "special" as it appears in S
    '           3 = run of non-letter characters that follows "special"
    '
    ' The function result is left at its default (empty string) when S
    ' contains no match.
    Const MATCH_PATTERN As String = "([a-z]+)\s+(special)\s+([^a-z]+)"

    Dim rx As Object
    Dim hits As Object

    ' Late-bound so no reference to the VBScript RegExp library is required.
    Set rx = CreateObject("vbscript.regexp")
    rx.Pattern = MATCH_PATTERN
    rx.IgnoreCase = True
    rx.MultiLine = False
    rx.Global = True

    Set hits = rx.Execute(S)
    If hits.Count = 0 Then Exit Function

    ' SubMatches is 0-based, hence the shift from the 1-based Index.
    ExtractSpecial = hits(0).SubMatches(Index - 1)
End Function


enter image description here


Sub ExtractSpec()
    ' Scans column A of worksheet "sheet2" from row 1 down to the last used
    ' cell and, for every row containing "<word> special <rest>", writes the
    ' three captured pieces into columns C:E of the same row:
    '   C = run of letters immediately before "special"
    '   D = the word "special" as it appears in the cell
    '   E = run of non-letter characters that follows "special"
    ' Rows without a match (or holding an error value) get empty cells in C:E.
    Const MATCH_PATTERN As String = "([a-z]+)\s+(special)\s+([^a-z]+)"

    Dim RE As Object, MC As Object
    Dim wsSrc As Worksheet, wsRes As Worksheet
    Dim rSrc As Range, rRes As Range
    Dim vSrc As Variant, vRes As Variant
    Dim I As Long, J As Long

    Set wsSrc = Worksheets("sheet2")
    Set wsRes = Worksheets("sheet2")
    Set rRes = wsRes.Cells(1, 3)

    With wsSrc
        Set rSrc = .Range(.Cells(1, 1), .Cells(.Rows.Count, 1).End(xlUp))
    End With

    ' A single-cell range yields a scalar from .Value, not a 2-D array, which
    ' would make UBound fail below; wrap it so the loop handles both cases.
    If rSrc.Cells.Count = 1 Then
        ReDim vSrc(1 To 1, 1 To 1)
        vSrc(1, 1) = rSrc.Value
    Else
        vSrc = rSrc.Value
    End If

    ' Late-bound so no reference to the VBScript RegExp library is required.
    Set RE = CreateObject("vbscript.regexp")
    With RE
        .Global = False        ' only the first match per cell is used
        .MultiLine = False
        .IgnoreCase = True
        .Pattern = MATCH_PATTERN
    End With

    ReDim vRes(1 To UBound(vSrc, 1), 1 To 3)
    For I = 1 To UBound(vSrc, 1)
        ' Error values (#N/A, #VALUE!, ...) cannot be converted to text.
        If Not IsError(vSrc(I, 1)) Then
            Set MC = RE.Execute(CStr(vSrc(I, 1)))
            If MC.Count > 0 Then
                For J = 1 To 3
                    vRes(I, J) = MC(0).SubMatches(J - 1)
                Next J
            End If
        End If
    Next I

    ' Write all results back in a single assignment.
    rRes.Resize(UBound(vRes, 1), UBound(vRes, 2)).Value = vRes
End Sub