解析列中的行以列出excel中的每个unigram,bigram和trigram

时间:2015-02-19 15:53:23

标签: excel excel-vba vba

正如标题所述,我想以空格作为分隔符来解析表中的每一行(1 列,约 1k 行)。每行包含一个短语。我想列出每个短语的所有 unigram、bigram 和 trigram。下面是示例数据和所需的输出格式。

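| 短语 | unigram | bigram | trigram |
| --- | --- | --- | --- |
| the quick brown fox | the | the quick | the quick brown |
| jumps over the lazy dog | quick | quick brown | quick brown fox |
| | brown | brown fox | jumps over the |
| | fox | jumps over | over the lazy |
| | jumps | over the | the lazy dog |
| | over | the lazy | |
| | the | lazy dog | |
| | lazy | | |
| | dog | | |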

3 个答案:

答案 0 :(得分:1)

以下是针对单个单元格 A1 的小例子:

Sub grams()
    ' Splits the phrase in cell A1 of the active sheet on single spaces and
    ' lists its n-grams starting at row 2:
    '   column 1 - unigrams (single words)
    '   column 2 - bigrams  (pairs of adjacent words)
    '   column 3 - trigrams (triples of adjacent words)
    '
    ' All variables are declared explicitly so the macro also compiles in
    ' modules that use Option Explicit.
    Dim K As Long, i As Long
    Dim ary() As String

    ary = Split(Range("A1").Value, " ")

    ' Unigrams: one word per row.
    K = 2
    For i = LBound(ary) To UBound(ary)
        Cells(K, 1).Value = ary(i)
        K = K + 1
    Next i

    ' Bigrams: stop one word before the end so ary(i + 1) stays in bounds.
    ' The loop body is skipped entirely when the phrase has fewer than 2 words.
    K = 2
    For i = LBound(ary) To UBound(ary) - 1
        Cells(K, 2).Value = ary(i) & " " & ary(i + 1)
        K = K + 1
    Next i

    ' Trigrams: stop two words before the end so ary(i + 2) stays in bounds.
    K = 2
    For i = LBound(ary) To UBound(ary) - 2
        Cells(K, 3).Value = ary(i) & " " & ary(i + 1) & " " & ary(i + 2)
        K = K + 1
    Next i
End Sub

enter image description here

您可以为多个单元格调整相同的方案。

答案 1 :(得分:1)

假设各个句子都位于第一列(A 列):

Sub splitIt()
    ' For every phrase in column A of the first worksheet of this workbook,
    ' splits the phrase on single spaces and appends its n-grams to that
    ' same sheet:
    '   column B - unigrams
    '   column C - bigrams
    '   column D - trigrams
    ' Each column is filled independently, continuing below its own last
    ' non-empty cell after every phrase.

    Dim vArray As Variant
    Dim x As Long
    Dim y As Long
    Dim SentenceRange As Range
    Dim sentenceCell As Range
    ' Each variable needs its own "As Long"; in "Dim a, b, c As Long" only
    ' the last one is typed and the others silently become Variant.
    Dim startRowB As Long, startRowC As Long, startRowD As Long
    Dim LastRow As Long
    Dim sht As Worksheet

    Set sht = ThisWorkbook.Worksheets(1)

    LastRow = sht.Cells(sht.Rows.Count, "A").End(xlUp).Row

    ' Qualify every Range/Cells call with sht so that input and output use
    ' the same sheet the row counts are taken from, regardless of which
    ' sheet happens to be active.
    Set SentenceRange = sht.Range(sht.Cells(1, 1), sht.Cells(LastRow, 1))

    startRowB = 1
    startRowC = 1
    startRowD = 1

    For Each sentenceCell In SentenceRange.Cells

        vArray = Split(sentenceCell.Value, " ")

        ' y is the number of extra words in the n-gram (0 = unigram,
        ' 1 = bigram, 2 = trigram); the upper bound of x shrinks by y so
        ' that vArray(x + y) never runs past the end of the array.
        For y = 0 To 2
            For x = 0 To (UBound(vArray) - y)
                Select Case y
                    Case 0
                        sht.Cells(startRowB + x, 2).Value = vArray(x)
                    Case 1
                        sht.Cells(startRowC + x, 3).Value = vArray(x) & " " & vArray(x + 1)
                    Case 2
                        sht.Cells(startRowD + x, 4).Value = vArray(x) & " " & vArray(x + 1) & " " & vArray(x + 2)
                End Select
            Next x
        Next y

        startRowB = NextFreeRow(sht, 2)
        startRowC = NextFreeRow(sht, 3)
        startRowD = NextFreeRow(sht, 4)

    Next sentenceCell

End Sub

Private Function NextFreeRow(ByVal sht As Worksheet, ByVal col As Long) As Long
    ' Returns the row just below the last non-empty cell of column col on sht,
    ' or 1 when the column is still completely empty. End(xlUp) lands on row 1
    ' in both the "empty column" and the "only row 1 is filled" cases, so the
    ' cell content has to be checked to tell them apart; otherwise a phrase
    ' too short to produce a bigram/trigram would leave row 1 blank.
    Dim lastCell As Range

    Set lastCell = sht.Cells(sht.Rows.Count, col).End(xlUp)

    If lastCell.Row = 1 And IsEmpty(lastCell.Value) Then
        NextFreeRow = 1
    Else
        NextFreeRow = lastCell.Row + 1
    End If
End Function

enter image description here

答案 2 :(得分:1)

另一种方法

Sub getIt()
    ' Builds the unigram / bigram / trigram lists for all phrases in column A
    ' of the active sheet using worksheet formulas instead of nested loops:
    '   1. All phrases are joined into one string; the first word of every
    '      phrase after the first is prefixed with "|" to mark the boundary.
    '   2. The words are written to column B, one per row.
    '   3. Formulas in columns C and D concatenate each word with the next
    '      one / two words, returning "" when that would cross a "|" boundary.
    '   4. The formulas are frozen to values, the blank cells are deleted
    '      (shifting cells up) and the "|" markers are removed.
    ' NOTE(review): a phrase that itself contains "|" would be misread as a
    ' phrase boundary and lose that character - confirm the input never does.

    Dim tokens As Variant
    Dim lastCell As Range
    Dim allText As String
    Dim lastIdx As Long
    Dim blanks As Range

    Set lastCell = Cells(Rows.Count, "A").End(xlUp)

    If lastCell.Row = 1 Then
        ' Only one row of input: take the cell text directly instead of
        ' relying on Transpose/Join for a single-cell range.
        allText = CStr([a1].Value)
    Else
        allText = Join(Application.Transpose(Range([a1], lastCell)), vbNewLine)
    End If

    tokens = Split(Replace(allText, vbNewLine, Chr(32) & "|"), Chr(32))
    lastIdx = UBound(tokens)

    ' Empty input yields an empty array (UBound = -1); nothing to write.
    If lastIdx < 0 Then Exit Sub

    If lastIdx = 0 Then
        [b1].Value = tokens(0)
    Else
        [b1].Resize(lastIdx + 1) = Application.Transpose(tokens)
    End If

    ' Resize(0) raises an error, so only write formulas when there are enough
    ' words for at least one bigram / trigram row.
    If lastIdx >= 1 Then
        [c1].Resize(lastIdx).FormulaR1C1 = "=IF(LEFT(R[1]C[-1],1)<>""|"",RC[-1]&"" "" &R[1]C[-1],"""")"
    End If
    If lastIdx >= 2 Then
        [d1].Resize(lastIdx - 1).FormulaR1C1 = "=IF(AND(LEFT(R[1]C[-2],1)<>""|"",LEFT(R[2]C[-2],1)<>""|""),RC[-2]&"" "" &R[1]C[-2]&"" ""&R[2]C[-2],"""")"
    End If

    ' Replace the formulas with their results so the deletion below cannot
    ' change what they evaluate to.
    [c1].Resize(lastIdx + 1, 2).Value = [c1].Resize(lastIdx + 1, 2).Value

    With [b1].Resize(lastIdx + 1, 3)
        ' SpecialCells raises run-time error 1004 when no blank cell exists;
        ' trap only that lookup and skip the deletion in that case.
        On Error Resume Next
        Set blanks = .SpecialCells(xlCellTypeBlanks)
        On Error GoTo 0
        If Not blanks Is Nothing Then blanks.Delete xlUp

        ' LookAt is remembered from the previous Find/Replace call; force a
        ' partial match so "|" is stripped from inside "|word ..." cells even
        ' if the last search was a whole-cell match.
        .Replace What:="|", Replacement:=vbNullString, LookAt:=xlPart
    End With

End Sub