我有一些excel表来自将pdf文件中的文本转换为excel。我需要将表格重新格式化为更适合在Android应用程序中进行sqlite查询的表格。这是从pdf到excel的一个页面的示例:
320E L, 320E LRR 320E L, 320E LRR with Super Long Reach 320F L
with VA Boom with Reach Boom
Stick 2.9 m 9'6" 2.5 m 7'6" 6.28 m 20'6" 2.9 m 9'6"
mm ft mm ft mm ft m ft
A 8410 27'7" 8070 26'6" 11 290 37'0" 6.49 21'4"
B 10 200 33'6" 9800 32'2" 15 720 51'6" 9.86 32'4"
C 6680 21'11" 6270 20'7" 11 690 38'4" 6.72 22'1"
D 5290 17'4" 4890 16'1" 10 670 35'0" 5.06 16'7"
E 6580 21'7" 6170 20'3" 11 590 38'0" 6.55 21'6"
F — — — — — — — —
G 11 520 37'10" 11 180 36'8" 13 590 44'6" 9.37 30'9"
这里有一个截图,说明从pdf中翻录的整页数据是如何在excel中查找的。抱歉,我不确定如何在SO中正确格式化。
这只是数百页中的一页样本。我不知道该怎么办呢。我对使用java更熟悉,但我之前使用过VBA宏,因此我愿意使用宏和/或两者的组合。 Python也是一个很好的脚本选项。我也不确定这样的事情是否可行,因为那里并不是一块石头"格式化原始pdf。您可以看到pdf中的表格非常动态。
完整的pdf翻录第一个屏幕截图:
320E L, 320E LRR 320E L, 320E LRR with Super Long Reach 320F L
with VA Boom with Reach Boom
Stick 2.9 m 9'6" 2.5 m 7'6" 6.28 m 20'6" 2.9 m 9'6"
mm ft mm ft mm ft m ft
A 8410 27'7" 8070 26'6" 11 290 37'0" 6.49 21'4"
B 10 200 33'6" 9800 32'2" 15 720 51'6" 9.86 32'4"
C 6680 21'11" 6270 20'7" 11 690 38'4" 6.72 22'1"
D 5290 17'4" 4890 16'1" 10 670 35'0" 5.06 16'7"
E 6580 21'7" 6170 20'3" 11 590 38'0" 6.55 21'6"
F — — — — — — — —
G 11 520 37'10" 11 180 36'8" 13 590 44'6" 9.37 30'9"
323D2 L 323D2 L
with Reach Boom with Mass Boom
Stick 1.9 m 6'3" 2.5 m 8'2" 2.92 m 9'7" 1.9 m 6'3"
m ft m ft m ft m ft
A 5.99 19'8" 6.59 21'7" 6.77 22'2" 5.53 18'2"
B 8.98 29'5" 9.44 31'0" 9.86 32'4" 8.46 27'9"
C 5.78 19'0" 6.2 20'4" 6.65 21'10" 5.35 17'7"
D 3.74 12'3" 5.09 16'8" 5.52 18'1" 3.88 12'9"
E 5.51 18'1" 5.99 19'8" 6.47 21'3" 5 16'5"
F — — — — — — — —
G 8.94 29'4" 9.38 30'9" 9.58 31'5" 8.56 28'1"
答案 0 :(得分:0)
这是我到目前为止所做的事情。
Option Explicit
Sub extractData()
Dim i As Long, j As Long
Dim wsDat As Worksheet
Dim wbRes As Workbook
Dim rngRowCell As Range, rngColCell As Range
Dim strModelAndBoom As String, strBoom As String
Dim arrModels As Variant, arrTemp As Variant
ReDim arrModelandBoom(0) As Variant
ReDim arrResult(1 To 10, 0) As Variant
Application.ScreenUpdating = True
Set wbRes = Workbooks.Add
With wbRes.Worksheets
If .Count > 1 Then
For i = 2 To .Count
.Item(i).Delete
Next i
End If
.Item(1).Name = "Results"
With .Item(1)
With .Range(.Cells(1, 1), .Cells(1, 10))
.WrapText = True
.HorizontalAlignment = xlCenter
.ColumnWidth = 12.5
.Font.Bold = True
End With
.Cells(1, 1) = "Model Number"
.Cells(1, 2) = "Boom Variations"
.Cells(1, 2).ColumnWidth = 15
.Cells(1, 3) = "Stick Length Variations (m)"
.Cells(1, 4) = "A (m)"
.Cells(1, 5) = "B (m)"
.Cells(1, 6) = "C (m)"
.Cells(1, 7) = "D (m)"
.Cells(1, 8) = "E (m)"
.Cells(1, 9) = "F (m)"
.Cells(1, 10) = "G (m)"
End With
End With
For Each wsDat In ThisWorkbook.Worksheets
With wsDat
wsDat.Cells.UnMerge
For Each rngRowCell In .Range(.Cells(1, 2), .Cells(.Rows.Count, 2).End(xlUp))
If Not rngRowCell.Offset(0, 1) = Empty And rngRowCell.Offset(2, 0) = "Stick" Then
For Each rngColCell In .Range(rngRowCell.Offset(0, 1), .Cells(rngRowCell.Row, .Columns.Count).End(xlToLeft))
Select Case rngColCell
Case Empty
If Join(Array(rngColCell.End(xlToLeft), rngColCell.End(xlToLeft).Offset(1, 0)), " ") = strModelAndBoom And InStr(rngColCell.Offset(2, 0), "m") > 0 Then
For i = LBound(arrModels) To UBound(arrModels)
If UBound(arrResult, 2) = 0 Then
ReDim arrResult(1 To 10, 1 To 1)
Else
ReDim Preserve arrResult(1 To 10, 1 To UBound(arrResult, 2) + 1)
End If
arrResult(1, UBound(arrResult, 2)) = arrModels(i)
arrResult(2, UBound(arrResult, 2)) = strBoom
arrResult(3, UBound(arrResult, 2)) = Val(rngColCell.Offset(2, 0))
For j = 4 To 10
If rngColCell.Offset(3, 0) = "mm" Then
If Not rngColCell.Offset(j, 0) = Chr(151) Then
arrResult(j, UBound(arrResult, 2)) = Val(rngColCell.Offset(j, 0)) / 1000
End If
ElseIf rngColCell.Offset(3, 0) = "m" Then
If Not rngColCell.Offset(j, 0) = Chr(151) Then
arrResult(j, UBound(arrResult, 2)) = Val(rngColCell.Offset(j, 0))
End If
End If
Next j
Next i
End If
Case Else
strModelAndBoom = Join(Array(rngColCell, rngColCell.Offset(1, 0)), " ")
arrTemp = Split(strModelAndBoom, "with")
arrModels = Split(arrTemp(0), ", ")
strBoom = Trim(arrTemp(1))
If InStr(1, strBoom, "Boom", vbTextCompare) > 0 Then
strBoom = Trim(Left(strBoom, InStr(1, strBoom, "Boom", vbTextCompare) - 1))
End If
For i = LBound(arrModels) To UBound(arrModels)
If UBound(arrResult, 2) = 0 Then
ReDim arrResult(1 To 10, 1 To 1)
Else
ReDim Preserve arrResult(1 To 10, 1 To UBound(arrResult, 2) + 1)
End If
arrResult(1, UBound(arrResult, 2)) = arrModels(i)
arrResult(2, UBound(arrResult, 2)) = strBoom
arrResult(3, UBound(arrResult, 2)) = Val(rngColCell.Offset(2, 0))
For j = 4 To 10
If rngColCell.Offset(3, 0) = "mm" Then
If Not rngColCell.Offset(j, 0) = Chr(151) Then
arrResult(j, UBound(arrResult, 2)) = Val(rngColCell.Offset(j, 0)) / 1000
End If
ElseIf rngColCell.Offset(3, 0) = "m" Then
If Not rngColCell.Offset(j, 0) = Chr(151) Then
arrResult(j, UBound(arrResult, 2)) = Val(rngColCell.Offset(j, 0))
End If
End If
Next j
Next i
End Select
Next rngColCell
End If
Next rngRowCell
End With
Next wsDat
With wbRes.Worksheets(1).Cells(2, 1).Resize(UBound(arrResult, 2), UBound(arrResult, 1))
.Value = Application.Transpose(arrResult)
.HorizontalAlignment = xlCenter
End With
With wbRes
With .Worksheets(1).Sort
With .SortFields
.Clear
.Add Key:=wbRes.Worksheets(1).Range(wbRes.Worksheets(1).Cells(1, 1), wbRes.Worksheets(1).Cells(wbRes.Worksheets(1).Rows.Count, 1).End(xlUp)), SortOn:=xlSortOnValues, Order:=xlDescending, DataOption:=xlSortNormal
.Add Key:=wbRes.Worksheets(1).Range(wbRes.Worksheets(1).Cells(1, 2), wbRes.Worksheets(1).Cells(wbRes.Worksheets(1).Rows.Count, 2).End(xlUp)), SortOn:=xlSortOnValues, Order:=xlAscending, DataOption:=xlSortNormal
.Add Key:=wbRes.Worksheets(1).Range(wbRes.Worksheets(1).Cells(1, 3), wbRes.Worksheets(1).Cells(wbRes.Worksheets(1).Rows.Count, 3).End(xlUp)), SortOn:=xlSortOnValues, Order:=xlAscending, DataOption:=xlSortNormal
End With
.SetRange wbRes.Worksheets(1).UsedRange
.Header = xlYes
.MatchCase = False
.Orientation = xlTopToBottom
.SortMethod = xlPinYin
.Apply
End With
.SaveAs Application.ThisWorkbook.Path & "\Results " & Format(Now, "dd-MM-yy HHmmss"), 51
End With
With Application
.DisplayAlerts = False
.Workbooks.Open (.ThisWorkbook.FullName)
.DisplayAlerts = True
.ScreenUpdating = True
End With
End Sub