我有一个非常大的数据集(600,000行),其格式如下:
1)大约有60种产品。一个是美国总数,而另一个是制造商,并且作为KMF标记。还有一些被标记为PCKG(但与此问题无关)
2)每种产品都位于60个不同的市场
3)每个市场有20个不同的位置
4)我有12个指标,我需要按以下方式计算数据:美国总数 - 每个指标的总和(KMF)
我为此编写了vba代码,但运行时间太长(大约20分钟)我需要在至少20个工作表上运行类似的代码。我尝试过各种方法,例如将screenUpdating等设置为false。这是我的代码。我是vba编码的新手,所以我可能错过了很明显的事情。请让我知道任何不清楚的事情。请帮忙!
Sub beforeRunningCode()
Application.ScreenUpdating = False
Application.DisplayStatusBar = False
Application.Calculation = xlCalculationManual
Application.EnableEvents = False
ActiveSheet.DisplayPageBreaks = False
End Sub
Sub returnToOriginal()
Application.ScreenUpdating = screenUpdateState
Application.DisplayStatusBar = statusBarState
Application.Calculation = calcState
Application.EnableEvents = eventsState
ActiveSheet.DisplayPageBreaks = displayPageBreaksState
End Sub
Function LastRowFunc(Sheet) As Long
LastRowFunc = ActiveWorkbook.Worksheets(Sheet).Range("A2", Worksheets(Sheet).Range("A2").End(xlDown)).Rows.Count
End Function
Function LastColFunc(Sheet) As Long
With ActiveSheet
LastColFunc = ActiveWorkbook.Sheets(Sheet).Cells(1, .Columns.Count).End(xlToLeft).Column
End With
End Function
Sub AOCalculate()
Call beforeRunningCode 'Optimize Excel
Dim LastRow As Long
Dim LastCol As Long
Dim Period As String
Dim Sheet As String
Dim Arr(1 To 16)
Dim Count As Integer
Sheet = "Energy_LS_Bottler"
Period = "2016 WAVE 1 - 3 W/E 05/07"
LastRow = LastRowFunc(Sheet) 'Calculate last row for which data exists
LastCol = LastColFunc(Sheet) 'Calculate last column for which data exists
For Each Location In ActiveWorkbook.Sheets("Locations").Range("D7:D28").Value
For Each Market In ActiveWorkbook.Sheets("Markets").Range("A5:A92").Value
Count = Count + 1
Arr(1) = Market
Arr(2) = "AO"
Arr(3) = Location
Arr(4) = Period
With ActiveWorkbook.Sheets(Sheet) 'Filtering for KMF
.AutoFilterMode = False
.Range(Cells(1, 1), Cells(LastRow, LastCol)).AutoFilter
.Range(Cells(1, 1), Cells(LastRow, LastCol)).AutoFilter field:=17, Criteria1:="=KMF"
.Range(Cells(1, 1), Cells(LastRow, LastCol)).AutoFilter field:=1, Criteria1:=Market
.Range(Cells(1, 1), Cells(LastRow, LastCol)).AutoFilter field:=3, Criteria1:=Location
End With
For k = 5 To 16
Arr(k) = Application.WorksheetFunction.Sum(ActiveWorkbook.Sheets(Sheet).Range(Cells(1, k), Cells(LastRow, k)).SpecialCells(xlCellTypeVisible))
Next k
With ActiveWorkbook.Sheets(Sheet) ' filtering for Total US
.AutoFilterMode = False
.Range(Cells(1, 1), Cells(LastRow, LastCol)).AutoFilter
.Range(Cells(1, 1), Cells(LastRow, LastCol)).AutoFilter field:=17, Criteria1:="=Total US"
.Range(Cells(1, 1), Cells(LastRow, LastCol)).AutoFilter field:=1, Criteria1:=Market
.Range(Cells(1, 1), Cells(LastRow, LastCol)).AutoFilter field:=3, Criteria1:=Location
End With
For k = 5 To 16
Arr(k) = -Arr(k) + Application.WorksheetFunction.Sum(ActiveWorkbook.Sheets(Sheet).Range(Cells(1, k), Cells(LastRow, k)).SpecialCells(xlCellTypeVisible))
Next k
For j = 1 To 16
ActiveWorkbook.Sheets(Sheet).Cells(LastRow + Count, j).Value = Arr(j)
Next j
Erase Arr
Next
Next
ActiveWorkbook.Sheets(Sheet).AutoFilterMode = False
Call returnToOriginal
End Sub
[编辑]:以下是指向示例数据集https://drive.google.com/file/d/0B3MkGa57h6g_WGl2WWlWekd4NU0/view?usp=sharing
的链接答案 0 :(得分:2)
我认为这会有用(虽然我没有机会测试它),而且应该快得多:
Sub AOCalculate()
Call beforeRunningCode 'Optimize Excel
Dim LastRow As Long
Dim LastCol As Long
Dim Period As String
Dim Sheet As String
Dim Arr() '1 To 2000, 1 To 16)
Dim Count As Integer
Sheet = "Energy_LS_Bottler"
Period = "2016 WAVE 1 - 3 W/E 05/07"
LastRow = LastRowFunc(Sheet) 'Calculate last row for which data exists
LastCol = LastColFunc(Sheet) 'Calculate last column for which data exists
'copy all of the relevant cells to local arrays for speed
Dim Locations(), Markets(), data()
Markets = ActiveWorkbook.Sheets("Markets").Range("A5:A92").Value
Locations = ActiveWorkbook.Sheets("Locations").Range("D7:D28").Value
'(pretty sure the following line needs to localize the Cells() to .Cells())
data = ActiveWorkbook.Sheets(Sheet).Range(Cells(1, 1), Cells(LastRow, LastCol)).Value '**'
ReDim Arr(1 To UBound(Markets, 1) * UBound(Locations, 1), 16)
'make an index of pointers into our accumulation array
Dim counts As New Collection
Dim i As Long, l As Long, m As Long
For l = 1 To UBound(Locations, 1)
Location = Locations(l, 1) '**'
For m = 1 To UBound(Markets, 1)
Market = Markets(m, 1) '**'
i = i + 1
counts.Add i, CStr(Location) & "~" & CStr(Market)
'counts.Add NewAccumArray(Location, Market, Period), CStr(Location) & "~" & CStr(Market)
Arr(i, 1) = Market
Arr(i, 2) = "AO"
Arr(i, 3) = Location
Arr(i, 4) = Period
Next
Next
' go through each row and add it to the appropiate count in the array
Dim r As Long
Dim key As String, idx As Long
For r = 1 To UBound(data, 1)
key = CStr(data(r, 3)) & "~" & CStr(data(r, 1))
If data(r, 17) = "KMF" Then
idx = counts(key)
For k = 5 To 16
Arr(idx, k) = Arr(idx, k) - data(r, k)
Next k
Else
If data(r, 17) = "Total US" Then
idx = counts(key)
For k = 5 To 16
Arr(idx, k) = Arr(idx, k) + data(r, k)
Next k
End If
End If
Next r
' output the results
ActiveWorkbook.Sheets(Sheet).Range(Cells(LastRow + 1, 1), Cells(LastRow + Count, 16)).Value = Arr
ActiveWorkbook.Sheets(Sheet).AutoFilterMode = False
Call returnToOriginal
End Sub
回答查询“我的意思是什么?”
'(pretty sure the following line needs to localize the Cells() to .Cells())
data = ActiveWorkbook.Sheets(Sheet).Range(Cells(1, 1), Cells(LastRow, LastCol)).Value '**'
在这里使用Cells(..)
从根本上说是不可靠和破碎的。这是因为Cells(..)
实际上是ActiveSheet.Cells(..)
的快捷方式, Active * 属性本质上很慢且不可靠,因为它们可以更改而代码是运行。更糟糕的是,这段代码假设 ActiveSheet = Energy_LS_Blotter
远非确定。
编写此行的正确方法如下:
data = ActiveWorkbook.Sheets(Sheet).Range( _
ActiveWorkbook.Sheets(Sheet).Cells(1, 1), _
ActiveWorkbook.Sheets(Sheet).Cells(LastRow, LastCol) _
).Value
但那是漫长,丑陋和不方便的。更简单的方法是使用Sheet变量或With
:
With ActiveWorkbook.Sheets(Sheet)
data = .Range(.Cells(1, 1), .Cells(LastRow, LastCol)).Value
End With