我收到了一本工作簿,其中包含两个电动枢轴表(一个约1个行,另一个20个行)。我想把它撕掉(就像真的一样 - 但是让我们说一个CSV)这样我就可以在R + PostGreSQL中使用它了。
由于行数超过100万,我无法导出到Excel表格;复制粘贴数据仅在我选择大约200,000行时才有效。所以我有点卡住了!我尝试将xlsx转换为zip并在notepad ++中打开“item.data”文件,但是它已经加密了。
我很感激任何解决方案(很高兴使用VBA,Python,SQL)
编辑:我把一些VBA放在一起工作正常,大约0.5毫米行但是17号行文档中断了:
Public Sub CreatePowerPivotDmvInventory()
Dim conn As ADODB.Connection
Dim sheet As Excel.Worksheet
Dim wbTarget As Workbook
On Error GoTo FailureOutput
Set wbTarget = ActiveWorkbook
wbTarget.Model.Initialize
Set conn = wbTarget.Model.DataModelConnection.ModelConnection.ADOConnection
' Call function by passing the DMV name
' E.g. Partners
WriteDmvContent "Partners", conn
MsgBox "Finished"
Exit Sub
FailureOutput:
MsgBox Err.Description
End Sub
Private Sub WriteDmvContent(ByVal dmvName As String, ByRef conn As ADODB.Connection)
Dim rs As ADODB.Recordset
Dim mdx As String
Dim i As Integer
mdx = "EVALUATE " & dmvName
Set rs = New ADODB.Recordset
rs.ActiveConnection = conn
rs.Open mdx, conn, adOpenForwardOnly, adLockOptimistic
' Setup CSV file (improve this code)
Dim myFile As String
myFile = "H:\output_table_" & dmvName & ".csv"
Open myFile For Output As #1
' Output column names
For i = 0 To rs.Fields.count - 1
If i = rs.Fields.count - 1 Then
Write #1, rs.Fields(i).Name
Else
Write #1, rs.Fields(i).Name,
End If
Next i
' Output of the query results
Do Until rs.EOF
For i = 0 To rs.Fields.count - 1
If i = rs.Fields.count - 1 Then
Write #1, rs.Fields(i)
Else
Write #1, rs.Fields(i),
End If
Next i
rs.MoveNext
Loop
Close #1
rs.Close
Set rs = Nothing
Exit Sub
FailureOutput:
MsgBox Err.Description
End Sub
答案 0 :(得分:2)
答案 1 :(得分:0)
我找到了一个工作(VBA)解决方案[但greggy也适用于我!] - >我的表太大了,无法导出一个块,所以我循环并按'月'过滤。这似乎有用,并且在我将所有内容合并在一起后产生1.2 gb的CSV:
Function YYYYMM(aDate As Date)
YYYYMM = year(aDate) * 100 + month(aDate)
End Function
Function NextYYYYMM(YYYYMM As Long)
If YYYYMM Mod 100 = 12 Then
NextYYYYMM = YYYYMM + 100 - 11
Else
NextYYYYMM = YYYYMM + 1
End If
End Function
Public Sub CreatePowerPivotDmvInventory()
Dim conn As ADODB.Connection
Dim tblname As String
Dim wbTarget As Workbook
On Error GoTo FailureOutput
Set wbTarget = ActiveWorkbook
wbTarget.Model.Initialize
Set conn = wbTarget.Model.DataModelConnection.ModelConnection.ADOConnection
' Call function by passing the DMV name
tblname = "table1"
WriteDmvContent tblname, conn
MsgBox "Finished"
Exit Sub
FailureOutput:
MsgBox Err.Description
End Sub
Private Sub WriteDmvContent(ByVal dmvName As String, ByRef conn As ADODB.Connection)
Dim rs As ADODB.Recordset
Dim mdx As String
Dim i As Integer
'If table small enough:
'mdx = "EVALUATE " & dmvName
'Other-wise filter:
Dim eval_field As String
Dim eval_val As Variant
'Loop through year_month
Dim CurrYM As Long, LimYM As Long
Dim String_Date As String
CurrYM = YYYYMM(#12/1/2000#)
LimYM = YYYYMM(#12/1/2015#)
Do While CurrYM <= LimYM
String_Date = CStr(Left(CurrYM, 4)) + "-" + CStr(Right(CurrYM, 2))
Debug.Print String_Date
eval_field = "yearmonth"
eval_val = String_Date
mdx = "EVALUATE(CALCULATETABLE(" & dmvName & ", " & dmvName & "[" & eval_field & "] = """ & eval_val & """))"
Debug.Print (mdx)
Set rs = New ADODB.Recordset
rs.ActiveConnection = conn
rs.Open mdx, conn, adOpenForwardOnly, adLockOptimistic
' Setup CSV file (improve this code)
Dim myFile As String
myFile = "H:\vba_tbl_" & dmvName & "_" & eval_val & ".csv"
Debug.Print (myFile)
Open myFile For Output As #1
' Output column names
For i = 0 To rs.Fields.count - 1
If i = rs.Fields.count - 1 Then
Write #1, """" & rs.Fields(i).Name & """"
Else
Write #1, """" & rs.Fields(i).Name & """",
End If
Next i
' Output of the query results
Do Until rs.EOF
For i = 0 To rs.Fields.count - 1
If i = rs.Fields.count - 1 Then
Write #1, """" & rs.Fields(i) & """"
Else
Write #1, """" & rs.Fields(i) & """",
End If
Next i
rs.MoveNext
Loop
CurrYM = NextYYYYMM(CurrYM)
i = i + 1
Close #1
rs.Close
Set rs = Nothing
Loop
Exit Sub
FailureOutput:
MsgBox Err.Description
End Sub
答案 2 :(得分:0)
我修改了mptevsion脚本-现在它将表中的数据保存到每n行(默认为100k行,可以通过更改chunk_size
来更改)中的csv。
该脚本的优点是它不依赖表中的任何字段来分离数据,而是使用TOPNSKIP(https://dax.guide/topnskip/)来完成数据。
Public Sub CreatePowerPivotDmvInventory()
ActiveWorkbook.Model.Initialize
Dim save_path As String
Dim chunk_size As Long
save_path = "H:\power pivot\csv"
tblName = "data"
chunk_size = 100000
Dim rs As ADODB.Recordset
Dim mdx As String
Dim i As Long
Dim rows_limit As Long
Dim rows_left As Long
Dim conn As ADODB.Connection
Set conn = ActiveWorkbook.Model.DataModelConnection.ModelConnection.ADOConnection
' calculating number of rows in a table
mdx = "evaluate {COUNTROWS('" & tblName & "')}"
Set rs = New ADODB.Recordset
rs.ActiveConnection = conn
rs.Open mdx, conn, adOpenForwardOnly, adLockOptimistic
rows_limit = rs.Fields(0)
rows_left = rows_limit
chunk_id = 1
Do While rows_left > 0
If rows_left < chunk_size Then
chunk_size = rows_left
End If
mdx = "define var data_table = '" & tblName & "'" & Chr(10) & _
"EVALUATE(" & Chr(10) & _
" TOPNSKIP(" & chunk_size & ", " & rows_limit - rows_left & ", data_table)" & Chr(10) & _
");"
Debug.Print (mdx)
Set rs = New ADODB.Recordset
rs.ActiveConnection = conn
rs.Open mdx, conn, adOpenForwardOnly, adLockOptimistic
' Setup CSV file (improve this code)
Dim myFile As String
myFile = save_path & "\vba_tbl_" & tblName & "_" & chunk_id & ".csv"
Debug.Print (myFile)
Open myFile For Output As #1
' Output column names
For i = 0 To rs.Fields.Count - 1
If i = rs.Fields.Count - 1 Then
Write #1, """" & rs.Fields(i).Name & """"
Else
Write #1, """" & rs.Fields(i).Name & """",
End If
Next i
' Output of the query results
Do Until rs.EOF
For i = 0 To rs.Fields.Count - 1
If i = rs.Fields.Count - 1 Then
Write #1, """" & rs.Fields(i) & """"
Else
Write #1, """" & rs.Fields(i) & """",
End If
Next i
rs.MoveNext
Loop
rows_left = rows_left - chunk_size
chunk_id = chunk_id + 1
Close #1
rs.Close
Set rs = Nothing
Loop
MsgBox "Finished"
Exit Sub
FailureOutput:
MsgBox Err.Description
End Sub