我正在编写一个应该在短时间内处理csv中数百万个数据集的程序,我的想法是因为性能原因使用odbc,因此我用odbc读取所有数据并将其保存在内存中,之后我添加参数并将其插入sql db,这是我的代码到目前为止:
Using connection As New OdbcConnection("jdbc:odbc:Driver={Microsoft Text Driver (*.txt; *.csv)};" & filePath & "Extensions=csv;Persist Security Info=False;")
Dim reader As OdbcDataReader
Dim i As Integer
Dim r As SeekZeilen
Dim TextFileTable As DataTable = Nothing
Dim line As String = reader.Read()
Me.ParseString(line)
Dim memStream As New MemoryStream(Encoding.Default.GetBytes(line))
Using TextFileReader As New Microsoft.VisualBasic.FileIO.TextFieldParser(memStream)
TextFileReader.TextFieldType = FileIO.FieldType.Delimited
TextFileReader.SetDelimiters(";")
r.erste_Zeile = TextFileReader.ReadFields()
If TextFileTable Is Nothing Then
TextFileTable = New DataTable("TextFileTable")
For i = 0 To r.erste_Zeile.Length - 1
Dim Column As New DataColumn(r.erste_Zeile(i))
Column.ReadOnly = True
TextFileTable.Columns.Add(Column)
Next
End If
DataGridView1.DataSource = TextFileTable
End Using
While reader.HasRows
line = reader.Read()
Me.ParseString(line)
memStream = New MemoryStream(Encoding.Default.GetBytes(line))
Using TextFileReader As New Microsoft.VisualBasic.FileIO.TextFieldParser(memStream)
TextFileReader.TextFieldType = FileIO.FieldType.Delimited
TextFileReader.SetDelimiters(";")
DataGridView1.DataSource = TextFileTable
Try
r._Rest = TextFileReader.ReadFields()
ReplaceChars(r._Rest)
If Not r._Rest Is Nothing Then
Dim oSQL As New DBUmgebung.cdb.SQL()
oSQL.init()
AddParameters(oSQL, r)
oSQL.ausfuehrenSQL(DBUmgebung.cdb.KSQLCommand.INSERT, _table, "")
Dim dtRow As DataRow = TextFileTable.NewRow
For i = 0 To r._Rest.Length - 1
dtRow(i) = r._Rest(i).ToString()
Next
TextFileTable.Rows.Add(dtRow)
DataGridView1.Refresh()
Application.DoEvents()
End If
Catch ex As Microsoft.VisualBasic.FileIO.MalformedLineException
MsgBox("Error! " & ex.Message & _
"")
Catch sqlEx As SqlException
MessageBox.Show(sqlEx.Message)
rtbSql.Focus()
Exit For
Catch ex As Exception
MessageBox.Show(ex.Message)
rtbSql.Focus()
Exit For
End Try
End Using
End While
reader.Close()
End Using
问题是我因未知原因得到空指针异常,有没有人知道我做错了什么?可能是因为我的odbc阅读器没有正确初始化?
答案 0 :(得分:3)
试试这个。这将把csv文件作为所有文本读入数据表。一旦进入Datatable,您就可以将记录插入SQL。您始终可以对此进行调整以处理多个csv文件。
import matplotlib as mpl
mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=["r", "#e94cdc", "0.7"])