如何使用c#将csv数据导入Oracle。其中要导入的数据大小为3GB,行数为7512263.我已设法将csv数据导入Oracle,但需要大约1小时的时间。如何加快将csv数据导入oracle所需的时间。谢谢。 这是我的代码:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Diagnostics;
using System.Threading;
using System.Text.RegularExpressions;
using System.IO;
using FileHelpers;
using System.Data.OracleClient;
namespace sqlloader
{
class Program
{
static void Main(string[] args)
{
int jum;
int i;
bool isFirstLine = false;
FileHelperEngine engine = new FileHelperEngine(typeof(XL_XDR));
//Connect To Database
string constr = "Data Source=(DESCRIPTION=(ADDRESS_LIST="
+ "(ADDRESS=(PROTOCOL=TCP)(HOST= pt-9a84825594af )(PORT=1521 )))"
+ "(CONNECT_DATA=(SERVER=DEDICATED)(SERVICE_NAME=o11g)));"
+ "User Id=xl;Password=rahasia;";
OracleConnection con = new OracleConnection(constr);
con.Open();
// To Read Use:
XL_XDR[] res = engine.ReadFile("DataOut.csv") as XL_XDR[];
jum = CountLinesInFile("DataOut.csv");
FileInfo f2 = new FileInfo("DataOut.csv");
long s2 = f2.Length;
int jmlRecord = jum - 1;
for (i = 0; i < jum; i++)
{
ShowPercentProgress("Processing...", i, jum);
Thread.Sleep(100);
if (isFirstLine == false)
{
isFirstLine = true;
}
else
{
string sql = "INSERT INTO XL_XDR (XDR_ID, XDR_TYPE, SESSION_START_TIME, SESSION_END_TIME, SESSION_LAST_UPDATE_TIME, " +
"SESSION_FLAG, VERSION, CONNECTION_ROW_COUNT, ERROR_CODE, METHOD, HOST_LEN, HOST, URL_LEN, URL, CONNECTION_START_TIME, " +
"CONNECTION_LAST_UPDATE_TIME, CONNECTION_FLAG, CONNECTION_ID, TOTAL_EVENT_COUNT, TUNNEL_PAIR_ID, RESPONSIVENESS_TYPE, " +
"CLIENT_PORT, PAYLOAD_TYPE, VIRTUAL_TYPE, VID_CLIENT, VID_SERVER, CLIENT_ADDR, SERVER_ADDR, CLIENT_TUNNEL_ADDR, " +
"SERVER_TUNNEL_ADDR, ERROR_CODE_2, IPID, C2S_PKTS, C2S_OCTETS, S2C_PKTS, S2C_OCTETS, NUM_SUCC_TRANS, CONNECT_TIME, " +
"TOTAL_RESP, TIMEOUTS, RETRIES, RAI, TCP_SYNS, TCP_SYN_ACKS, TCP_SYN_RESETS, TCP_SYN_FINS, EVENT_TYPE, FLAGS, TIME_STAMP, " +
"EVENT_ID, EVENT_CODE) VALUES (" +
"'" + res[i].XDR_ID + "', '" + res[i].XDR_TYPE + "', '" + res[i].SESSION_START_TIME + "', '" + res[i].SESSION_END_TIME + "', " +
"'" + res[i].SESSION_LAST_UPDATE_TIME + "', '" + res[i].SESSION_FLAG + "', '" + res[i].VERSION + "', '" + res[i].CONNECTION_ROW_COUNT + "', " +
"'" + res[i].ERROR_CODE + "', '" + res[i].METHOD + "', '" + res[i].HOST_LEN + "', '" + res[i].HOST + "', " +
"'" + res[i].URL_LEN + "', '" + res[i].URL + "', '" + res[i].CONNECTION_START_TIME + "', '" + res[i].CONNECTION_LAST_UPDATE_TIME + "', " +
"'" + res[i].CONNECTION_FLAG + "', '" + res[i].CONNECTION_ID + "', '" + res[i].TOTAL_EVENT_COUNT + "', '" + res[i].TUNNEL_PAIR_ID + "', " +
"'" + res[i].RESPONSIVENESS_TYPE + "', '" + res[i].CLIENT_PORT + "', '" + res[i].PAYLOAD_TYPE + "', '" + res[i].VIRTUAL_TYPE + "', " +
"'" + res[i].VID_CLIENT + "', '" + res[i].VID_SERVER + "', '" + res[i].CLIENT_ADDR + "', '" + res[i].SERVER_ADDR + "', " +
"'" + res[i].CLIENT_TUNNEL_ADDR + "', '" + res[i].SERVER_TUNNEL_ADDR + "', '" + res[i].ERROR_CODE_2 + "', '" + res[i].IPID + "', " +
"'" + res[i].C2S_PKTS + "', '" + res[i].C2S_OCTETS + "', '" + res[i].S2C_PKTS + "', '" + res[i].S2C_OCTETS + "', " +
"'" + res[i].NUM_SUCC_TRANS + "', '" + res[i].CONNECT_TIME + "', '" + res[i].TOTAL_RESP + "', '" + res[i].TIMEOUTS + "', " +
"'" + res[i].RETRIES + "', '" + res[i].RAI + "', '" + res[i].TCP_SYNS + "', '" + res[i].TCP_SYN_ACKS + "', " +
"'" + res[i].TCP_SYN_RESETS + "', '" + res[i].TCP_SYN_FINS + "', '" + res[i].EVENT_TYPE + "', '" + res[i].FLAGS + "', " +
"'" + res[i].TIME_STAMP + "', '" + res[i].EVENT_ID + "', '" + res[i].EVENT_CODE + "')";
OracleCommand command = new OracleCommand(sql, con);
command.ExecuteNonQuery();
}
}
Console.WriteLine("Successfully Inserted");
Console.WriteLine();
Console.WriteLine("Number of Row Data: " + jmlRecord.ToString());
Console.WriteLine();
Console.WriteLine("The size of {0} is {1} bytes.", f2.Name, f2.Length);
con.Close();
}
static void ShowPercentProgress(string message, int currElementIndex, int totalElementCount)
{
if (currElementIndex < 0 || currElementIndex >= totalElementCount)
{
throw new InvalidOperationException("currElement out of range");
}
int percent = (100 * (currElementIndex + 1)) / totalElementCount;
Console.Write("\r{0}{1}% complete", message, percent);
if (currElementIndex == totalElementCount - 1)
{
Console.WriteLine(Environment.NewLine);
}
}
static int CountLinesInFile(string f)
{
int count = 0;
using (StreamReader r = new StreamReader(f))
{
string line;
while ((line = r.ReadLine()) != null)
{
count++;
}
}
return count;
}
}
[DelimitedRecord(",")]
public class XL_XDR
{
public string XDR_ID;
public string XDR_TYPE;
public string SESSION_START_TIME;
public string SESSION_END_TIME;
public string SESSION_LAST_UPDATE_TIME;
public string SESSION_FLAG;
public string VERSION;
public string CONNECTION_ROW_COUNT;
public string ERROR_CODE;
public string METHOD;
public string HOST_LEN;
public string HOST;
public string URL_LEN;
public string URL;
public string CONNECTION_START_TIME;
public string CONNECTION_LAST_UPDATE_TIME;
public string CONNECTION_FLAG;
public string CONNECTION_ID;
public string TOTAL_EVENT_COUNT;
public string TUNNEL_PAIR_ID;
public string RESPONSIVENESS_TYPE;
public string CLIENT_PORT;
public string PAYLOAD_TYPE;
public string VIRTUAL_TYPE;
public string VID_CLIENT;
public string VID_SERVER;
public string CLIENT_ADDR;
public string SERVER_ADDR;
public string CLIENT_TUNNEL_ADDR;
public string SERVER_TUNNEL_ADDR;
public string ERROR_CODE_2;
public string IPID;
public string C2S_PKTS;
public string C2S_OCTETS;
public string S2C_PKTS;
public string S2C_OCTETS;
public string NUM_SUCC_TRANS;
public string CONNECT_TIME;
public string TOTAL_RESP;
public string TIMEOUTS;
public string RETRIES;
public string RAI;
public string TCP_SYNS;
public string TCP_SYN_ACKS;
public string TCP_SYN_RESETS;
public string TCP_SYN_FINS;
public string EVENT_TYPE;
public string FLAGS;
public string TIME_STAMP;
public string EVENT_ID;
public string EVENT_CODE;
}
}
答案 0 :(得分:2)
我很幸运在ODB.NET中使用批量绑定方法。在此页面上搜索“ArrayBindCount”。
http://dotnetslackers.com/articles/ado_net/BulkOperationsUsingOracleDataProviderForNETODPNET.aspx
这将允许您一次插入所有内容,而不是数千个小插入。您目前看到的时间将变为几分钟。
答案 1 :(得分:2)
虽然强烈反对Vincent建议使用绑定变量(这可能会带来巨大的性能提升以及阻止DBA限制你破坏共享池)和Brosto建议进行大量绑定,但我倾向于问你为什么要在C#中首先编写这类东西。执行诸如使用外部表之类的操作会更高效,以便您的应用程序将文件放在数据库服务器的文件系统上,并且数据的解析和加载由外部表定义完成(甚至可以使用您的应用程序调用SQL * Loader)。这使您可以利用Oracle已针对此类处理优化的代码。
此外,由于您在开始时计算文件中的行数一次以显示进度条,您可以通过从更容易获得的信息中获取近似行数来优化它。您大致了解文件中每行的字节数,文件大小是一个相对容易检索的文件属性。这应该允许您相对准确地估计文件中的行数,这对于进度条来说应该足够了。
答案 2 :(得分:0)
只是删除对CountLinesInFile
的调用可能会有所帮助,因为在该方法中,您逐行读取所有文件,正如您所说的文件是biiig ...
答案 3 :(得分:0)
使用批量插入来快速加载:
Insert Bulk Data From CSV File To Database Table Using SQLBulkCopy Class
或
使用SQL * Loader实用程序:
Loading CSV file data into a Table Variable (Index-by table) in Oracle