将DataSet写入字符分隔文件的最快方法

时间:2015-11-21 20:05:30

标签: c# performance oracle11g

这是迄今为止我发现的从Oracle DB检索响应记录集并将其写入分隔文件的最快方法。更快会更好。请提供建议。

检索结果集:

// Runs the stored procedure named by extractToRun and buffers its result into ds.
// extractToRun, ds and the four OracleParameter objects are declared by the
// enclosing method, which is not part of this snippet.
using (var oracleConnection = new OracleConnection(ContextInfo.ConnectionString))
{
    oracleConnection.Open();

    // Pass the connection instance. The original passed the type name
    // OracleConnection here, which does not compile.
    using (var oracleCommand = new OracleCommand(extractToRun, oracleConnection))
    {
        oracleCommand.CommandType = CommandType.StoredProcedure;
        oracleCommand.BindByName = true;
        // Ask for 128x the provider's default fetch buffer to cut down on round trips.
        oracleCommand.FetchSize = oracleCommand.FetchSize * 128;
        oracleCommand.InitialLONGFetchSize = 5000;
        oracleCommand.Parameters.Add(refCursorOracleParameter);
        oracleCommand.Parameters.Add(startDateOracleParameter);
        oracleCommand.Parameters.Add(endDateOracleParameter);
        oracleCommand.Parameters.Add(jobIdOracleParameter);

        using (var oracleDataAdapter = new OracleDataAdapter(oracleCommand))
        {
            oracleDataAdapter.Fill(ds);
            return ds;
        }
    }
    // No explicit Close()/Dispose(): the using statement disposes the connection
    // on every exit path, including the return above and any exception.
}

处理数据并将其写入文件:

/// <summary>
/// Appends the rows of <paramref name="table"/> to <paramref name="filename"/> as delimited text,
/// one CRLF-terminated line per row, encoded with the BinaryWriter default encoding (UTF-8).
/// </summary>
/// <param name="table">Table to export. It is disposed after a successful export (behavior kept from the original).</param>
/// <param name="filename">Target file; opened in append mode and created if it does not exist.</param>
/// <param name="encloseWith">Text written before and after every non-empty value; null or empty disables enclosing.</param>
/// <param name="delimiter">Text written between values; null is treated as empty.</param>
/// <param name="includeHeader">When true, a line with the column names is written before the rows.</param>
/// <param name="fieldsToExclude">
/// Columns to leave out of the header. A column is skipped when its upper-cased name is a
/// substring of the upper-cased value. NOTE(review): the exclusion is not applied to the data rows.
/// </param>
/// <param name="fixedLengthValues">Currently unused.</param>
/// <remarks>
/// Rows are formatted in parallel and written under a lock, so the order of lines in the file
/// is not guaranteed to match the order of rows in the table. Nothing is written for an empty table.
/// </remarks>
public static void ExportDataTableToDelimitedFile(DataTable table, string filename, string encloseWith, string delimiter, bool includeHeader, string fieldsToExclude, bool fixedLengthValues)
{
    String excludeList = String.Empty;

    if (!String.IsNullOrEmpty(fieldsToExclude))
    {
        excludeList = fieldsToExclude.ToUpper();
    }

    String fieldDelimiter = delimiter ?? String.Empty;
    String enclosure = encloseWith ?? String.Empty;
    Boolean hasEnclosure = enclosure.Length > 0;

    using (FileStream fs = new FileStream(filename, FileMode.Append, FileAccess.Write, FileShare.ReadWrite, 131072, FileOptions.None))
    using (BinaryWriter sw = new BinaryWriter(fs))
    {
        if (table.Rows.Count == 0)
        {
            // Write(String.Empty) would emit a 0x00 length-prefix byte, so write nothing.
            return;
        }

        // Handle header
        if (includeHeader)
        {
            String header = String.Empty;
            foreach (DataColumn clm in table.Columns)
            {
                if (clm.ColumnName.Length == 0 || excludeList.Contains(clm.ColumnName.ToUpper()))
                {
                    continue;
                }

                String formattedHeader = enclosure + clm.ColumnName + enclosure;
                header = header.Length > 0
                    ? header + fieldDelimiter + formattedHeader
                    : formattedHeader;
            }

            if (header.Length > 0)
            {
                // Write(char[]) emits the raw characters. Write(string) would prepend a
                // length prefix that corrupts a text file. The CRLF keeps the first data
                // row from being glued onto the header line.
                sw.Write((header + "\r\n").ToCharArray());
            }
        }

        // Handle values in data rows now
        Object writeGate = new Object();
        ParallelOptions rowOptions = new ParallelOptions();
        rowOptions.MaxDegreeOfParallelism = Environment.ProcessorCount;
        Parallel.ForEach(table.Rows.Cast<DataRow>(), rowOptions, row =>
        {
            String[] values = row.ItemArray.Select(item => item.ToString()).ToArray();

            // Size the buffer for the worst case (every value delimited and enclosed)
            // instead of a fixed 8192 chars, which overflowed on long rows.
            Int32 capacity = 2; // trailing CR LF
            for (Int32 i = 0; i < values.Length; i++)
            {
                capacity += values[i].Length + fieldDelimiter.Length + (2 * enclosure.Length);
            }

            char[] rowValue = new char[capacity];
            Int32 rowValueIndex = 0;

            for (Int32 i = 0; i < values.Length; i++)
            {
                String value = values[i];

                // Decide by field position, not by "anything written yet": otherwise
                // leading empty fields lose their delimiter and later columns shift left.
                if (i > 0)
                {
                    fieldDelimiter.CopyTo(0, rowValue, rowValueIndex, fieldDelimiter.Length);
                    rowValueIndex += fieldDelimiter.Length;
                }

                // Empty values are never enclosed.
                Boolean useEnclosed = hasEnclosure && value.Length > 0;
                if (useEnclosed)
                {
                    enclosure.CopyTo(0, rowValue, rowValueIndex, enclosure.Length);
                    rowValueIndex += enclosure.Length;
                }

                value.CopyTo(0, rowValue, rowValueIndex, value.Length);
                rowValueIndex += value.Length;

                if (useEnclosed)
                {
                    enclosure.CopyTo(0, rowValue, rowValueIndex, enclosure.Length);
                    rowValueIndex += enclosure.Length;
                }
            }

            rowValue[rowValueIndex++] = '\r';
            rowValue[rowValueIndex++] = '\n';

            // The writer is shared by all workers; serialize access so lines never interleave.
            lock (writeGate)
            {
                sw.Write(rowValue, 0, rowValueIndex);
            }
        });

        table.Dispose();
    }
}

我知道我应该重命名一些变量,并以相同的方式处理标题(我目前并不写入标题),所以这纯粹是一个逻辑问题,关于代码风格的回答无助于提高性能。

令人费解的是网络性能。当它快速返回5行数据集时,它只使用1.5%的带宽?我正在使用最新的ODP.Net(Oracle)访问11g数据库。我试过Devart的提供程序,结果彻底失败了。

Network Performance

处理器负载反映了Parallel.ForEach对数据表中行的影响,这是一件好事。

Processor Performance

2 个答案:

答案 0 :(得分:1)

这是我能够获得的最快速度。

检索数据:

/// <summary>
/// Executes the stored procedure named by <paramref name="extractToRun"/> and loads the
/// rows returned through its ref cursor into a new <see cref="DataTable"/>.
/// </summary>
/// <param name="extractToRun">Name of the stored procedure to execute.</param>
/// <param name="startDate">Value bound to the pStartDate parameter.</param>
/// <param name="endDate">Value bound to the pEndDate parameter.</param>
/// <returns>A table holding every row read from the data reader.</returns>
public static DataTable GetData(String extractToRun, DateTime startDate, DateTime endDate)
{
    // Output ref cursor
    OracleParameter refCursorOracleParameter = new OracleParameter
    {
        ParameterName = "pCursor",
        Direction = ParameterDirection.Output,
        OracleDbType = OracleDbType.RefCursor
    };

    // NOTE(review): a DateTime value is bound as Varchar2 here and below, so the text the
    // procedure receives depends on the provider's conversion. Confirm the expected format.
    OracleParameter startDateOracleParameter = new OracleParameter
    {
        ParameterName = "pStartDate",
        Direction = ParameterDirection.Input,
        OracleDbType = OracleDbType.Varchar2,
        Value = startDate
    };

    OracleParameter endDateOracleParameter = new OracleParameter
    {
        ParameterName = "pEndDate",
        Direction = ParameterDirection.Input,
        OracleDbType = OracleDbType.Varchar2,
        Value = endDate
    };

    // NOTE(review): the job id is hard-coded.
    OracleParameter jobIdOracleParameter = new OracleParameter
    {
        ParameterName = "pJobId",
        Direction = ParameterDirection.Input,
        Value = "123456"
    };

    // The using statements dispose reader, command and connection in that order on
    // every exit path, so no manual Close()/Dispose() calls or try/finally are needed.
    using (var oracleConnection = new OracleConnection(ContextInfo.ConnectionString))
    {
        oracleConnection.Open();

        using (var oracleCommand = new OracleCommand(extractToRun, oracleConnection))
        {
            oracleCommand.CommandType = CommandType.StoredProcedure;
            oracleCommand.BindByName = true;
            oracleCommand.FetchSize = oracleCommand.FetchSize * 128;
            oracleCommand.InitialLONGFetchSize = 5000;
            oracleCommand.Parameters.Add(refCursorOracleParameter);
            oracleCommand.Parameters.Add(startDateOracleParameter);
            oracleCommand.Parameters.Add(endDateOracleParameter);
            oracleCommand.Parameters.Add(jobIdOracleParameter);

            using (OracleDataReader rdr = oracleCommand.ExecuteReader())
            {
                // Size the fetch buffer as a multiple of the reader's row size.
                rdr.FetchSize = rdr.RowSize * 65536;

                DataTable dt = new DataTable();
                // Pre-size the table for the expected row count to avoid repeated growth.
                dt.MinimumCapacity = 400000;
                dt.BeginLoadData();
                dt.Load(rdr, LoadOption.Upsert);
                dt.EndLoadData();
                return dt;
            }
        }
    }
}

处理数据:

/// <summary>
/// Appends the rows of <paramref name="table"/> to <paramref name="filename"/> as delimited text,
/// one CRLF-terminated line per row, encoded with the BinaryWriter default encoding (UTF-8).
/// </summary>
/// <param name="table">Table to export. It is disposed after a successful export (behavior kept from the original).</param>
/// <param name="filename">Target file; opened in append mode and created if it does not exist.</param>
/// <param name="encloseWith">Text written before and after every non-empty value; null or empty disables enclosing.</param>
/// <param name="delimiter">Text written between values; null is treated as empty.</param>
/// <param name="includeHeader">When true, a line with the column names is written before the rows.</param>
/// <param name="fieldsToExclude">
/// Columns to leave out of the header. A column is skipped when its upper-cased name is a
/// substring of the upper-cased value. NOTE(review): the exclusion is not applied to the data rows.
/// </param>
/// <param name="fixedLengthValues">Currently unused.</param>
/// <remarks>
/// Rows are formatted in parallel and written under a lock, so the order of lines in the file
/// is not guaranteed to match the order of rows in the table. Nothing is written for an empty table.
/// </remarks>
public static void ExportDataTableToDelimitedFile(DataTable table, string filename, string encloseWith, string delimiter, bool includeHeader, string fieldsToExclude, bool fixedLengthValues)
{
    String excludeList = String.Empty;

    if (!String.IsNullOrEmpty(fieldsToExclude))
    {
        excludeList = fieldsToExclude.ToUpper();
    }

    String fieldDelimiter = delimiter ?? String.Empty;
    String enclosure = encloseWith ?? String.Empty;

    // Converted once up front; the arrays are only read by the parallel workers.
    char[] delimiterChars = fieldDelimiter.ToCharArray();
    char[] enclosureChars = enclosure.ToCharArray();
    Boolean hasEnclosure = enclosureChars.Length > 0;

    using (FileStream fs = new FileStream(filename, FileMode.Append, FileAccess.Write, FileShare.ReadWrite, 2097152, FileOptions.None))
    using (BinaryWriter sw = new BinaryWriter(fs))
    {
        if (table.Rows.Count == 0)
        {
            // Write(String.Empty) would emit a 0x00 length-prefix byte, so write nothing.
            return;
        }

        // Handle header
        if (includeHeader)
        {
            String header = String.Empty;
            foreach (DataColumn clm in table.Columns)
            {
                if (clm.ColumnName.Length == 0 || excludeList.Contains(clm.ColumnName.ToUpper()))
                {
                    continue;
                }

                String formattedHeader = enclosure + clm.ColumnName + enclosure;
                header = header.Length > 0
                    ? header + fieldDelimiter + formattedHeader
                    : formattedHeader;
            }

            if (header.Length > 0)
            {
                // Write(char[]) emits the raw characters. Write(string) would prepend a
                // length prefix that corrupts a text file. The CRLF keeps the first data
                // row from being glued onto the header line.
                sw.Write((header + "\r\n").ToCharArray());
            }
        }

        // Handle values in data rows now
        Object writeGate = new Object();
        Parallel.ForEach(table.Rows.Cast<DataRow>(), row =>
        {
            char[][] rowData = row.ItemArray.Select(item => item.ToString().ToCharArray()).ToArray();

            // Size the buffer for the worst case (every value delimited and enclosed)
            // instead of a fixed 8192 chars, which made CopyTo throw on long rows.
            Int32 capacity = 2; // trailing CR LF
            for (Int32 i = 0; i < rowData.Length; i++)
            {
                capacity += rowData[i].Length + delimiterChars.Length + (2 * enclosureChars.Length);
            }

            char[] rowValue = new char[capacity];
            Int32 rowValueIndex = 0;

            for (Int32 i = 0; i < rowData.Length; i++)
            {
                // Decide by field position, not by "anything written yet": otherwise
                // leading empty fields lose their delimiter and later columns shift left.
                if (i > 0)
                {
                    delimiterChars.CopyTo(rowValue, rowValueIndex);
                    rowValueIndex += delimiterChars.Length;
                }

                // Empty values are never enclosed.
                Boolean useEnclosed = hasEnclosure && rowData[i].Length > 0;
                if (useEnclosed)
                {
                    enclosureChars.CopyTo(rowValue, rowValueIndex);
                    rowValueIndex += enclosureChars.Length;
                }

                rowData[i].CopyTo(rowValue, rowValueIndex);
                rowValueIndex += rowData[i].Length;

                if (useEnclosed)
                {
                    enclosureChars.CopyTo(rowValue, rowValueIndex);
                    rowValueIndex += enclosureChars.Length;
                }
            }

            rowValue[rowValueIndex++] = '\r';
            rowValue[rowValueIndex++] = '\n';

            // The writer is shared by all workers; serialize access so lines never interleave.
            lock (writeGate)
            {
                sw.Write(rowValue, 0, rowValueIndex);
            }
        });

        table.Dispose();
    }
}

有几点需要注意。使用DataReader通过Load加载到DataTable比DataSet.Fill快40%,但是不要将FetchSize设置为高于64K,超过之后性能会下降,32K可能是最好的。字符数组比StringBuilder快得多。依我拙见,C#的一个缺陷是我们无法使用汇编语言子程序。我考虑过编写一个C++ DLL,这样就可以使用汇编语言子程序来复制内存,然后就不需要调用ToCharArray()了。当然,我没有看过IL,不清楚ToCharArray()究竟做了什么,但性能分析器指出这段代码占了26%的时间。

令人惊讶的是,这些变化将网络利用率提高到了4.5%(对于企业网络中的单台PC而言已经很高),并且将CPU利用率降低到80%左右,因为它现在主要是在等待磁盘写入,而不是忙于复制字符串。

我没有显示原始代码,但过去需要13-15分钟才能将数据导出到管道分隔文件。通过这些更改,导出完全相同的数据需要40-45秒。

我也没有展示原始数据库查询:其中有七个查询,全部用UNION联合在一起。我把它们拆开,以便并行运行。性能修复需要作为一个整体来解决。此前许多人试图只从数据库入手解决这个问题,却没有人真正关注客户端,并试图找出真正的问题是什么。

希望这有助于将来。

答案 1 :(得分:0)

好!这是一个更好的答案!

/// <summary>
/// Executes the stored procedure named by <paramref name="extractToRun"/> and returns the
/// characters of the first column of every row as a <c>ROW_DATA</c> entry.
/// </summary>
/// <param name="extractToRun">Name of the stored procedure to execute.</param>
/// <param name="startDate">Value bound to the pStartDate parameter.</param>
/// <param name="endDate">Value bound to the pEndDate parameter.</param>
/// <returns>One entry per row read, in the order the reader returned them.</returns>
public static List<ROW_DATA> GetData(String extractToRun, DateTime startDate, DateTime endDate)
{
    List<ROW_DATA> dataTable = new List<ROW_DATA>();

    // Output ref cursor
    OracleParameter refCursorOracleParameter = new OracleParameter
    {
        ParameterName = "pCursor",
        Direction = ParameterDirection.Output,
        OracleDbType = OracleDbType.RefCursor
    };

    // NOTE(review): a DateTime value is bound as Varchar2 here and below, so the text the
    // procedure receives depends on the provider's conversion. Confirm the expected format.
    OracleParameter startDateOracleParameter = new OracleParameter
    {
        ParameterName = "pStartDate",
        Direction = ParameterDirection.Input,
        OracleDbType = OracleDbType.Varchar2,
        Value = startDate
    };

    OracleParameter endDateOracleParameter = new OracleParameter
    {
        ParameterName = "pEndDate",
        Direction = ParameterDirection.Input,
        OracleDbType = OracleDbType.Varchar2,
        Value = endDate
    };

    // NOTE(review): the job id is hard-coded.
    OracleParameter jobIdOracleParameter = new OracleParameter
    {
        ParameterName = "pJobId",
        Direction = ParameterDirection.Input,
        Value = "123456"
    };

    // The using statements dispose reader, command and connection on every exit path.
    // The original had an extra closing brace that ended the reader's using block early,
    // leaving rdr out of scope for the manual Close()/Dispose() calls and detaching the
    // finally from its try. Those calls and the try/finally are gone.
    using (var oracleConnection = new OracleConnection(ContextInfo.ConnectionString))
    {
        oracleConnection.Open();

        using (var oracleCommand = new OracleCommand(extractToRun, oracleConnection))
        {
            oracleCommand.CommandType = CommandType.StoredProcedure;
            oracleCommand.BindByName = true;
            oracleCommand.FetchSize = oracleCommand.FetchSize * 128;
            oracleCommand.InitialLONGFetchSize = 5000;
            oracleCommand.Parameters.Add(refCursorOracleParameter);
            oracleCommand.Parameters.Add(startDateOracleParameter);
            oracleCommand.Parameters.Add(endDateOracleParameter);
            oracleCommand.Parameters.Add(jobIdOracleParameter);

            using (OracleDataReader rdr = oracleCommand.ExecuteReader())
            {
                //byte[] columnBytes = new byte[16384];
                // Size the fetch buffer as a multiple of the reader's row size.
                rdr.FetchSize = rdr.RowSize * 262144;
                while (rdr.Read())
                {
                    // A null buffer makes GetChars return the length of column 0;
                    // the second call copies the characters into an exactly sized array.
                    Int32 charLength = (Int32)rdr.GetChars(0, 0, null, 0, 0);
                    char[] colChars = new char[charLength];
                    rdr.GetChars(0, 0, colChars, 0, charLength);
                    //OracleString colValue = rdr.GetOracleString(0);
                    //int valueLength = colValue.Length;
                    //unsafe
                    //{
                    //    fixed (char* pcolValue = colValue.Value)
                    //    {
                    //        fixed (byte* pcolBytes = columnBytes)
                    //        {
                    //            for (int i = 0; i < valueLength; i++)
                    //            {
                    //                pcolBytes[i] = (byte)pcolValue[i];
                    //            }
                    //        }
                    //    }
                    //}
                    ROW_DATA rowData = new ROW_DATA { length = charLength, rowValues = colChars };
                    dataTable.Add(rowData);
                }
            }

            return dataTable;
        }
    }
}

我故意保留了注释掉的代码,以表明我甚至尝试过用不安全代码把数据转换成我需要的格式。事实证明,GetChars正好以我想要的方式返回数据,所以我可以直接把它写入磁盘。现在网络利用率高达11%,检索413K行并将它们写入磁盘只需27秒。我还修改了存储过程,让它返回管道分隔的字符串,因此客户端只接收一列数据。它确实很快,但我还有办法把时间再缩短一半。敬请关注。