SSIS:将记录集写入文件的脚本任务

时间:2016-05-05 20:32:09

标签: sql sql-server csv ssis export-to-csv

我正在使用SQL Server Data Tools 2013来创建SSIS包。此程序包具有带有完整结果集选项的执行SQL任务,可将查询结果推送到Object类型的SSIS变量中。

我在脚本任务中使用以下内容来获取存储在对象变量中的记录集并将其写入CSV:

    ' Script Task entry point. Reads the recordset held in the vResultSet SSIS
    ' variable, renders it as comma-separated text (header row + one line per
    ' data row) in a single in-memory String, and writes that String to
    ' vDestinationPath + vFileName in one call.
    ' Values are written via ToString() with no quoting or escaping.
    Public Sub Main()

    Dim fileName As String = Dts.Variables("vFileName").Value.ToString
    Dim destinationPath As String = Dts.Variables("vDestinationPath").Value.ToString
    ' Plain concatenation: no path separator is inserted between folder and file name.
    Dim destinationPathAndFileName As String = destinationPath + fileName
    Dim fileContents As String = ""

    Dim oleDB As OleDbDataAdapter = New OleDbDataAdapter()
    Dim table As DataTable = New DataTable()
    Dim rs As System.Object = Dts.Variables("vResultSet").Value

    ' Populate DataTable with vResultSet data
    oleDB.Fill(table, rs)

    ' Loop through columns and concatenate with commas
    For Each col As DataColumn In table.Columns
        fileContents &= col.ColumnName & ","
    Next

    ' Remove final comma from columns string and append line break
    ' (assumes the table has at least one column, otherwise there is no comma to remove)
    fileContents = fileContents.Substring(0, fileContents.Length - 1)
    fileContents &= Environment.NewLine

    ' Loop through rows and concatenate with commas
    ' NOTE: .NET strings are immutable, so every "&=" below allocates a new string
    ' and copies everything accumulated so far. The work per field therefore grows
    ' with the size of the output already built, which is why large result sets
    ' take so long with this approach.
    Dim i As Integer
    For Each row As DataRow In table.Rows
        ' i is 1-based here; row(i - 1) converts it back to the 0-based column index.
        For i = 1 To table.Columns.Count
            fileContents &= row(i - 1).ToString() & ","
        Next

        ' Remove final comma from row string and append line break
        ' (Substring copies the entire accumulated text once more per row)
        fileContents = fileContents.Substring(0, fileContents.Length - 1)
        fileContents &= Environment.NewLine

    Next

    ' Write all text to destination file. If file exists, this step will overwrite it.
    System.IO.File.WriteAllText(destinationPathAndFileName, fileContents)

    Dts.TaskResult = ScriptResults.Success
End Sub

这是有效的,但它的速度很慢,比如将一个14k行的数据集写入CSV需要大约25分钟。我无法使用数据流,因为此过程存在于循环中,并且要导出的每个表的元数据不同。我非常确定脚本任务是唯一的选择,但是有没有比循环遍历数据集的每一行更快的方法?如果需要我提供更多信息,请告诉我。

2 个答案:

答案 0 :(得分:4)

您可以根据需要将其翻译成VB.NET。由于我已经有了为另一个项目编写的类似代码,所以我把您的需求和我原有的实现方式结合在了一起。

传入3个SSIS变量:vFileName、vDestinationPath和vResultSet。Main中的代码将ADO记录集转换为DataTable,然后将其添加到DataSet并传递给Persist方法。Persist的delimiter参数默认值为 |。

此实现完全不会尝试处理任何极端情况:它不会用限定符把文本列括起来,不会转义内嵌的限定符,也不会对数据中的换行符做任何处理;此外,OleDbDataAdapter的Fill方法在遇到二进制数据等情况时会失败。

    /// <summary>
    /// Script Task entry point. Converts the ADO recordset stored in the
    /// <c>User::vResultSet</c> variable into a <see cref="DataTable"/>, wraps it in a
    /// <see cref="DataSet"/> and passes it to <c>Persist</c>, which writes it as
    /// delimited text into the folder named by <c>User::vDestinationPath</c>.
    /// </summary>
    /// <remarks>
    /// The task result is <c>Success</c> only when the export completed. If anything
    /// throws, the file name and output folder are logged as information, the
    /// exception is raised as an SSIS error event and the task result is set to
    /// <c>Failure</c> so the package does not silently continue without a file.
    /// </remarks>
    public void Main()
    {
        string fileName = Dts.Variables["User::vFileName"].Value.ToString();
        string outputFolder = Dts.Variables["User::vDestinationPath"].Value.ToString();

        try
        {
            DataTable dt = new DataTable();

            // The adapter is only needed to shred the recordset object into the
            // DataTable; dispose it as soon as the fill is done.
            using (System.Data.OleDb.OleDbDataAdapter adapter = new System.Data.OleDb.OleDbDataAdapter())
            {
                adapter.Fill(dt, Dts.Variables["User::vResultSet"].Value);
            }

            DataSet ds = new DataSet();
            ds.Tables.Add(dt);

            Persist(ds, fileName, outputFolder);

            Dts.TaskResult = (int)ScriptResults.Success;
        }
        catch (Exception ex)
        {
            bool fireAgain = true;

            // Context first, then the error itself. ex.ToString() already contains
            // the inner exception chain, so it is not logged separately.
            Dts.Events.FireInformation(0, "Data Dumper", string.Format("{0}|{1}", "fileName", fileName), string.Empty, 0, ref fireAgain);
            Dts.Events.FireInformation(0, "Data Dumper", string.Format("{0}|{1}", "outputFolder", outputFolder), string.Empty, 0, ref fireAgain);
            Dts.Events.FireError(0, "Data Dumper", string.Format("{0}|{1}", "ExceptionDetails", ex.ToString()), string.Empty, 0);

            Dts.TaskResult = (int)ScriptResults.Failure;
        }
    }

    /// <summary>
    /// Writes every table of <paramref name="ds"/> to its own delimited text file.
    /// </summary>
    /// <param name="ds">Data set to export. A <c>null</c> data set is a no-op.</param>
    /// <param name="originalFileName">
    /// File name whose base name (without extension) prefixes each output file.
    /// </param>
    /// <param name="outputFolder">Folder the files are written to.</param>
    /// <param name="delimiter">Field separator; defaults to <c>|</c>.</param>
    /// <remarks>
    /// Each file is named <c>{base}.{TableName}.csv</c> and is overwritten if it
    /// already exists. The first line holds the column names, followed by one line
    /// per row with each value rendered through <c>ToString()</c>. No quoting or
    /// escaping is applied. Text is written with <c>Encoding.Default</c> unless the
    /// table carries an extended property <c>"Unicode"</c> set to <c>true</c>, in
    /// which case <c>Encoding.Unicode</c> is used. The path of the written file is
    /// stored in the table's extended property <c>"Path"</c>.
    /// </remarks>
    public static void Persist(System.Data.DataSet ds, string originalFileName, string outputFolder, string delimiter = "|")
    {
        if (ds == null)
        {
            return;
        }

        string baseFileName = System.IO.Path.GetFileNameWithoutExtension(originalFileName);

        foreach (System.Data.DataTable table in ds.Tables)
        {
            string outFilePath = System.IO.Path.Combine(outputFolder, string.Format("{0}.{1}.csv", baseFileName, table.TableName));
            System.Text.Encoding e = System.Text.Encoding.Default;

            if (table.ExtendedProperties.ContainsKey("Unicode") && (bool)table.ExtendedProperties["Unicode"])
            {
                e = System.Text.Encoding.Unicode;
            }

            using (System.IO.StreamWriter file = new System.IO.StreamWriter(System.IO.File.Open(outFilePath, System.IO.FileMode.Create), e))
            {
                // Use the indexer rather than Add: Add throws when "Path" is already
                // present, i.e. whenever the same table is persisted a second time.
                table.ExtendedProperties["Path"] = outFilePath;

                // add header row
                System.Collections.Generic.List<string> header = new System.Collections.Generic.List<string>(table.Columns.Count);
                foreach (System.Data.DataColumn item in table.Columns)
                {
                    header.Add(item.ColumnName);
                }

                file.WriteLine(string.Join(delimiter, header));

                // Each row is joined once and streamed straight to the file, so the
                // full output is never held in memory.
                foreach (System.Data.DataRow row in table.Rows)
                {
                    // TODO: For string based fields, capture the max length
                    IEnumerable<string> fields = (row.ItemArray).Select(field => field.ToString());

                    file.WriteLine(string.Join(delimiter, fields));
                }
            }
        }
    }

我得先离开了,不过用Biml实现的话大致如下:

<Biml xmlns="http://schemas.varigence.com/biml.xsd">
    <!-- Single OLE DB connection shared by both Execute SQL tasks below.
         NOTE(review): the connection is named "tempdb" but its Initial Catalog
         is AdventureWorksDW2014; the name looks like a leftover. -->
    <Connections>
        <OleDbConnection Name="tempdb" ConnectionString="Data Source=localhost\dev2014;Initial Catalog=AdventureWorksDW2014;Provider=SQLNCLI11.0;Integrated Security=SSPI;"/>
    </Connections>
    <Packages>
        <!-- Package flow: list schema/table pairs, then for each pair run
             SELECT * against that table and hand the result set to the script task. -->
        <Package Name="so_37059747" ConstraintMode="Linear">
            <Variables>
                <!-- QuerySource: query returning one row per table (schema name, table name). -->
                <Variable DataType="String" Name="QuerySource"><![CDATA[SELECT
    S.name
,   T.name
FROM
    sys.schemas AS S
    INNER JOIN
        sys.tables AS T 
        ON T.schema_id = S.schema_id;]]></Variable>
                <!-- SchemaName / TableName: design-time defaults; overwritten by the
                     ForEach loop's variable mappings on every iteration. -->
                <Variable DataType="String" Name="SchemaName">dbo</Variable>
                <Variable DataType="String" Name="TableName">foo</Variable>
                <!-- QueryTableDump: expression that builds the SELECT for the current
                     schema/table by wrapping both names in square brackets. -->
                <Variable DataType="String" Name="QueryTableDump" EvaluateAsExpression="true">"SELECT X.* FROM [" + @[User::SchemaName] + "].[" + @[User::TableName] + "] AS X;"</Variable>
                <!-- rsTables: full result set of QuerySource (the list to loop over).
                     vResultSet: full result set of QueryTableDump (the data to export). -->
                <Variable DataType="Object" Name="rsTables"></Variable>
                <Variable DataType="Object" Name="vResultSet"></Variable>
                <!-- vFileName: expression yielding "<schema>_<table>.txt" for the current iteration.
                     vDestinationPath: folder the script task writes into. -->
                <Variable DataType="String" Name="vFileName" EvaluateAsExpression="true">@[User::SchemaName] + "_" + @[User::TableName] + ".txt"</Variable>
                <Variable DataType="String" Name="vDestinationPath">c:\ssisdata\so\Output</Variable>
            </Variables>
            <Tasks>
                <!-- Step 1: run QuerySource and store the full result set in rsTables. -->
                <ExecuteSQL 
                    ConnectionName="tempdb" 
                    Name="SQL Generate Loop data"
                    ResultSet="Full">
                    <VariableInput VariableName="User.QuerySource" />
                    <Results>
                        <Result VariableName="User.rsTables" Name="0" />
                    </Results>
                </ExecuteSQL>
                <!-- Step 2: iterate rsTables; column 0 goes to SchemaName, column 1 to TableName. -->
                <ForEachAdoLoop SourceVariableName="User.rsTables" Name="FELC Shred rs" ConstraintMode="Linear">
                    <VariableMappings>
                        <VariableMapping VariableName="User.SchemaName" Name="0" />
                        <VariableMapping VariableName="User.TableName" Name="1" />
                    </VariableMappings>
                    <Tasks>
                        <!-- Step 2a: run QueryTableDump and store the full result set in vResultSet. -->
                        <ExecuteSQL 
                            ConnectionName="tempdb" 
                            Name="SQL Generate Export data"
                            ResultSet="Full">
                            <VariableInput VariableName="User.QueryTableDump" />
                            <Results>
                                <Result VariableName="User.vResultSet" Name="0" />
                            </Results>
                        </ExecuteSQL>
                        <!-- Step 2b: script task (project ST_RS2CSV) that writes vResultSet to a text file. -->
                        <Script ProjectCoreName="ST_RS2CSV" Name="SCR Convert to text">
                            <ScriptTaskProjectReference ScriptTaskProjectName="ST_RS2CSV" />
                        </Script>
                    </Tasks>
                </ForEachAdoLoop>
            </Tasks>
        </Package>
    </Packages>
    <ScriptProjects>
        <ScriptTaskProject ProjectCoreName="ST_RS2CSV" Name="ST_RS2CSV" VstaMajorVersion="0">
            <!-- Package variables exposed read-only to the script; these are exactly
                 the three variables ScriptMain.Main reads through Dts.Variables. -->
            <ReadOnlyVariables>
                <Variable Namespace="User" VariableName="vFileName" DataType="String" />
                <Variable Namespace="User" VariableName="vDestinationPath" DataType="String" />
                <Variable Namespace="User" VariableName="vResultSet" DataType="Object" />
            </ReadOnlyVariables>
            <Files>
                <File Path="ScriptMain.cs" BuildAction="Compile">
                    <![CDATA[namespace DataDumper
{
    using System;
    using System.Collections.Generic;
    using System.Data;
    using System.Linq;
    using System.Text;
    using System.Windows.Forms;
    using System.Xml.Linq;
    using Microsoft.SqlServer.Dts.Runtime;

    /// <summary>
    /// Script Task that dumps the recordset held in an SSIS object variable to a
    /// delimited text file.
    /// </summary>
    [Microsoft.SqlServer.Dts.Tasks.ScriptTask.SSISScriptTaskEntryPointAttribute]
    public partial class ScriptMain : Microsoft.SqlServer.Dts.Tasks.ScriptTask.VSTARTScriptObjectModelBase
    {
        /// <summary>
        /// Entry point. Converts the ADO recordset stored in <c>User::vResultSet</c>
        /// into a <see cref="DataTable"/>, wraps it in a <see cref="DataSet"/> and
        /// passes it to <see cref="Persist"/>, which writes it into the folder named
        /// by <c>User::vDestinationPath</c>.
        /// </summary>
        /// <remarks>
        /// The task result is <c>Success</c> only when the export completed. If
        /// anything throws, the file name and output folder are logged as
        /// information, the exception is raised as an SSIS error event and the task
        /// result is set to <c>Failure</c>.
        /// </remarks>
        public void Main()
        {
            string fileName = Dts.Variables["User::vFileName"].Value.ToString();
            string outputFolder = Dts.Variables["User::vDestinationPath"].Value.ToString();

            try
            {
                DataTable dt = new DataTable();

                // The adapter is only needed to shred the recordset object into the
                // DataTable; dispose it as soon as the fill is done.
                using (System.Data.OleDb.OleDbDataAdapter adapter = new System.Data.OleDb.OleDbDataAdapter())
                {
                    adapter.Fill(dt, Dts.Variables["User::vResultSet"].Value);
                }

                DataSet ds = new DataSet();
                ds.Tables.Add(dt);

                Persist(ds, fileName, outputFolder);

                Dts.TaskResult = (int)ScriptResults.Success;
            }
            catch (Exception ex)
            {
                bool fireAgain = true;

                // Context first, then the error itself. ex.ToString() already
                // contains the inner exception chain, so it is not logged separately.
                Dts.Events.FireInformation(0, "Data Dumper", string.Format("{0}|{1}", "fileName", fileName), string.Empty, 0, ref fireAgain);
                Dts.Events.FireInformation(0, "Data Dumper", string.Format("{0}|{1}", "outputFolder", outputFolder), string.Empty, 0, ref fireAgain);
                Dts.Events.FireError(0, "Data Dumper", string.Format("{0}|{1}", "ExceptionDetails", ex.ToString()), string.Empty, 0);

                Dts.TaskResult = (int)ScriptResults.Failure;
            }
        }

        /// <summary>
        /// Writes every table of <paramref name="ds"/> to its own delimited text file.
        /// </summary>
        /// <param name="ds">Data set to export. A <c>null</c> data set is a no-op.</param>
        /// <param name="originalFileName">
        /// File name whose base name (without extension) prefixes each output file.
        /// </param>
        /// <param name="outputFolder">Folder the files are written to.</param>
        /// <param name="delimiter">Field separator; defaults to <c>|</c>.</param>
        /// <remarks>
        /// Each file is named <c>{base}.{TableName}.csv</c> and is overwritten if it
        /// already exists. The first line holds the column names, followed by one
        /// line per row with each value rendered through <c>ToString()</c>. No
        /// quoting or escaping is applied. Text is written with
        /// <c>Encoding.Default</c> unless the table carries an extended property
        /// <c>"Unicode"</c> set to <c>true</c>, in which case <c>Encoding.Unicode</c>
        /// is used. The path of the written file is stored in the table's extended
        /// property <c>"Path"</c>.
        /// </remarks>
        public static void Persist(System.Data.DataSet ds, string originalFileName, string outputFolder, string delimiter = "|")
        {
            if (ds == null)
            {
                return;
            }

            string baseFileName = System.IO.Path.GetFileNameWithoutExtension(originalFileName);

            foreach (System.Data.DataTable table in ds.Tables)
            {
                string outFilePath = System.IO.Path.Combine(outputFolder, string.Format("{0}.{1}.csv", baseFileName, table.TableName));
                System.Text.Encoding e = System.Text.Encoding.Default;

                if (table.ExtendedProperties.ContainsKey("Unicode") && (bool)table.ExtendedProperties["Unicode"])
                {
                    e = System.Text.Encoding.Unicode;
                }

                using (System.IO.StreamWriter file = new System.IO.StreamWriter(System.IO.File.Open(outFilePath, System.IO.FileMode.Create), e))
                {
                    // Use the indexer rather than Add: Add throws when "Path" is
                    // already present, i.e. when the same table is persisted again.
                    table.ExtendedProperties["Path"] = outFilePath;

                    // add header row
                    System.Collections.Generic.List<string> header = new System.Collections.Generic.List<string>(table.Columns.Count);
                    foreach (System.Data.DataColumn item in table.Columns)
                    {
                        header.Add(item.ColumnName);
                    }

                    file.WriteLine(string.Join(delimiter, header));

                    // Each row is joined once and streamed straight to the file, so
                    // the full output is never held in memory.
                    foreach (System.Data.DataRow row in table.Rows)
                    {
                        // TODO: For string based fields, capture the max length
                        IEnumerable<string> fields = (row.ItemArray).Select(field => field.ToString());

                        file.WriteLine(string.Join(delimiter, fields));
                    }
                }
            }
        }

        /// <summary>Task outcomes, mapped onto the SSIS runtime's execution results.</summary>
        enum ScriptResults
        {
            Success = Microsoft.SqlServer.Dts.Runtime.DTSExecResult.Success,
            Failure = Microsoft.SqlServer.Dts.Runtime.DTSExecResult.Failure
        };
    }
}                
]]>
                </File>
                <!-- Second source file of the script project: assembly-level attributes
                     only (title, company, copyright, version). Unlike ScriptMain.cs, this
                     content is plain element text and is not wrapped in CDATA. -->
                <File Path="Properties\AssemblyInfo.cs" BuildAction="Compile">
                    using System.Reflection;
                    using System.Runtime.CompilerServices;

                    [assembly: AssemblyTitle("AssemblyTitle")]
                    [assembly: AssemblyDescription("")]
                    [assembly: AssemblyConfiguration("")]
                    [assembly: AssemblyCompany("Bill Fellows")]
                    [assembly: AssemblyProduct("ProductName")]
                    [assembly: AssemblyCopyright("Copyright @  2016")]
                    [assembly: AssemblyTrademark("")]
                    [assembly: AssemblyCulture("")]
                    [assembly: AssemblyVersion("1.0.*")]
                </File>
            </Files>
            <!-- Assemblies the script project is compiled against. The two
                 Microsoft.SqlServer.* entries are given with a .dll suffix, the
                 others by assembly name only.
                 NOTE(review): System.Linq is listed as its own assembly; on the
                 .NET Framework the LINQ types normally come from System.Core
                 (also listed). Confirm this entry resolves or is simply ignored. -->
            <AssemblyReferences>
                <AssemblyReference AssemblyPath="System" />
                <AssemblyReference AssemblyPath="System.Core" />
                <AssemblyReference AssemblyPath="System.Data" />
                <AssemblyReference AssemblyPath="System.Data.DataSetExtensions" />
                <AssemblyReference AssemblyPath="System.Windows.Forms" />
                <AssemblyReference AssemblyPath="System.Xml" />
                <AssemblyReference AssemblyPath="Microsoft.SqlServer.ManagedDTS.dll" />
                <AssemblyReference AssemblyPath="Microsoft.SqlServer.ScriptTask.dll" />
                <AssemblyReference AssemblyPath="System.Linq" />
                <AssemblyReference AssemblyPath="System.Xml.Linq" />
                <AssemblyReference AssemblyPath="Microsoft.VisualBasic" />
            </AssemblyReferences>
        </ScriptTaskProject>
    </ScriptProjects>

</Biml>

这在15秒内导出了整个AdventureworksDW2014。

根据评论,下面这一行会失败:IEnumerable<string> fields = (row.ItemArray).Select(field => field.ToString());

确保项目中包含以下using语句。我认为这些扩展方法位于Linq命名空间中,但也可能是在Collections命名空间中。

    using System;
    using System.Collections.Generic;
    using System.Data;
    using System.Linq;
    using System.Text;
    using System.Windows.Forms;
    using System.Xml.Linq;
    using Microsoft.SqlServer.Dts.Runtime;

原来为什么慢?

我的假设是,缓慢归结于所有这些字符串拼接。字符串在.Net中是不可变的,因此每次向其追加一列时都会创建该字符串的一个新副本。而我在构建行时,使用String.Join方法将数组中的每个元素合并成单个字符串。这也简化了追加字段分隔符所需的逻辑。

我还会立即将当前行写入文件,而不是先把所有内容堆积在内存中,最后再通过调用WriteAllText一次性全部转储。

答案 1 :(得分:2)

这是@billinkc的优秀答案的VB.NET版本,以防它对任何人有用:

Imports System

Imports System.Data 
Imports System.Math 
Imports System.Collections 
Imports System.Collections.Generic 
Imports Microsoft.SqlServer.Dts.Runtime 
Imports System.Linq 
Imports System.Text 
Imports System.Windows.Forms

''' <summary>
''' Script Task entry point. Converts the ADO recordset stored in the
''' User::vResultSet variable into a DataTable, wraps it in a DataSet and passes
''' it to Persist, which writes it as delimited text into the folder named by
''' User::vDestinationPath.
''' </summary>
''' <remarks>
''' The task result is Success only when the export completed. If anything
''' throws, the file name and output folder are logged as information, the
''' exception is raised as an SSIS error event and the task result is set to
''' Failure so the package does not silently continue without a file.
''' </remarks>
Public Sub Main()
    Dim fileName As String = Dts.Variables("User::vFileName").Value.ToString()
    Dim outputFolder As String = Dts.Variables("User::vDestinationPath").Value.ToString()

    Try
        Dim dt As New DataTable()

        ' The adapter is only needed to shred the recordset object into the
        ' DataTable; dispose it as soon as the fill is done.
        Using adapter As New System.Data.OleDb.OleDbDataAdapter()
            adapter.Fill(dt, Dts.Variables("User::vResultSet").Value)
        End Using

        Dim ds As New DataSet()
        ds.Tables.Add(dt)

        Persist(ds, fileName, outputFolder)

        Dts.TaskResult = CInt(ScriptResults.Success)
    Catch ex As Exception
        Dim fireAgain As Boolean = True

        ' Context first, then the error itself. ex.ToString() already contains
        ' the inner exception chain, so it is not logged separately.
        Dts.Events.FireInformation(0, "Data Dumper", String.Format("{0}|{1}", "fileName", fileName), String.Empty, 0, fireAgain)
        Dts.Events.FireInformation(0, "Data Dumper", String.Format("{0}|{1}", "outputFolder", outputFolder), String.Empty, 0, fireAgain)
        Dts.Events.FireError(0, "Data Dumper", String.Format("{0}|{1}", "ExceptionDetails", ex.ToString()), String.Empty, 0)

        Dts.TaskResult = CInt(ScriptResults.Failure)
    End Try
End Sub

''' <summary>
''' Writes every table of <paramref name="ds"/> to a delimited text file.
''' </summary>
''' <param name="ds">Data set to export. Nothing is a no-op.</param>
''' <param name="originalFileName">
''' File name whose base name (without extension) names the output file.
''' </param>
''' <param name="outputFolder">Folder the files are written to.</param>
''' <param name="delimiter">Field separator; defaults to a comma.</param>
''' <remarks>
''' With a single table the file is named "{base}.csv". When the data set holds
''' more than one table, each file is named "{base}.{TableName}.csv" so that the
''' tables do not overwrite one another. Existing files are overwritten. The
''' first line holds the column names, followed by one line per row with each
''' value rendered through ToString(). No quoting or escaping is applied. Text is
''' written with Encoding.Default unless the table carries an extended property
''' "Unicode" set to True, in which case Encoding.Unicode is used. The path of the
''' written file is stored in the table's extended property "Path".
''' </remarks>
Public Shared Sub Persist(ds As System.Data.DataSet, originalFileName As String, outputFolder As String, Optional delimiter As String = ",")

    If ds Is Nothing Then
        Return
    End If

    Dim baseFileName As String = System.IO.Path.GetFileNameWithoutExtension(originalFileName)

    ' Only disambiguate by table name when there is something to disambiguate;
    ' the single-table file name stays "{base}.csv".
    Dim includeTableName As Boolean = ds.Tables.Count > 1

    For Each table As System.Data.DataTable In ds.Tables
        Dim outFileName As String
        If includeTableName Then
            outFileName = String.Format("{0}.{1}.csv", baseFileName, table.TableName)
        Else
            outFileName = String.Format("{0}.csv", baseFileName)
        End If

        Dim outFilePath As String = System.IO.Path.Combine(outputFolder, outFileName)
        Dim e As System.Text.Encoding = System.Text.Encoding.[Default]

        If table.ExtendedProperties.ContainsKey("Unicode") AndAlso CBool(table.ExtendedProperties("Unicode")) Then
            e = System.Text.Encoding.Unicode
        End If

        Using file As New System.IO.StreamWriter(System.IO.File.Open(outFilePath, System.IO.FileMode.Create), e)
            ' Assign through the indexer rather than Add: Add throws when "Path" is
            ' already present, i.e. whenever the same table is persisted again.
            table.ExtendedProperties("Path") = outFilePath

            ' add header row
            Dim header As New System.Collections.Generic.List(Of String)(table.Columns.Count)
            For Each item As System.Data.DataColumn In table.Columns
                header.Add(item.ColumnName)
            Next

            file.WriteLine(String.Join(delimiter, header))

            ' Each row is joined once and streamed straight to the file, so the
            ' full output is never held in memory.
            For Each row As System.Data.DataRow In table.Rows
                ' TODO: For string based fields, capture the max length
                Dim fields As IEnumerable(Of String) = (row.ItemArray).[Select](Function(field) field.ToString())

                file.WriteLine(String.Join(delimiter, fields))
            Next
        End Using
    Next
End Sub