我的业务需求是寻找能够将Excel文件放在网络位置并将这些文件中的数据上传到SQL Server数据库的能力。这些文件将包含1个数据工作表。这些文件对应于已知数据库中的表。这些文件可以并且将与多个表相关联,并且仅在打开文件时才知道,即工作表的名称。我目前正在为每个文件创建多个SSIS包,因为它们已上传到共享驱动器,但有时候,我没有足够快地创建包。
我想我的问题是,这种类型的动态解析是SSIS可以从Foreach容器中的脚本任务完成的吗?或者我应该考虑另一种选择?
到目前为止,我有以下内容......但正如我所研究过的那样,我发现了与此类似的帖子:Extracting excel files with SSIS这让我对可行性略有关注。 ..
// Reads the `checked` state of the first control in the group named "correct" + i
// inside the document's first form; `i` is not defined in this snippet.
// NOTE(review): this JavaScript line looks unrelated to the SSIS question around it — confirm it belongs here.
var isYesChecked = document.forms[0]["correct"+i][0].checked;
那么,我应该研究哪些想法或方向?
答案 0 :(得分:1)
步骤:
1)创建For Each循环容器。在Collection下,应用“Foreach文件枚举器”,在枚举器配置下,列出文件夹位置和文件。对于文件,您可以填写*.*,或者使用*.xlsx或*.xls来只筛选特定文件。检索文件名时选择“完全限定”(Fully qualified)。对于Variable Mappings,应用字符串用户变量,如“ExcelFile”,索引为0。
2)在For Each循环容器中添加脚本任务。您将向它传入ReadOnlyVariable“ExcelFile”,它会写入ReadWriteVariables下的两个新变量“TableName”和“Worksheets”(“TableName”为字符串;脚本向“Worksheets”赋值的是字符串数组,因此该变量应声明为Object类型)。应用以下C#脚本。请注意,由于以下脚本会修改您的Excel文件,您应该对原始文件的副本进行操作。
#region Namespaces
using System;
using System.Data;
using Microsoft.SqlServer.Dts.Runtime;
using System.Windows.Forms;
using System.IO;
using Excel = Microsoft.Office.Interop.Excel;
using System.Text;
using System.Linq;
using System.Threading.Tasks;
using System.Data.OleDb;
using System.Xml.Serialization;
#endregion
namespace xxxxxxxxx
{
/// <summary>
/// SSIS Script Task that prepares one Excel workbook for import.
/// Reads <c>User::ExcelFile</c> (full path of the workbook), writes <c>User::TableName</c>
/// (file name without path/extension, spaces replaced by underscores) and
/// <c>User::Worksheets</c> (a <c>string[]</c> of the worksheet names after renaming).
/// Every worksheet whose name contains spaces is renamed in the workbook itself,
/// so the task modifies the file it is given.
/// </summary>
[Microsoft.SqlServer.Dts.Tasks.ScriptTask.SSISScriptTaskEntryPointAttribute]
public partial class ScriptMain : Microsoft.SqlServer.Dts.Tasks.ScriptTask.VSTARTScriptObjectModelBase
{
    /// <summary>
    /// Entry point called by the SSIS runtime. Sets <c>Dts.TaskResult</c> to Success only
    /// when the workbook was opened, renamed and saved; otherwise raises an SSIS error
    /// event and reports Failure so the downstream import does not run on stale names.
    /// </summary>
    public void Main()
    {
        // Includes full path, filename and extension... C:\Documents\ThisExcel.xlsx
        string xlFile = Dts.Variables["User::ExcelFile"].Value.ToString();

        // The file name (no path, no extension, no spaces) becomes part of the
        // name of the database table created later in the package.
        string tableName = Path.GetFileNameWithoutExtension(xlFile).Replace(" ", "_");
        Dts.Variables["User::TableName"].Value = tableName;

        Excel.Application app = null;
        Excel.Workbook excelWorkbook = null;
        bool succeeded = false;

        try
        {
            app = new Excel.Application();
            // The package runs unattended: never let Excel wait on a dialog.
            app.DisplayAlerts = false;

            excelWorkbook = app.Workbooks.Open(xlFile);

            // Size the array from Worksheets (not Sheets): Sheets also counts chart
            // sheets, which the loop below never visits and would leave as null entries.
            string[] worksheetNames = new string[excelWorkbook.Worksheets.Count];
            int index = 0;

            // Replace spaces in worksheet names with underscores; this is needed for the
            // SSIS import and keeps spaces out of the final table names.
            foreach (Excel.Worksheet sheet in excelWorkbook.Worksheets)
            {
                string originalName = sheet.Name;
                string cleanName = originalName.Replace(" ", "_");
                if (cleanName != originalName)
                {
                    sheet.Name = cleanName;
                }
                worksheetNames[index++] = cleanName;
            }

            Dts.Variables["User::Worksheets"].Value = worksheetNames;
            excelWorkbook.Save();
            succeeded = true;
        }
        catch (Exception ex)
        {
            // Report through the SSIS event model instead of a MessageBox, which would
            // block forever when the package runs without an interactive desktop.
            Dts.Events.FireError(
                0,
                "Excel sheet rename",
                "Excel sheet rename failed for file " + xlFile + " based on " + ex.Message,
                string.Empty,
                0);
        }
        finally
        {
            try
            {
                if (excelWorkbook != null)
                {
                    // Changes were already saved on the success path; on failure the
                    // partially renamed workbook is discarded.
                    excelWorkbook.Close(false);
                    System.Runtime.InteropServices.Marshal.ReleaseComObject(excelWorkbook);
                }
            }
            finally
            {
                if (app != null)
                {
                    app.Quit();
                    System.Runtime.InteropServices.Marshal.ReleaseComObject(app);
                }
                excelWorkbook = null;
                app = null;
                // Force collection of any remaining COM wrappers so the Excel process exits.
                GC.Collect();
                GC.WaitForPendingFinalizers();
            }
        }

        Dts.TaskResult = succeeded ? (int)ScriptResults.Success : (int)ScriptResults.Failure;
    }

    #region ScriptResults declaration
    /// <summary>Task outcomes mapped onto the SSIS runtime's DTSExecResult values.</summary>
    enum ScriptResults
    {
        Success = Microsoft.SqlServer.Dts.Runtime.DTSExecResult.Success,
        Failure = Microsoft.SqlServer.Dts.Runtime.DTSExecResult.Failure
    };
    #endregion
}
}
3)保存并构建上述C#脚本任务后,在刚刚创建的脚本任务下面的早期For Each循环容器中添加For Each循环容器。这将遍历每个Excel文件中的每个工作表。如果您只有一个工作表,那很好。它将应用“Foreach From Variable Enumerator”的枚举器,它将是在前面提到的脚本任务中填充的“Worksheets”字符串变量。它将写入一个名为“Worksheet”的新用户字符串变量,索引为0。
4)在这个新的嵌套For Each循环容器中,添加将为每个工作表创建数据库表的脚本任务。我必须处理的棘手部分是定义字段类型,这不是从Excel工作表或文本CSV文件中保留的。所以我将它们全部设为nvarchar(255),或者如果列标题类似于备注,描述或其他内容,我将其设为nvarchar(max),这对于4000或4262个字符是好的(我不记得确定)。以下是我应用的动态代码。
#region Namespaces
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Data;
using System.Data.OleDb;
using System.Xml.Serialization;
using System.IO;
using Microsoft.SqlServer.Dts.Runtime;
using System.Windows.Forms;
#endregion
namespace yyyyyyyyyy
{
/// <summary>
/// SSIS Script Task that builds the T-SQL used to (re)create and populate one database
/// table from one Excel worksheet.
/// Reads <c>User::ExcelFile</c>, <c>User::Worksheet</c> and <c>User::TableName</c>;
/// writes the generated batch to <c>User::CreateTableSQL</c>.
/// </summary>
[Microsoft.SqlServer.Dts.Tasks.ScriptTask.SSISScriptTaskEntryPointAttribute]
public partial class ScriptMain : Microsoft.SqlServer.Dts.Tasks.ScriptTask.VSTARTScriptObjectModelBase
{
    // A column whose header contains one of these fragments (any casing) is created as nvarchar(max).
    private static readonly string[] LongTextNameFragments = { "Remark", "Description", "Format" };

    // A column whose header equals one of these names (any casing) is created as nvarchar(max).
    private static readonly string[] LongTextExactNames = { "Comment", "Comments", "Legal", "Note", "Notes" };

    /// <summary>
    /// Entry point called by the SSIS runtime. On success <c>User::CreateTableSQL</c> holds a
    /// batch that drops the table if it exists, creates it with one nvarchar column per
    /// worksheet column, and fills it through OPENROWSET. On an unsupported file extension
    /// or a worksheet without columns the task raises an SSIS error and reports Failure.
    /// </summary>
    public void Main()
    {
        string xlFile = Dts.Variables["User::ExcelFile"].Value.ToString();       // Full path and filename with extension.
        string worksheet = Dts.Variables["User::Worksheet"].Value.ToString();    // Worksheet name from the Excel file.
        string fileTableName = Dts.Variables["User::TableName"].Value.ToString(); // File name without path/extension, spaces replaced.

        // The table name is the root file name and the worksheet name joined by two underscores.
        string tableName = fileTableName + "__" + worksheet;

        // Pick the OPENROWSET provider by extension; compare case-insensitively so
        // "BOOK.XLSX" is handled the same as "book.xlsx".
        string extension = Path.GetExtension(xlFile);
        string provider;
        string excelFormat;
        if (string.Equals(extension, ".xlsx", StringComparison.OrdinalIgnoreCase))
        {
            provider = "Microsoft.ACE.OLEDB.12.0";
            excelFormat = "Excel 12.0 Xml";
        }
        else if (string.Equals(extension, ".xls", StringComparison.OrdinalIgnoreCase))
        {
            provider = "Microsoft.Jet.OLEDB.4.0";
            excelFormat = "Excel 8.0";
        }
        else
        {
            Fail("Unsupported Excel file extension '" + extension + "' for file " + xlFile);
            return;
        }

        List<string> columnNames = ReadColumnNames(xlFile, worksheet);
        if (columnNames.Count == 0)
        {
            Fail("No columns found in worksheet '" + worksheet + "' of file " + xlFile);
            return;
        }

        // Build the column definitions and the shared column list.
        List<string> definitions = new List<string>(columnNames.Count);
        List<string> quotedColumns = new List<string>(columnNames.Count);
        foreach (string columnName in columnNames)
        {
            string quoted = QuoteIdentifier(columnName);
            string columnType = IsLongTextColumn(columnName) ? "nvarchar(max)" : "nvarchar(255)";
            definitions.Add(quoted + " " + columnType);
            quotedColumns.Add(quoted);
        }

        string qualifiedTable = "[dbo]." + QuoteIdentifier(tableName);
        string columnList = string.Join(",", quotedColumns.ToArray());

        // Always insert by explicit column name: the schema rowset's row order is not
        // guaranteed to match the order SELECT * returns from the worksheet.
        StringBuilder sql = new StringBuilder();
        sql.Append("IF OBJECT_ID (N'").Append(EscapeLiteral(qualifiedTable)).Append("') IS NOT NULL DROP TABLE ")
           .Append(qualifiedTable).Append("; ");
        sql.Append("CREATE TABLE ").Append(qualifiedTable).Append(" (")
           .Append(string.Join(",", definitions.ToArray())).Append("); ");
        sql.Append("INSERT INTO ").Append(qualifiedTable).Append(" (").Append(columnList).Append(") ");
        sql.Append("SELECT ").Append(columnList).Append(" FROM OPENROWSET('").Append(provider).Append("', '")
           .Append(EscapeLiteral(excelFormat + ";HDR=YES;Database=" + xlFile)).Append("', '")
           .Append(EscapeLiteral("SELECT * FROM [" + worksheet + "$]")).Append("');");

        Dts.Variables["User::CreateTableSQL"].Value = sql.ToString();
        Dts.TaskResult = (int)ScriptResults.Success;
    }

    // Returns the worksheet's column names as reported by the OLE DB schema rowset.
    private static List<string> ReadColumnNames(string xlFile, string worksheet)
    {
        string connectionString = "Provider=Microsoft.ACE.OLEDB.12.0;Data Source=" + xlFile +
            ";Extended Properties=\"Excel 12.0 Xml;HDR=YES;IMEX=1\"";
        List<string> names = new List<string>();

        // 'using' closes the connection even when opening or reading the schema throws.
        using (OleDbConnection connection = new OleDbConnection(connectionString))
        {
            connection.Open();
            System.Data.DataTable schema = connection.GetOleDbSchemaTable(
                OleDbSchemaGuid.Columns, new object[] { null, null, worksheet + "$", null });
            if (schema == null)
            {
                return names;
            }

            // Prefer worksheet column order when the provider reports it.
            DataRow[] rows = schema.Columns.Contains("ORDINAL_POSITION")
                ? schema.Select(string.Empty, "ORDINAL_POSITION")
                : schema.Select();
            foreach (DataRow row in rows)
            {
                names.Add(row["COLUMN_NAME"].ToString());
            }
        }

        return names;
    }

    // True when the header suggests free-form text that may exceed 255 characters.
    private static bool IsLongTextColumn(string columnName)
    {
        foreach (string fragment in LongTextNameFragments)
        {
            if (columnName.IndexOf(fragment, StringComparison.OrdinalIgnoreCase) >= 0)
            {
                return true;
            }
        }

        foreach (string exactName in LongTextExactNames)
        {
            if (string.Equals(columnName, exactName, StringComparison.OrdinalIgnoreCase))
            {
                return true;
            }
        }

        return false;
    }

    // Wraps a name in square brackets, doubling any closing bracket inside it.
    private static string QuoteIdentifier(string name)
    {
        return "[" + name.Replace("]", "]]") + "]";
    }

    // Doubles single quotes so the text can be embedded in a T-SQL string literal.
    private static string EscapeLiteral(string text)
    {
        return text.Replace("'", "''");
    }

    // Clears the output variable, raises an SSIS error event and marks the task as failed.
    private void Fail(string message)
    {
        Dts.Variables["User::CreateTableSQL"].Value = string.Empty;
        Dts.Events.FireError(0, "Create table SQL", message, string.Empty, 0);
        Dts.TaskResult = (int)ScriptResults.Failure;
    }

    #region ScriptResults declaration
    /// <summary>Task outcomes mapped onto the SSIS runtime's DTSExecResult values.</summary>
    enum ScriptResults
    {
        Success = Microsoft.SqlServer.Dts.Runtime.DTSExecResult.Success,
        Failure = Microsoft.SqlServer.Dts.Runtime.DTSExecResult.Failure
    };
    #endregion
}
}
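查看上面的脚本,您会注意到需要声明以下ReadOnlyVariables:ExcelFile,SourceFolder,TableName,tempFileName和Worksheet。然后需要声明以下ReadWriteVariables:ColumnCount,CreateTable和InsertTableName。(注意:上面所示的脚本实际只读取ExcelFile、TableName和Worksheet,并写入CreateTableSQL,因此CreateTableSQL也必须声明为ReadWriteVariable。)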
5)在嵌套的ForEach循环容器中,在上面的Task脚本下面,添加一个执行SQL任务,该任务将运行CreateTableSQL变量中包含的sql。确保将SQLSourceType设置为“Variable”。这将创建并填充表,如果它已经存在,甚至会覆盖它。
完成后,您应该看到类似以下内容的内容:
希望这有帮助,如果您有任何疑问,请与我联系。我没有时间去掉那些无关紧要的东西,但这应该让你走上正确的道路。此循环容器用于Excel文件,但您可以添加其他循环容器,并修改代码以处理CSV或其他文件类型。所有这些都可以包含在一个SSIS包中。
最终的SQL任务只运行以下TSQL,它在数据库中查找名称中含有空格的字段,并把空格替换为下划线。这不是必需的,但可以避免在编写SQL时必须用方括号[]把列名括起来。
-- Renames every column whose name contains a space, replacing each space with an
-- underscore, by generating and executing one sp_rename call per matching column.
DECLARE @SQL NVARCHAR(MAX);

-- STATIC: the loop renames columns, which changes the catalog rows this query reads;
-- a static cursor iterates over a snapshot of the generated statements instead.
DECLARE My_Cursor CURSOR LOCAL FORWARD_ONLY STATIC READ_ONLY
FOR
    -- Schema-qualify and bracket-quote the object name so tables outside the default
    -- schema, or with unusual characters in their names, are renamed correctly.
    -- Single quotes are doubled because the names are embedded in string literals.
    SELECT 'EXEC sp_rename N'''
           + REPLACE(QUOTENAME(table_schema) + '.' + QUOTENAME(table_name) + '.' + QUOTENAME(column_name), '''', '''''')
           + ''', N'''
           + REPLACE(REPLACE(column_name, ' ', '_'), '''', '''''')
           + ''', ''COLUMN'''
    FROM information_schema.columns
    WHERE column_name LIKE '% %';

OPEN My_Cursor;
FETCH NEXT FROM My_Cursor INTO @SQL;
-- 0 means the fetch returned a row; any other status ends the loop.
WHILE @@FETCH_STATUS = 0
BEGIN
    EXECUTE sp_executesql @SQL;
    FETCH NEXT FROM My_Cursor INTO @SQL;
END
CLOSE My_Cursor;
DEALLOCATE My_Cursor;