我在IIS上托管了一个asp.net mvc应用程序。我有一个表单,用户上传包含50k +行的excel文件。我用以下C#代码读取了excel文件。
/// <summary>
/// Loads every row of the worksheet named "Sheet1" from an Excel file into a <see cref="DataTable"/>
/// through an OLE DB data adapter.
/// </summary>
/// <param name="fileName">
/// Path of the Excel file. A ".xls" extension selects the Jet 4.0 provider; any other
/// extension selects the ACE 12.0 provider.
/// </param>
/// <returns>The first table of the <see cref="DataSet"/> filled by the query.</returns>
public DataTable GetExcelDataTable(string fileName)
{
    // Path.GetExtension returns the extension INCLUDING its leading dot (".xls"),
    // so the previous comparison against "xls" could never succeed and the Jet
    // branch was unreachable. The comparison is also made case-insensitive so
    // that "BOOK.XLS" is recognised.
    // NOTE(review): Microsoft.Jet.OLEDB.4.0 is, as far as I know, only usable from
    // 32-bit processes - confirm the bitness of the hosting process / app pool.
    bool isLegacyXls = string.Equals(
        Path.GetExtension(fileName), ".xls", System.StringComparison.OrdinalIgnoreCase);

    string connectionString = isLegacyXls
        ? string.Format("Provider=Microsoft.Jet.OLEDB.4.0;Data source={0}; Extended Properties=Excel 8.0;", fileName)
        : string.Format("Provider=Microsoft.ACE.OLEDB.12.0;Data Source={0}; Extended Properties=Excel 12.0;", fileName);

    // Both disposables live in using blocks so they are released even when
    // Fill throws; the explicit Close/Dispose calls are therefore not needed.
    using (var conn = new OleDbConnection(connectionString))
    using (var adapter = new OleDbDataAdapter("SELECT * FROM [Sheet1$]", conn))
    {
        var ds = new DataSet();
        adapter.Fill(ds);
        return ds.Tables[0];
    }
}
问题是它最多只读取30k行,但从不读取整个excel文件。
有趣的是,如果我在Visual Studio中运行这个MVC应用,(使用相同的代码)可以读取所有行;但在IIS(IIS也在我的机器上)托管的网站上,却始终无法读取全部行。
任何想法,为什么会这样?
答案 0 :(得分:6)
在这种方法中,不需要在目标机器上安装excel
NPOI.SS.UserModel.IWorkbook hssfworkbook;

/// <summary>
/// Opens the Excel file at <paramref name="path"/> and stores the parsed workbook in
/// the <c>hssfworkbook</c> field: an <c>XSSFWorkbook</c> for ".xlsx" files, an
/// <c>HSSFWorkbook</c> for everything else.
/// </summary>
/// <param name="path">Path of the Excel file to load.</param>
/// <returns><c>true</c> when the workbook was loaded; <c>false</c> when any exception occurred.</returns>
bool InitializeWorkbook(string path)
{
    try
    {
        // Open read-only and dispose the stream in both branches. Previously the
        // .xlsx branch never closed its FileStream, leaking the handle and
        // keeping the file locked.
        using (FileStream file = new FileStream(path, FileMode.Open, FileAccess.Read))
        {
            // Ordinal, case-insensitive suffix test instead of the culture-sensitive ToLower().
            if (path.EndsWith(".xlsx", System.StringComparison.OrdinalIgnoreCase))
            {
                hssfworkbook = new XSSFWorkbook(file);
            }
            else
            {
                hssfworkbook = new HSSFWorkbook(file);
            }
        }
        return true;
    }
    catch
    {
        // Deliberate best-effort contract: callers only receive a success flag.
        return false;
    }
}
以下内容:
/// <summary>
/// Copies the first sheet of a workbook into a <see cref="DataTable"/>, storing each
/// cell as its <c>ToString()</c> text.
/// </summary>
/// <param name="hssfworkbook">Workbook whose sheet at index 0 is read.</param>
/// <param name="rowCount">Maximum number of data rows (after the header row) to copy.</param>
/// <returns>
/// A table with one column per header cell and at most <paramref name="rowCount"/> rows.
/// </returns>
public DataTable GetExcelDataTable(NPOI.SS.UserModel.IWorkbook hssfworkbook, int rowCount)
{
    NPOI.SS.UserModel.ISheet sheet = hssfworkbook.GetSheetAt(0);
    System.Collections.IEnumerator rows = sheet.GetRowEnumerator();
    DataTable dt = new DataTable();

    // The first enumerated row supplies the column names.
    if (rows.MoveNext())
    {
        // HSSFRow and XSSFRow both implement IRow, so no dynamic dispatch is needed.
        var header = (NPOI.SS.UserModel.IRow)rows.Current;
        for (int i = 0; i < header.LastCellNum; i++)
        {
            ICell cell = header.GetCell(i);
            dt.Columns.Add(cell != null ? cell.ToString() : string.Empty);
        }
    }

    int cnt = 0;
    while (rows.MoveNext() && cnt < rowCount)
    {
        cnt++;
        var row = (NPOI.SS.UserModel.IRow)rows.Current;
        DataRow dr = dt.NewRow();

        // NOTE(review): as in the original, cell 0 of every data row is skipped and
        // cell i is written to column i - 1, even though the header loop creates a
        // column for cell 0 as well. That mapping is preserved - confirm it is intended.
        // Starting at 1 removes the old crash: a null cell at index 0 used to execute
        // dr[-1] = null and throw IndexOutOfRangeException.
        for (int i = 1; i < row.LastCellNum; i++)
        {
            ICell cell = row.GetCell(i);
            dr[i - 1] = cell == null ? null : cell.ToString();
        }

        dt.Rows.Add(dr);
    }

    return dt;
}
或:
/// <summary>
/// Builds a <see cref="DataTable"/> from the first sheet of a workbook, converting
/// each cell according to its <c>CellType</c>.
/// </summary>
/// <param name="hssfworkbook">Workbook whose sheet at index 0 is read.</param>
/// <param name="rowCount">Upper bound on the number of data rows copied after the header row.</param>
/// <returns>The populated table; column names come from the first enumerated row.</returns>
public DataTable GetExcelDataTable(NPOI.SS.UserModel.IWorkbook hssfworkbook, int rowCount)
{
    NPOI.SS.UserModel.ISheet firstSheet = hssfworkbook.GetSheetAt(0);
    System.Collections.IEnumerator cursor = firstSheet.GetRowEnumerator();
    DataTable table = new DataTable();

    // Header: one column per cell position, empty name when the cell is missing.
    if (cursor.MoveNext())
    {
        dynamic headerRow;
        if (cursor.Current is NPOI.HSSF.UserModel.HSSFRow)
            headerRow = (NPOI.HSSF.UserModel.HSSFRow)cursor.Current;
        else
            headerRow = (NPOI.XSSF.UserModel.XSSFRow)cursor.Current;

        for (int col = 0; col < headerRow.LastCellNum; col++)
        {
            ICell headerCell = headerRow.GetCell(col);
            dt_AddColumn:
            table.Columns.Add(headerCell != null ? headerCell.ToString() : string.Empty);
        }
    }

    // Data rows: stop after rowCount rows or when the sheet is exhausted.
    for (int copied = 0; cursor.MoveNext() && copied < rowCount; copied++)
    {
        dynamic sheetRow;
        if (cursor.Current is NPOI.HSSF.UserModel.HSSFRow)
            sheetRow = (HSSFRow)cursor.Current;
        else
            sheetRow = (XSSFRow)cursor.Current;

        DataRow record = table.NewRow();

        // Cell 0 is never copied; cell N is stored in column N - 1.
        for (int source = 1; source < sheetRow.LastCellNum; source++)
        {
            int target = source - 1;
            ICell current = sheetRow.GetCell(source);

            if (current == null)
            {
                record[target] = null;
                continue;
            }

            switch (current.CellType)
            {
                case CellType.Blank:
                    record[target] = "[null]";
                    break;
                case CellType.Boolean:
                    record[target] = current.BooleanCellValue;
                    break;
                case CellType.Numeric:
                    record[target] = current.ToString();
                    break;
                case CellType.String:
                    record[target] = current.StringCellValue;
                    break;
                case CellType.Error:
                    record[target] = current.ErrorCellValue;
                    break;
                case CellType.Formula:
                default:
                    record[target] = "=" + current.CellFormula;
                    break;
            }
        }

        table.Rows.Add(record);
    }

    return table;
}
或:
/// <summary>
/// Copies one segment (page) of the first sheet of a workbook into a
/// <see cref="DataTable"/>, converting each cell according to its <c>CellType</c>.
/// </summary>
/// <param name="hssfworkbook">Workbook whose sheet at index 0 is read.</param>
/// <param name="segment">
/// 1-based segment number; <c>(segment - 1) * rowCount</c> data rows are skipped before copying starts.
/// </param>
/// <param name="rowCount">Maximum number of data rows copied for the segment.</param>
/// <returns>
/// A table whose columns come from the first enumerated row and whose rows are the requested segment.
/// </returns>
public DataTable GetExcelDataTable(NPOI.SS.UserModel.IWorkbook hssfworkbook, int segment, int rowCount)
{
    NPOI.SS.UserModel.ISheet sheet = hssfworkbook.GetSheetAt(0);
    System.Collections.IEnumerator rows = sheet.GetRowEnumerator();
    DataTable dt = new DataTable();

    // The first enumerated row supplies the column names.
    if (rows.MoveNext())
    {
        // HSSFRow and XSSFRow both implement IRow, so no dynamic dispatch is needed.
        var header = (NPOI.SS.UserModel.IRow)rows.Current;
        for (int i = 0; i < header.LastCellNum; i++)
        {
            ICell cell = header.GetCell(i);
            dt.Columns.Add(cell != null ? cell.ToString() : string.Empty);
        }
    }

    // Advance past the rows that belong to earlier segments.
    for (int i = 0; i < (segment - 1) * rowCount; i++)
    {
        if (!rows.MoveNext()) break;
    }

    int cnt = 0;
    while (rows.MoveNext() && cnt < rowCount)
    {
        cnt++;
        var row = (NPOI.SS.UserModel.IRow)rows.Current;
        DataRow dr = dt.NewRow();

        // NOTE(review): as in the original, cell 0 of every data row is skipped and
        // cell i is written to column i - 1, even though the header loop creates a
        // column for cell 0 as well. That mapping is preserved - confirm it is intended.
        // Starting at 1 removes the old crash: a null cell at index 0 used to execute
        // dr[-1] = null and throw IndexOutOfRangeException.
        for (int i = 1; i < row.LastCellNum; i++)
        {
            ICell cell = row.GetCell(i);
            if (cell == null)
            {
                dr[i - 1] = null;
                continue;
            }

            switch (cell.CellType)
            {
                case CellType.Blank:
                    dr[i - 1] = "[null]";
                    break;
                case CellType.Boolean:
                    dr[i - 1] = cell.BooleanCellValue;
                    break;
                case CellType.Numeric:
                    dr[i - 1] = cell.ToString();
                    break;
                case CellType.String:
                    dr[i - 1] = cell.StringCellValue;
                    break;
                case CellType.Error:
                    dr[i - 1] = cell.ErrorCellValue;
                    break;
                case CellType.Formula:
                default:
                    dr[i - 1] = "=" + cell.CellFormula;
                    break;
            }
        }

        dt.Rows.Add(dr);
    }

    return dt;
}
答案 1 :(得分:4)
您是否可以发布服务器的某些规格?它是基于VM和云吗?在过去,我成功地使用了:
NPOI:http://npoi.codeplex.com/
来读取.xls文件;但如果您可以将文件限制为.xlsx,我会使用ClosedXML。我曾在Azure的一台高配置VM上用ClosedXML读取过大量超过50K行的大文件,没有出现问题。我感觉您可能是触及了服务器上的用户资源限制:如果某个用户的资源占用达到一定百分比,就会触发使用配额,任务随之被终止。
答案 2 :(得分:4)
这个问题可以通过读取25K + 25k = 50K两部分的数据来解决。 您只需将选择查询更新为:
SELECT TOP 25000 * FROM [Sheet1$]
答案 3 :(得分:2)
我在本地创建了一个小示例,将SELECT TOP与ORDER BY结合使用,即可得到结果:
检查代码:
/// <summary>
/// Reads the Name and Surname columns of worksheet "Sheet1" in two queries - the first
/// 25000 rows ordered by Name ascending and the first 25000 ordered by Name descending -
/// and returns the non-empty results as tables of a <see cref="DataSet"/>.
/// </summary>
/// <param name="fileName">
/// Path of the Excel file. A ".xls" extension selects the Jet 4.0 provider; any other
/// extension selects the ACE 12.0 provider.
/// </param>
/// <returns>A data set holding zero, one or two tables, depending on which queries returned rows.</returns>
public DataSet GetExcelDataTable(string fileName)
{
    // Path.GetExtension returns the extension INCLUDING its leading dot (".xls"),
    // so the previous comparison against "xls" could never succeed.
    // NOTE(review): Microsoft.Jet.OLEDB.4.0 is, as far as I know, only usable from
    // 32-bit processes - confirm the bitness of the hosting process / app pool.
    bool isLegacyXls = string.Equals(
        Path.GetExtension(fileName), ".xls", System.StringComparison.OrdinalIgnoreCase);

    string connectionString = isLegacyXls
        ? string.Format("Provider=Microsoft.Jet.OLEDB.4.0;Data source={0}; Extended Properties=Excel 8.0;", fileName)
        : string.Format("Provider=Microsoft.ACE.OLEDB.12.0;Data Source={0}; Extended Properties=Excel 12.0;", fileName);

    DataTable data = new DataTable();
    DataTable data2 = new DataTable();
    var ds = new DataSet();

    // The connection was previously never disposed; the using block releases it
    // on every exit path, including exceptions thrown by Fill.
    using (var conn = new OleDbConnection(connectionString))
    {
        // NOTE(review): presumably the two halves are meant to cover a sheet of about
        // 50k rows; with other row counts they would overlap or miss rows - confirm.
        using (var adapter = new OleDbDataAdapter("SELECT TOP 25000 Name, Surname FROM [Sheet1$] ORDER BY Name asc", conn))
        {
            adapter.Fill(data);
        }
        using (var adapter = new OleDbDataAdapter("SELECT TOP 25000 Name, Surname FROM [Sheet1$] ORDER BY Name desc", conn))
        {
            adapter.Fill(data2);
        }
    }

    if (data.Rows.Count > 0)
    {
        ds.Tables.Add(data);
    }
    if (data2.Rows.Count > 0)
    {
        ds.Tables.Add(data2);
    }
    return ds;
}
答案 4 :(得分:2)
您可以尝试通过读取块之后的行块而不是执行一次读取来填充数据表。
这种方法的优点在于,您不受50k条记录的限制,而是能够适应数据表的实际行数。
此代码适用于我的机器(Win10 X64,VS2010 Express):
/// <summary>
/// Loads worksheet "Sheet1" of an Excel file into a <see cref="DataTable"/> in fixed-size
/// chunks, calling the adapter repeatedly until a call reads no records.
/// </summary>
/// <param name="fileName">
/// Path of the Excel file. A ".xls" extension selects the Jet 4.0 provider; any other
/// extension selects the ACE 12.0 provider.
/// </param>
/// <returns>The table accumulated across all chunked Fill calls.</returns>
public DataTable GetExcelDataTable(string fileName)
{
    // Path.GetExtension returns the extension INCLUDING its leading dot (".xls"),
    // so the previous comparison against "xls" could never succeed.
    // NOTE(review): Microsoft.Jet.OLEDB.4.0 is, as far as I know, only usable from
    // 32-bit processes - confirm the bitness of the hosting process / app pool.
    bool isLegacyXls = string.Equals(
        Path.GetExtension(fileName), ".xls", System.StringComparison.OrdinalIgnoreCase);

    string connectionString = isLegacyXls
        ? string.Format("Provider=Microsoft.Jet.OLEDB.4.0;Data source={0}; Extended Properties=Excel 8.0;", fileName)
        : string.Format("Provider=Microsoft.ACE.OLEDB.12.0;Data Source={0}; Extended Properties=Excel 12.0;", fileName);

    // Both disposables live in using blocks so they are released even when
    // Fill throws; the explicit Close/Dispose calls are therefore not needed.
    using (var conn = new OleDbConnection(connectionString))
    using (var adapter = new OleDbDataAdapter("SELECT * FROM [Sheet1$]", conn))
    {
        var dt = new DataTable();
        int recordRead = 0;
        int recordCur = 0;      // index of the first record requested by the next Fill call
        int recordStep = 6789;  // maximum number of records requested per Fill call

        // Read recordStep records at a time instead of the whole sheet in one call;
        // the loop ends when a call adds no records.
        do
        {
            recordRead = adapter.Fill(recordCur, recordStep, dt);
            recordCur += recordRead; // move the starting point past what was just read
        } while (recordRead > 0);

        return dt;
    }
}