我目前有一个处理程序,它接受 Excel 文件的路径和工作表名称(tab name),先把文件读入 DataTable,然后将该表序列化为 JSON 字符串并返回。这种做法一直有效,直到我尝试处理大文件时,程序抛出了内存不足异常。
我认为如果我没有先将所有内容加载到数据表中,而是直接加载到json字符串中,它会减少内存使用量。但是,我一直无法找到任何如何执行此操作的示例。
我可以直接从 OleDbConnection 序列化为字符串吗?如果可以,该怎么做?
/// <summary>
/// Reads one worksheet of an Excel workbook through the ACE OLE DB provider and writes its
/// rows to the HTTP response as a single JSON string.
/// </summary>
/// <param name="context">HTTP context whose request supplies the "path" and "tableNames" values.</param>
public void ProcessRequest(HttpContext context)
{
// Workbook path and worksheet name, both taken straight from the request.
string path = context.Request["path"];
string tableNames = context.Request["tableNames"];
// Stays empty when the path ends with neither ".xls" nor ".xlsx".
string connectionString = string.Empty;
if (path.EndsWith(".xls"))
{
// .xls workbooks: "Excel 8.0" extended properties.
connectionString = String.Format(@"Provider=Microsoft.ACE.OLEDB.12.0;
Data Source={0};
Extended Properties=""Excel 8.0;HDR=YES;IMEX=1""", path);
}
else if (path.EndsWith(".xlsx"))
{
// .xlsx workbooks: "Excel 12.0 Xml" extended properties.
connectionString = String.Format(@"Provider=Microsoft.ACE.OLEDB.12.0;
Data Source={0};
Extended Properties=""Excel 12.0 Xml;HDR=YES;IMEX=1""", path);
}
DbProviderFactory factory = DbProviderFactories.GetFactory("System.Data.OleDb");
DbDataAdapter adapter = factory.CreateDataAdapter();
// NOTE(review): the connection is opened here but never closed or disposed in this method.
OleDbConnection conn = new OleDbConnection(connectionString);
conn.Open();
// The whole worksheet is buffered in this DataTable before any JSON is produced.
DataTable tmp = new DataTable();
DbCommand selectCommand = factory.CreateCommand();
// NOTE(review): the request-supplied name is formatted directly into the SQL text.
selectCommand.CommandText = String.Format("SELECT * FROM [{0}]", tableNames);
selectCommand.Connection = conn;
adapter.SelectCommand = selectCommand;
adapter.Fill(tmp);
// The filled table is serialized into one in-memory string, which is then written to the
// response; both the table and the string are alive at the same time.
string tabdata = JsonConvert.SerializeObject(tmp);
context.Response.Write(tabdata);
}
答案 0 :(得分:7)
首先,您应该停止先序列化为中间的 string,而是使用以下简单方法直接序列化到 HttpResponse.OutputStream:
/// <summary>
/// Helpers that serialize an object as JSON straight into an output sink with Json.NET,
/// without building the whole document as an intermediate string.
/// </summary>
public static class JsonExtensions
{
    /// <summary>
    /// Serializes <paramref name="value"/> to the given text writer.
    /// </summary>
    /// <param name="value">Object to serialize.</param>
    /// <param name="writer">Destination writer; it is not disposed by this method.</param>
    /// <param name="settings">Optional serializer settings passed to <c>JsonSerializer.CreateDefault</c>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="writer"/> is null.</exception>
    public static void SerializeToStream(object value, TextWriter writer, JsonSerializerSettings settings = null)
    {
        if (writer == null)
        {
            throw new ArgumentNullException("writer");
        }

        JsonSerializer jsonSerializer = JsonSerializer.CreateDefault(settings);
        jsonSerializer.Serialize(writer, value);
    }

    /// <summary>
    /// Serializes <paramref name="value"/> to the given stream through a <see cref="StreamWriter"/>.
    /// </summary>
    /// <param name="value">Object to serialize.</param>
    /// <param name="stream">Destination stream.</param>
    /// <param name="settings">Optional serializer settings.</param>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    public static void SerializeToStream(object value, Stream stream, JsonSerializerSettings settings = null)
    {
        if (stream == null)
        {
            throw new ArgumentNullException("stream");
        }

        // The writer is disposed when the using block exits.
        // NOTE(review): disposing a StreamWriter created this way presumably also closes
        // the underlying stream - confirm callers do not reuse it afterwards.
        using (StreamWriter streamWriter = new StreamWriter(stream))
        {
            SerializeToStream(value, streamWriter, settings);
        }
    }

    /// <summary>
    /// Serializes <paramref name="value"/> to the output stream of an HTTP response.
    /// </summary>
    /// <param name="value">Object to serialize.</param>
    /// <param name="response">Response whose <c>OutputStream</c> receives the JSON.</param>
    /// <param name="settings">Optional serializer settings.</param>
    /// <exception cref="ArgumentNullException"><paramref name="response"/> is null.</exception>
    public static void SerializeToStream(object value, System.Web.HttpResponse response, JsonSerializerSettings settings = null)
    {
        if (response == null)
        {
            throw new ArgumentNullException("response");
        }

        Stream output = response.OutputStream;
        SerializeToStream(value, output, settings);
    }
}
由于大字符串需要为其底层的 char 数组分配一大块连续的内存,这里往往是最先耗尽内存的地方。另请参阅 Json.NET 的 Performance Tips:
为了最大限度地减少内存使用量和分配的对象数量,Json.NET 支持直接对流进行序列化和反序列化。每次只读取或写入一部分 JSON,而不是把整个 JSON 字符串加载到内存中,这在处理大小超过 85kb 的 JSON 文档时尤为重要,可以避免 JSON 字符串最终落入大对象堆。
接下来,请务必将所有可释放对象(实现了 IDisposable 的对象)包裹在 using 语句中,如下所示。
这可能会解决您的问题;但如果没有,您可以使用以下 JsonConverter 将 IDataReader 直接序列化为 JSON:
/// <summary>
/// Write-only Json.NET converter that emits an <see cref="IDataReader"/> as a JSON array
/// containing one object per record, with one property per column.
/// </summary>
public class DataReaderConverter : JsonConverter
{
    /// <summary>Reading is not supported; this converter only writes.</summary>
    public override bool CanRead
    {
        get { return false; }
    }

    /// <summary>
    /// Reports whether <paramref name="objectType"/> is assignable to <see cref="IDataReader"/>.
    /// </summary>
    public override bool CanConvert(Type objectType)
    {
        Type readerType = typeof(IDataReader);
        return readerType.IsAssignableFrom(objectType);
    }

    /// <summary>Not implemented; always throws.</summary>
    /// <exception cref="NotImplementedException">Always.</exception>
    public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer)
    {
        throw new NotImplementedException();
    }

    /// <summary>
    /// Advances the reader to its end, writing each record as a JSON object inside one array.
    /// </summary>
    /// <param name="writer">Destination JSON writer.</param>
    /// <param name="value">The <see cref="IDataReader"/> to consume.</param>
    /// <param name="serializer">Serializer used for non-null field values.</param>
    public override void WriteJson(JsonWriter writer, object value, JsonSerializer serializer)
    {
        IDataReader records = (IDataReader)value;

        writer.WriteStartArray();
        while (records.Read())
        {
            WriteCurrentRecord(writer, records, serializer);
        }
        writer.WriteEndArray();
    }

    // Writes the record the reader is currently positioned on as { "column": value, ... }.
    private static void WriteCurrentRecord(JsonWriter writer, IDataReader records, JsonSerializer serializer)
    {
        writer.WriteStartObject();
        for (int ordinal = 0; ordinal < records.FieldCount; ordinal++)
        {
            string columnName = records.GetName(ordinal);
            writer.WritePropertyName(columnName);

            // Database nulls are written as JSON null; everything else goes through the serializer.
            if (!records.IsDBNull(ordinal))
            {
                serializer.Serialize(writer, records[ordinal]);
            }
            else
            {
                writer.WriteNull();
            }
        }
        writer.WriteEndObject();
    }
}
然后序列化为流,如下所示:
/// <summary>
/// Streams the contents of an Excel worksheet as JSON by reading it through the ACE OLE DB
/// provider with a forward-only data reader, so the sheet is never buffered in a DataTable.
/// </summary>
public static class ExcelExtensions
{
    // {0} = workbook path, {1} = Excel version token for Extended Properties.
    // The continuation lines deliberately start at column 0: this is a verbatim string, so any
    // leading whitespace would become part of the connection string.
    private const string ConnectionStringFormat = @"Provider=Microsoft.ACE.OLEDB.12.0;
Data Source={0};
Extended Properties=""{1};HDR=YES;IMEX=1""";

    /// <summary>
    /// Builds the OLE DB connection string for a workbook, chosen by file extension.
    /// </summary>
    /// <param name="path">Path of the .xls or .xlsx workbook.</param>
    /// <returns>The connection string for the workbook.</returns>
    /// <exception cref="ArgumentException">The path has neither a .xls nor a .xlsx extension.</exception>
    private static string GetExcelConnectionString(string path)
    {
        // Ordinal, case-insensitive matching so that "BOOK.XLSX" is recognised and the result
        // does not depend on the current culture.
        string excelVersion;
        if (path.EndsWith(".xls", StringComparison.OrdinalIgnoreCase))
        {
            excelVersion = "Excel 8.0";
        }
        else if (path.EndsWith(".xlsx", StringComparison.OrdinalIgnoreCase))
        {
            excelVersion = "Excel 12.0 Xml";
        }
        else
        {
            // Fail here with a clear message instead of opening a connection with an empty
            // connection string further down.
            throw new ArgumentException("Only .xls and .xlsx workbooks are supported.", "path");
        }

        return String.Format(ConnectionStringFormat, path, excelVersion);
    }

    /// <summary>
    /// Serializes a worksheet to a JSON string. Prefer the stream/writer overloads for large
    /// sheets, since this one holds the entire document in memory.
    /// </summary>
    /// <param name="path">Path of the workbook.</param>
    /// <param name="workSheetName">Name of the worksheet to select from.</param>
    /// <param name="settings">Optional serializer settings.</param>
    /// <returns>The worksheet rows as JSON text.</returns>
    public static string SerializeJsonToString(string path, string workSheetName, JsonSerializerSettings settings = null)
    {
        using (var writer = new StringWriter())
        {
            SerializeJsonToStream(path, workSheetName, writer, settings);
            return writer.ToString();
        }
    }

    /// <summary>
    /// Serializes a worksheet as JSON to <paramref name="stream"/> through a StreamWriter.
    /// </summary>
    /// <param name="path">Path of the workbook.</param>
    /// <param name="workSheetName">Name of the worksheet to select from.</param>
    /// <param name="stream">Destination stream; the wrapping writer is disposed on exit.</param>
    /// <param name="settings">Optional serializer settings.</param>
    public static void SerializeJsonToStream(string path, string workSheetName, Stream stream, JsonSerializerSettings settings = null)
    {
        using (var writer = new StreamWriter(stream))
        {
            SerializeJsonToStream(path, workSheetName, writer, settings);
        }
    }

    /// <summary>
    /// Serializes a worksheet as JSON to <paramref name="writer"/>, one record at a time.
    /// </summary>
    /// <param name="path">Path of the workbook.</param>
    /// <param name="workSheetName">Name of the worksheet to select from.</param>
    /// <param name="writer">Destination writer; not disposed by this method.</param>
    /// <param name="settings">
    /// Optional serializer settings. A DataReaderConverter is added for the duration of the call
    /// and removed afterwards, so the same instance should not be shared by concurrent calls.
    /// </param>
    /// <exception cref="ArgumentNullException">path, workSheetName or writer is null.</exception>
    /// <exception cref="ArgumentException">
    /// The workbook extension is unsupported, or the worksheet name contains ']'.
    /// </exception>
    public static void SerializeJsonToStream(string path, string workSheetName, TextWriter writer, JsonSerializerSettings settings = null)
    {
        // Validate everything before touching the caller's settings or opening a connection.
        if (path == null)
        {
            throw new ArgumentNullException("path");
        }
        if (workSheetName == null)
        {
            throw new ArgumentNullException("workSheetName");
        }
        if (writer == null)
        {
            throw new ArgumentNullException("writer");
        }
        // The name is placed inside [...] in the SQL text and cannot be passed as a parameter;
        // a ']' would end the bracketed identifier and let the rest be parsed as SQL.
        if (workSheetName.IndexOf(']') >= 0)
        {
            throw new ArgumentException("The worksheet name must not contain ']'.", "workSheetName");
        }

        string connectionString = GetExcelConnectionString(path);

        settings = settings ?? new JsonSerializerSettings();
        var converter = new DataReaderConverter();
        settings.Converters.Add(converter);
        try
        {
            using (OleDbConnection conn = new OleDbConnection(connectionString))
            {
                conn.Open();
                // CreateCommand returns a command already bound to this connection.
                using (OleDbCommand selectCommand = conn.CreateCommand())
                {
                    selectCommand.CommandText = String.Format("SELECT * FROM [{0}]", workSheetName);
                    using (var reader = selectCommand.ExecuteReader())
                    {
                        JsonExtensions.SerializeToStream(reader, writer, settings);
                    }
                }
            }
        }
        finally
        {
            // Restore the caller's settings even when reading or serialization fails.
            settings.Converters.Remove(converter);
        }
    }
}
注意:以上代码仅经过少量测试。在投入生产之前,请务必对照您现有的方法对其进行单元测试!转换器代码参考了 JSON Serialization of a DataReader 一文的思路。
**更新**
我的转换器输出的 JSON 结构与 Json.NET 的 DataTableConverter 相同,因此您可以使用 Json.NET 将其自动反序列化为 DataTable。如果您更喜欢更紧凑的格式,可以定义自己的格式,例如:
{
"columns": [
"Name 1",
"Name 2"
],
"rows": [
[
"value 11",
"value 12"
],
[
"value 21",
"value 22"
]
]
}
然后创建以下转换器:
/// <summary>
/// Write-only Json.NET converter that emits an <see cref="IDataReader"/> in a compact form:
/// an object with a "columns" array of field names and a "rows" array of value arrays.
/// When the reader yields no records, an empty object is written.
/// </summary>
public class DataReaderArrayConverter : JsonConverter
{
    const string columnsName = "columns";
    const string rowsName = "rows";

    /// <summary>Reading is not supported; this converter only writes.</summary>
    public override bool CanRead
    {
        get { return false; }
    }

    /// <summary>
    /// Reports whether <paramref name="objectType"/> is assignable to <see cref="IDataReader"/>.
    /// </summary>
    public override bool CanConvert(Type objectType)
    {
        Type readerType = typeof(IDataReader);
        return readerType.IsAssignableFrom(objectType);
    }

    /// <summary>Not implemented; always throws.</summary>
    /// <exception cref="NotImplementedException">Always.</exception>
    public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer)
    {
        throw new NotImplementedException();
    }

    // Collects the field names of the reader in ordinal order.
    static string[] GetFieldNames(IDataReader reader)
    {
        string[] names = new string[reader.FieldCount];
        int ordinal = 0;
        while (ordinal < reader.FieldCount)
        {
            names[ordinal] = reader.GetName(ordinal);
            ordinal++;
        }
        return names;
    }

    // Throws when the reader's current fields no longer match the names captured from the first record.
    static void ValidateFieldNames(IDataReader reader, string[] fieldNames)
    {
        if (reader.FieldCount != fieldNames.Length)
        {
            throw new InvalidOperationException("Unequal record lengths");
        }

        for (int ordinal = 0; ordinal < reader.FieldCount; ordinal++)
        {
            string expected = fieldNames[ordinal];
            string actual = reader.GetName(ordinal);
            if (expected != actual)
            {
                string message = string.Format("Field names at index {0} differ: \"{1}\" vs \"{2}\"", ordinal, expected, actual);
                throw new InvalidOperationException(message);
            }
        }
    }

    // Writes the "columns" property and opens the "rows" array; returns the captured field names.
    static string[] WriteHeader(JsonWriter writer, IDataReader reader, JsonSerializer serializer)
    {
        writer.WritePropertyName(columnsName);
        string[] names = GetFieldNames(reader);
        serializer.Serialize(writer, names);
        writer.WritePropertyName(rowsName);
        writer.WriteStartArray();
        return names;
    }

    /// <summary>
    /// Advances the reader to its end, writing the column names once (when the first record
    /// is read) followed by one JSON array of values per record.
    /// </summary>
    /// <param name="writer">Destination JSON writer.</param>
    /// <param name="value">The <see cref="IDataReader"/> to consume.</param>
    /// <param name="serializer">Serializer used for the column names and non-null values.</param>
    /// <exception cref="InvalidOperationException">A later record's fields differ from the first record's.</exception>
    public override void WriteJson(JsonWriter writer, object value, JsonSerializer serializer)
    {
        IDataReader records = (IDataReader)value;
        string[] columns = null;

        writer.WriteStartObject();
        while (records.Read())
        {
            if (columns != null)
            {
                ValidateFieldNames(records, columns);
            }
            else
            {
                // First record: emit the header before any row data.
                columns = WriteHeader(writer, records, serializer);
            }

            writer.WriteStartArray();
            for (int ordinal = 0; ordinal < records.FieldCount; ordinal++)
            {
                // Database nulls are written as JSON null; everything else goes through the serializer.
                if (!records.IsDBNull(ordinal))
                {
                    serializer.Serialize(writer, records[ordinal]);
                }
                else
                {
                    writer.WriteNull();
                }
            }
            writer.WriteEndArray();
        }

        // The "rows" array was only opened if at least one record was read.
        if (columns != null)
        {
            writer.WriteEndArray();
        }
        writer.WriteEndObject();
    }
}
当然,您需要在客户端创建自己的反序列化转换器。
或者,您可以考虑压缩您的响应。我从未尝试过,但请参阅 HttpWebRequest and GZip Http Responses 和 ASP.NET GZip Encoding Caveats。