我需要将5.000.000条记录从数据库转换为JSON,但是在4.000条记录之后,我的内存不足。
我正在使用Task进行思考,当任务完成时,GC从内存中清除线程中的所有内容。
public class Program()
{
public static void Main(string[] args)
{
Program p = new Program();
p.ExportUsingTask();
}
public void ExportUsingTask()
{
List<int> ids = Program.LoadID(); // trying dont keep DBContext references, so GC can free memory
GC.Collect(); // GC can clear 130MB of memory, DBContext have no references anymore
GC.WaitForPendingFinalizers();
foreach (int item in ids)
{
Task job = new Task(() => new Program().Process(item));
job.RunSynchronously();
Task.WaitAll(job);
job.Dispose();
job = null;
GC.Collect(); // GC don't clear memory, uses more and more memory at each iteration, until OutOfMemoryException
GC.WaitForPendingFinalizers();
}
}
public static List<int> LoadID()
{
List<int> ids = new List<int>();
using (Context db = new Context())
{
ids = db.Alfa.Where(a => a.date.Year == 2019).Select(a => a.id).ToList<int>(); // load 500.000 id from DB, use 130MB of memory
// have some business logic here, but isn't the problem, memory is free after execution anyway
db.Dispose();
}
return ids;
}
public void Process(int id)
{
Beta b = GetBetaFromAlfa(id); // Beta is JSON model that I need save to file
string json = Newtonsoft.Json.JsonConvert.SerializeObject(b);
b = null;
using (StreamWriter sw = System.IO.File.AppendText(@"c:\MyFile.json"))
{
sw.Write(json);
sw.Close();
sw.Dispose();
}
GC.Collect(); // GC don't clear memory
GC.WaitForPendingFinalizers();
}
public static Beta GetBetaFromAlfa(int idAlfa)
{
Alfa a = null; // Alfa is my model in DB
Beta b = null; // Beta is JSON model that I need save to file
using (Context db = new Context())
{
Alfa a = db.Alfa.Single(a => a.id == idAlfa);
b = ConvertAlfaToBeta(a);
db.Dispose();
}
GC.Collect(); // GC don't clear memory
GC.WaitForPendingFinalizers();
return b;
}
public static Beta ConvertAlfaToBeta(Alfa alfa)
{
// business logic, something like:
// beta.id = alfa.id;
// beta.name = alfa.name;
// only simple type association (int, string, decimal, datetime, etc)
}
}
public class Alfa(){ ... }
public class Beta(){ ... }
在第一次尝试中,我做了一个循环,一个接一个地读取记录,当我得到100条记录时,我将所有JSON保存到文件中。但是无论如何,当我使用循环获得4000条记录时,我的内存不足:
public void ExportUsingLoop()
{
List<int> ids = Program.LoadID(); // trying dont keep DBContext references, so GC can free memory
GC.Collect(); // GC can clear 130MB of memory, DBContext have no references anymore
GC.WaitForPendingFinalizers();
int count = 0;
StringBuilder content = new StringBuilder();
foreach (int item in ids)
{
count++;
Beta b = GetBetaFromAlfa(id); // Beta is JSON model that I need save to file
string json = Newtonsoft.Json.JsonConvert.SerializeObject(b);
content.AppendLine(json);
b = null;
json = null;
if(count % 100 == 0)
{
using (StreamWriter sw = System.IO.File.AppendText(@"c:\MyFile.json"))
{
sw.Write(content.ToString());
content.Clear(); // just for clarification
sw.Close();
sw.Dispose();
}
GC.Collect(); // GC don't clear memory, uses more and more memory at each iteration, until OutOfMemoryException
GC.WaitForPendingFinalizers();
}
}
}