我正在尝试构建一个创建PDF并保存在本地文件中的应用程序。我正在C#中通过iTextSharp使用它。我需要至少保存10万个PDF文件。
保存10 000个文件后,保存速度会变慢。前2万分钟会保存前10k,其余文件将花费近5个小时。
我认为问题在于内存使用情况。但是我找不到解决该问题的解决方案。我已经发布了源代码。
private void CreatePDF11()
{
Queue<String> QTempFileNames;
QTempFileNames = new Queue<string>();
string fileName = string.Empty;
DateTime fileCreationDatetime = DateTime.Now;
fileName = @"D:\StatementMassPrint\tesystw14j414435.PDF";
string DestinationDirectory = @"D:\StatementMassPrint";
string StrTempFilePath = String.Empty;
string pdfPath = fileName;
int TempPdfFileName = 1;
if (DestinationDirectory != string.Empty)
{
StrTempFilePath = DestinationDirectory + "temp" + "(" + DateTime.Now.ToFileTime() + ")";
}
if (!(Directory.Exists(StrTempFilePath)))
{
Directory.CreateDirectory(StrTempFilePath);
}
string reportFileName = StrTempFilePath + "\\" + TempPdfFileName.ToString() + ".Pdf";
for (int f = 0; f < 100000; f++)
{
reportFileName = StrTempFilePath + "\\" + TempPdfFileName.ToString() + ".Pdf";
using (FileStream msReport = new FileStream(reportFileName, FileMode.Create))
{
//step 1
using (Document pdfDoc = new Document(PageSize.A5.Rotate(), 10f, 10f, 200f,40f))
{
try
{
// step 2
PdfWriter pdfWriter = PdfWriter.GetInstance(pdfDoc, msReport);
pdfWriter.PageEvent = new EpisodePageHeaderAndFooter();
//open the stream
pdfDoc.Open();
DataTable dtEpisodeWise = new DataTable();
dtEpisodeWise.Columns.Add("INVOICE_NO");
dtEpisodeWise.Columns.Add("INVOICE_DATE");
dtEpisodeWise.Columns.Add("CODE");
dtEpisodeWise.Columns.Add("SERVICE_DESCRIPTION");
dtEpisodeWise.Columns.Add("QTY",typeof(decimal));
dtEpisodeWise.Columns.Add("UNIT_PRICE",typeof(decimal));
dtEpisodeWise.Columns.Add("GROSS",typeof(decimal));
dtEpisodeWise.Columns.Add("DISCOUNT",typeof(decimal));
dtEpisodeWise.Columns.Add("NET",typeof(decimal));
dtEpisodeWise.Columns.Add("DEDUCTION",typeof(decimal));
dtEpisodeWise.Columns.Add("NET_PAYABLE_WITHOUT_VAT",typeof(decimal));
dtEpisodeWise.Columns.Add("VAT",typeof(decimal));
dtEpisodeWise.Columns.Add("NET_PAYABLE_WITH_VAT",typeof(decimal));
PdfPTable table = new PdfPTable(dtEpisodeWise.Columns.Count);
table.WidthPercentage = 100;
Font fontH1 = new Font(Font.FontFamily.HELVETICA, 6, Font.BOLDITALIC);
for (int k = 0; k < dtEpisodeWise.Columns.Count; k++)
{
string str = System.Globalization.CultureInfo.CurrentCulture.TextInfo.ToTitleCase(dtEpisodeWise.Columns[k].ColumnName.Replace("_", " ").ToLower()); ;
PdfPCell cell = new PdfPCell(new Phrase(str,fontH1));
cell.HorizontalAlignment = PdfPCell.ALIGN_CENTER;
cell.VerticalAlignment = PdfPCell.ALIGN_CENTER;
// cell.BackgroundColor = new iTextSharp.text.BaseColor(51, 102, 102);
table.AddCell(cell);
}
// for (int i = 0; i < 1000; i++)
// {
dtEpisodeWise.Rows.Add("CR100005", "25-05-1989", "CPT004", "SERVICE005", 1, 10, 100, 10, 90, 45, 45, 5, 50);
dtEpisodeWise.Rows.Add("CR100006", "25-05-1992", "CPT00555", "SERVICE105",6, 60, 600, 60, 450, 45, 45, 5, 500);
// }
Font fontH2 = new Font(Font.FontFamily.HELVETICA, 6, Font.ITALIC);
for (int i = 0; i < dtEpisodeWise.Rows.Count; i++)
{
for (int j = 0; j < dtEpisodeWise.Columns.Count; j++)
{
PdfPCell cell = new PdfPCell(new Phrase(dtEpisodeWise.Rows[i][j].ToString(),fontH2));
//Align the cell in the center
if (dtEpisodeWise.Columns[j].DataType == typeof(decimal))
{
cell.HorizontalAlignment = PdfPCell.ALIGN_RIGHT;
cell.VerticalAlignment = PdfPCell.ALIGN_CENTER;
}
else
{
cell.HorizontalAlignment = PdfPCell.ALIGN_LEFT;
cell.VerticalAlignment = PdfPCell.ALIGN_CENTER;
}
table.HeaderRows = 1;
table.AddCell(cell);
}
}
pdfDoc.Add(table);
pdfDoc.Close();
TempPdfFileName++;
}
catch (Exception ex)
{
//handle exception
}
finally
{
}
}
}
}
}
答案 0 :(得分:2)
好吧,有些事情您尚未处理,这可能会导致问题。例如,DataTable
实现IDisposable
,PdfWriter
也实现。
对于PdfWriter
,您实际上并不需要它作为变量声明,因为您实际上并没有真正使用它,因此您可以像对待它一样
using (PdfWriter.GetInstance(pdfDoc, msReport))
{
// ...
}
需要注意的是,当前您正在for循环中的所有时间重新创建字体fontH1
和fontH2
。我没有真正的需要,它们在程序的整个运行过程中都不会改变,因此您可以在循环外部将它们声明为局部变量,或者作为创建pdf的类内部静态字段的一部分进行声明。 / p>
另一件事是,您似乎一次又一次地重新创建相同的数据表,现在我可以假设对于此处的当前代码,这只是伪代码,但是从我的角度来看,您不必创建循环中的数据表,我宁愿认为它可以作为创建pdf文件的参数(就像文件名一样),因此您可以重写代码以在自己的类中创建pdf(我将其命名为PdfModule
但我敢打赌,它有很多更好的名称:)),然后将代码重构为可以处理1个文件,例如:
public class PdfModule
{
private static readonly Font H1Font = new Font(Font.FontFamily.HELVETICA, 6, Font.BOLDITALIC);
private static readonly Font H2Font = new Font(Font.FontFamily.HELVETICA, 6, Font.ITALIC);
public static void CreateFile(string filename, DataTable data)
{
using (var msReport = new FileStream(filename, FileMode.Create, FileAccess.Write))
{
using (var pdfDoc = new Document(PageSize.A5.Rotate(), 10f, 10f, 200f, 40f))
{
using (PdfWriter.GetInstance(pdfDoc, msReport))
{
pdfDoc.Open();
var table = new PdfPTable(data.Columns.Count)
{
WidthPercentage = 100,
HeaderRows = 1
};
for (var k = 0; k < data.Columns.Count; k++)
{
var str =
System.Globalization.CultureInfo.CurrentCulture.TextInfo.ToTitleCase(data
.Columns[k].ColumnName.Replace("_", " ").ToLower());
;
var cell = new PdfPCell(new Phrase(str, H1Font))
{
HorizontalAlignment = Element.ALIGN_CENTER,
VerticalAlignment = Element.ALIGN_CENTER
};
table.AddCell(cell);
}
for (var i = 0; i < data.Rows.Count; i++)
{
for (var j = 0; j < data.Columns.Count; j++)
{
var cell =
new PdfPCell(new Phrase(data.Rows[i][j].ToString(), H2Font))
{
VerticalAlignment = Element.ALIGN_CENTER,
HorizontalAlignment = data.Columns[j].DataType == typeof(decimal)
? Element.ALIGN_RIGHT
: Element.ALIGN_LEFT
};
table.AddCell(cell);
}
}
pdfDoc.Add(table);
pdfDoc.Close();
}
}
}
}
}
这会将字体声明保留为类级别的静态字段,因此在程序运行期间仅将它们初始化一次,并且将写入1个单个文件,其中1个DataTable
会建立文档。两者都作为参数发送。
要使用该类,我编写了以下示例代码,它似乎可以非常快速地生成100.000个文件(在您提到的5个小时内没有)。
请注意,我没有关于您如何在实际程序中处理/填充数据表的任何详细信息,但至少它会为您提供有关如何重组代码并从那里开始的基本思路
internal class Program
{
private static DataTable CreateDataTable( IEnumerable<object[]> rawData )
{
var datatable = new DataTable();
datatable.Columns.Add("INVOICE_NO");
datatable.Columns.Add("INVOICE_DATE");
datatable.Columns.Add("CODE");
datatable.Columns.Add("SERVICE_DESCRIPTION");
datatable.Columns.Add("QTY", typeof(decimal));
datatable.Columns.Add("UNIT_PRICE", typeof(decimal));
datatable.Columns.Add("GROSS", typeof(decimal));
datatable.Columns.Add("DISCOUNT", typeof(decimal));
datatable.Columns.Add("NET", typeof(decimal));
datatable.Columns.Add("DEDUCTION", typeof(decimal));
datatable.Columns.Add("NET_PAYABLE_WITHOUT_VAT", typeof(decimal));
datatable.Columns.Add("VAT", typeof(decimal));
datatable.Columns.Add("NET_PAYABLE_WITH_VAT", typeof(decimal));
foreach (var row in rawData)
{
datatable.Rows.Add(row);
}
return datatable;
}
public static void Main(string[] args)
{
var rowData = new List<object[]>()
{
new object[] { "CR100005", "25-05-1989", "CPT004", "SERVICE005", 1, 10, 100, 10, 90,
45, 45, 5, 50 },
new object[] { "CR100006", "25-05-1992", "CPT00555", "SERVICE105", 6, 60, 600, 60,
450, 45, 45, 5, 500 }
};
var pdfModule = new PdfModule();
var outDirectory = Path.Combine(Environment.CurrentDirectory, "Output");
if (!Directory.Exists(outDirectory))
{
// well theoretically I should just create the directory and worry about conflicts differently
Directory.CreateDirectory(outDirectory);
}
Console.WriteLine( $"Creating files to {outDirectory}");
var nrOfFiles = 100000;
var stepCount = 1000;
for (var i = 0; i < nrOfFiles; i++)
{
if (i % stepCount == 0)
{
Console.WriteLine($"Creating files {i}-{i+stepCount-1}" );
}
var filename = Path.Combine(outDirectory, $"{i}.pdf");
using (var dataTable = CreateDataTable(rowData))
{
pdfModule.CreateFile(filename, dataTable);
}
}
Console.WriteLine($"Done, created {nrOfFiles} files");
}
}
对于其他情况,我不确定我的环境是否与您的环境匹配,但是它一直运行(收到ITextSharp
的通知,因为我没有有效的AGPL许可证^ _ ^),并且我正在运行它在Linux下使用Rider IDE