我正在尝试从一个制表符分隔的文本文件中读取 HTML,用它创建 HTML 文件,然后再将其转换为 PDF。读取该文本文件时,内容中出现了 ' 以及其他一些奇怪的字符。这是我的代码:
// Load every row of the tab-separated export, decoding the bytes as Windows-1255.
var lines = System.IO.File.ReadAllLines(
    @"C:\temp\Laura.txt",
    Encoding.GetEncoding("Windows-1255"));

// Project each row into an Articles record: column 0 = id, column 1 = name, column 2 = body.
var csv = (from row in lines
           let fields = row.Split('\t')
           select new Articles()
           {
               id = fields[0].Trim(),
               name = fields[1].Trim(),
               body = fields[2].Trim(),
               // Alternatives already tried for the body column (kept for reference):
               //body = WebUtility.HtmlDecode(parts[2].Trim()),
               //body = HttpUtility.HtmlEncode(parts[2].Trim()),
               //body = WebUtility.HtmlEncode(parts[2].Trim()),
               //body = SecurityElement.Escape(parts[2].Trim()),
           }).ToList();
// For each parsed article: write its body to c:\temp\<name>.html (only if that file is not
// there yet), open the HTML file in Word through COM interop and call PrintOut with a
// ".pdf" output path next to the HTML file.
foreach (var item in csv)
{
// NOTE(review): `id` is assigned from item.name (not item.id) and is never read below.
string id = item.name;
string filename = item.name + ".html";
string body = item.body;
string path = @"c:\temp\" + filename;
// Only articles whose HTML file does not exist yet are written and converted.
if (!File.Exists(path))
{
// Create the HTML file from the article body.
// NOTE(review): no encoding is passed here, so the framework default is used; Word may
// guess a different code page when opening the file - possibly the source of the odd
// characters. TODO confirm.
File.WriteAllText(path, body);
// NOTE(review): a new Word instance is started for every article, and nothing visible in
// this snippet closes the document or quits Word afterwards.
Microsoft.Office.Interop.Word.Application ap = new Microsoft.Office.Interop.Word.Application();
Document document = ap.Documents.Open(path);
// Boxed values, because the COM PrintOut parameters are passed as objects.
object oFalse = false;
object oTrue = true;
// Same directory and base name as the HTML file, with a .pdf extension.
object OutputFileName = Path.Combine(
Path.GetDirectoryName(path),
Path.GetFileNameWithoutExtension(path) + ".pdf");
// Placeholder for every optional COM argument that is left unspecified.
object missing = System.Reflection.Missing.Value;
// NOTE(review): PrintToFile is left as `missing`; the Word documentation describes
// OutputFileName as applying when PrintToFile is true - verify a PDF is really produced.
document.PrintOut(
oTrue, // Background
oFalse, // Append
ref missing, // Range
OutputFileName, // OutputFileName
ref missing, // From
ref missing, // To
ref missing, // Item
ref missing, // Copies
ref missing, // Pages
ref missing, // PageType
ref missing, // PrintToFile
ref missing, // Collate
ref missing, // ActivePrinterMacGX
ref missing, // ManualDuplexPrint
ref missing, // PrintZoomColumn
ref missing, // PrintZoomRow
ref missing, // PrintZoomPaperWidth
ref missing // PrintZoomPaperHeight
);
}
}
我尝试了注释掉的代码,但是似乎没有任何效果。
答案 0 :(得分:0)
尝试
// Read all rows of the tab-separated file, decoding it as Windows-1255.
var lines = System.IO.File.ReadAllLines(
    @"C:\temp\Laura.txt",
    Encoding.GetEncoding("Windows-1255"));

// Split each row on tabs, then map the first three columns onto an Articles record.
var csv = lines
    .Select(row => row.Split('\t'))
    .Select(fields => new Articles()
    {
        id = fields[0].Trim(),
        name = fields[1].Trim(),
        body = fields[2].Trim(),
    })
    .ToList();
然后尝试改用 ExportAsFixedFormat,并指定 WdExportFormat.wdExportFormatPDF 来导出 PDF:
// Converts "2.html" (located in the application's base directory) to "2.pdf" in the same
// folder by opening it in Word and exporting it as a fixed-format PDF.
var path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, @"2.html");
var OutputFileName = Path.Combine(
    Path.GetDirectoryName(path),
    Path.GetFileNameWithoutExtension(path) + ".pdf");

var app = new Microsoft.Office.Interop.Word.Application();
try
{
    // Second argument is ConfirmConversions: false suppresses the "Convert File" dialog.
    var doc = app.Documents.Open(path, false);
    try
    {
        doc.ExportAsFixedFormat(OutputFileName, WdExportFormat.wdExportFormatPDF);
    }
    finally
    {
        // Close without saving so Word never prompts about changes to the HTML file.
        doc.Close(false);
    }
}
finally
{
    // Without Quit the automated WINWORD.EXE process keeps running after this code ends.
    app.Quit();
}
完整代码
/// <summary>
/// Reads the tab-separated article list from C:\temp\Laura.txt (decoded as Windows-1255),
/// writes each article body to c:\temp\&lt;name&gt;.html and exports that HTML file to a PDF
/// with the same base name through Word automation.
/// </summary>
/// <remarks>
/// Articles whose HTML file already exists are left untouched. Rows with fewer than three
/// tab-separated columns (for example a trailing blank line) are skipped.
/// </remarks>
static void connvert()
{
    var lines = File.ReadAllLines(
        @"C:\temp\Laura.txt",
        Encoding.GetEncoding("Windows-1255"));

    var csv = lines
        .Select(x => x.Split('\t'))
        // A row without a body column would otherwise throw IndexOutOfRangeException.
        .Where(parts => parts.Length >= 3)
        .Select(parts => new Articles()
        {
            id = parts[0].Trim(),
            name = parts[1].Trim(),
            body = parts[2].Trim(),
        })
        .ToList();

    // One Word instance is shared by all conversions and started only when needed.
    Application app = null;
    try
    {
        foreach (var item in csv)
        {
            string filename = item.name + ".html";
            string path = @"c:\temp\" + filename;

            // Only articles that have not been written yet are created and converted.
            if (File.Exists(path))
            {
                continue;
            }

            // Encoding.Unicode writes a byte-order mark, which lets Word detect the encoding.
            // If the characters are still wrong, try Encoding.GetEncoding("Windows-1255") here.
            File.WriteAllText(path, item.body, Encoding.Unicode);

            if (app == null)
            {
                app = new Application();
            }

            // Second argument is ConfirmConversions: false suppresses the "Convert File" dialog.
            var doc = app.Documents.Open(path, false);
            try
            {
                var OutputFileName = Path.Combine(
                    Path.GetDirectoryName(path),
                    Path.GetFileNameWithoutExtension(path) + ".pdf");

                doc.ExportAsFixedFormat(OutputFileName, WdExportFormat.wdExportFormatPDF);
            }
            finally
            {
                // Close without saving so the generated HTML file is left unchanged.
                doc.Close(false);
            }
        }
    }
    finally
    {
        // Without Quit the automated WINWORD.EXE process keeps running after this method returns.
        if (app != null)
        {
            app.Quit();
        }
    }
}