我有一个名为&#34的.jar文件; DynamicContentLoader.jar"执行连接到网页的Java进程,使用HtmlUnit,并通过static void Main(string[] args)
{
string file = "C:\\CUWCDFileStorage\\temp\\test.png";
var bytes = File.ReadAllBytes(file);
using (var stream = File.Open(file, FileMode.Open))
{
Console.WriteLine(Md5HashFile(bytes));
Console.WriteLine(Md5HashFile(stream));
Console.WriteLine(Sha1HashFile(bytes));
Console.WriteLine(Sha1HashFile(stream));
Console.WriteLine(Sha1HashFile2(bytes));
}
Console.Read();
}
public static string Md5HashFile(byte[] file)
{
using (MD5 md5 = MD5.Create())
{
return BitConverter.ToString(md5.ComputeHash(file)).Replace("-", "");
}
}
public static string Sha1HashFile(byte[] file)
{
using (SHA1Managed sha1 = new SHA1Managed())
{
return BitConverter.ToString(sha1.ComputeHash(file)).Replace("-", "");
}
}
public static string Md5HashFile(Stream stream)
{
using (MD5 md5 = MD5.Create())
{
return BitConverter.ToString(md5.ComputeHash(stream)).Replace("-", "");
}
}
public static string Sha1HashFile(Stream stream)
{
using (SHA1Managed sha1 = new SHA1Managed())
{
return BitConverter.ToString(sha1.ComputeHash(stream)).Replace("-", "");
}
}
public static string Sha1HashFile2(byte[] bytes)
{
string file = "C:\\CUWCDFileStorage\\temp\\test2.png";
File.WriteAllBytes(file, bytes);
return Sha1HashFile(File.OpenRead(file));
}
打印其Html文档。此过程从命令行获取一个参数:需要检索的网页的URI。
导出到.jar文件的Java进程代码:
System.out.println();
接下来,我在Mono项目中执行此.jar,其中一个类使用import java.io.IOException;
import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
public class DynamicContentLoader {
public static void main(String[] args) {
java.util.logging.Logger.getLogger("com.gargoylesoftware").setLevel(java.util.logging.Level.OFF);
String s = DynamicContentLoader.loadHtml("https://query.nytimes.com/search/sitesearch/?action=click&contentCollection®ion=TopBar&WT.nav=searchWidget&module=SearchSubmit&pgtype=Homepage#/Donald%20Trump");
System.out.println(s);
}
public static String loadHtml(String url) {
final WebClient webClient = new WebClient(BrowserVersion.CHROME);
webClient.getOptions().setCssEnabled(false); //if you don't need css
webClient.getOptions().setThrowExceptionOnScriptError(false); // stop process breaking exception throws
HtmlPage page;
try {
page = webClient.getPage(url);
webClient.waitForBackgroundJavaScript(20 * 1000); /* will wait JavaScript to execute up to 5s */
String pageAsXml = page.asXml();
webClient.close();
return pageAsXml;
} catch (FailingHttpStatusCodeException | IOException e) {
return null;
}
}
}
Object来执行.jar,将其Process
流读取到StandardOutput
,然后创建并从StringBuilder
返回HtmlAgilityPack.HtmlDocument
个对象:
StringBuilder.ToString();
我的问题是,当我从命令行运行.jar时,
" java -jar path / to / file / DynamicContentLoader.jar' some uri'"
我得到了正确加载的Html文档/字符串。但是,我上面的C#代码返回一个不同的,不完整的Html文档/字符串,甚至崩溃,例如:
using System;
using System.Diagnostics;
using System.Text;
using HtmlAgilityPack;
namespace Search {
public static class DynamicContentLoader {
// path of .jar file in ProjectDirectory/Resources/.jar
readonly static string jarPath =
AppDomain.CurrentDomain.BaseDirectory +
"Resources/DynamicContentLoader.jar";
public static HtmlDocument LoadDynamicWebPage(string url) {
var startInfo = new ProcessStartInfo("java", @" -jar "
+ jarPath + " \'" + url + "\'");
startInfo.UseShellExecute = false;
startInfo.RedirectStandardOutput = true;
var javaProcess = new Process();
javaProcess.StartInfo = startInfo;
javaProcess.Start();
var output = new StringBuilder();
while (!javaProcess.HasExited) {
output.Append(javaProcess.StandardOutput.ReadToEnd());
}
if (output.Length > 0) {
var doc = new HtmlDocument();
doc.LoadHtml(output.ToString());
// looking see if correct Html doc
Console.WriteLine(doc.DocumentNode.InnerHtml);
return doc;
}
return null;
}
}
}
是否有人知道可能导致这两种不同执行方法之间的行为差异的原因或解决此问题的原因是什么?