我正在尝试使用 Awesomium 获取当前已加载页面的内容。但如果页面是 XML(RSS),得到的结果就不正确。下面是用于演示该问题的测试程序:
using System;
using System.Diagnostics;
using System.Threading;
using System.Threading.Tasks;
using Awesomium.Core;
namespace AwesomiumTest
{
/// <summary>
/// Console harness that loads several URLs in an off-screen Awesomium
/// <c>WebView</c> and prints the root tag name plus the first part of
/// <c>document.documentElement.outerHTML</c> for each of them.
/// </summary>
class Program
{
    /// <summary>
    /// Longest time a single page load is polled before giving up, so that a
    /// main frame which never reports completion cannot hang the program.
    /// </summary>
    private static readonly TimeSpan LoadTimeout = TimeSpan.FromSeconds(30);

    /// <summary>
    /// Initializes the web core, dumps the three sample pages and waits for a key press.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        WebCore.Initialize(new WebConfig()
        {
            LogLevel = LogLevel.None
        });

        try
        {
            // Example #1: regular HTML page, the output is as expected.
            WritePageToConsole("http://www.google.com/");

            // Example #2: small problem. The feed comes back wrapped in extra
            // HTML tags and its "<" / ">" are escaped to "&lt;" / "&gt;".
            WritePageToConsole("http://social.msdn.microsoft.com/search/es-ES/feed?query=vb&format=RSS");

            // Example #3: big problem. The reported content is 'undefined'.
            WritePageToConsole("http://prmac.com/rss-ct-d.htm");

            Console.ReadKey();
        }
        finally
        {
            // Release the web core that was initialized above before the process exits.
            WebCore.Shutdown();
        }
    }

    /// <summary>
    /// Loads <paramref name="url"/> in a fresh session and view, waits for the
    /// main frame to finish loading, then writes the document's root tag name
    /// and at most the first 300 characters of its outer HTML to the console.
    /// </summary>
    /// <param name="url">Address of the page to load.</param>
    static void WritePageToConsole(string url)
    {
        using (WebSession session = WebCore.CreateWebSession(new WebPreferences()))
        {
            using (WebView view = WebCore.CreateWebView(1280, 960, session))
            {
                bool finishedLoading = false;
                view.LoadingFrameComplete += (s, e) =>
                {
                    // Only the main frame counts; other frames may complete earlier.
                    if (e.IsMainFrame)
                    {
                        finishedLoading = true;
                    }
                };

                view.Source = url.ToUri();

                // Poll until the handler above flips the flag; WebCore.Update() is
                // called on every pass, exactly as in the original loop.
                Stopwatch loadTimer = Stopwatch.StartNew();
                while (!finishedLoading)
                {
                    if (loadTimer.Elapsed > LoadTimeout)
                    {
                        Console.WriteLine("Timed out loading: " + url);
                        Console.WriteLine();
                        return;
                    }

                    Thread.Sleep(100);
                    WebCore.Update();
                }

                string docTagName = view.ExecuteJavascriptWithResult("document.documentElement.tagName").ToString();
                // NOTE(review): for the page whose root element is "rss" (Example #3)
                // this expression was observed to yield 'undefined'; it is kept
                // unchanged because demonstrating that is the purpose of this program.
                string outerHTML = view.ExecuteJavascriptWithResult("document.documentElement.outerHTML").ToString();

                Console.WriteLine("Document tagName: " + docTagName);
                Console.WriteLine("Part of content: " + outerHTML.Substring(0, Math.Min(300, outerHTML.Length)));
                Console.WriteLine();
            }
        }
    }
}
}
控制台看起来像这样:
示例#1
文档标记名称: HTML
部分内容:
<html itemscope="" itemtype="http://schema.org/WebPage"><head><meta content="Поиск информации в интернете: веб страницы, картинки, видео и многое другое." name="description"><meta content="noodp" name="robots"><meta itemprop="image" content="/images/google_favicon_128.png"><title>Google</title><scri
示例#2
文档标记名称: HTML
部分内容:
<html><head></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">&lt;rss xmlns:a10="http://www.w3.org/2005/Atom" version="2.0"&gt;&lt;channel&gt;&lt;title&gt;Buscar en Msdn&lt;/title&gt;&lt;description&gt;Use Msdn para buscar en la Web.&lt;/description&gt;&lt;item&gt;&lt;link&gt;h
示例#3
文档标记名称: rss
部分内容:undefined
在示例#2和#3中获取页面内容的正确方法是什么?