WCF(.NET 4.0)是否提供检索网页HTML的功能?

时间:2012-08-24 17:09:44

标签: .net html wcf web-services http

WCF(.NET 4.0)是否提供检索网页HTML的功能?如果没有,在.NET 4.0中这样做的最佳方法是什么?

2 个答案:

答案 0 :(得分:1)

您可以使用WebClient检索网页的HTML

// Download the raw bytes of the page at myURL; the client is disposed when the block exits.
using (var client = new WebClient())
{
    byte[] data = client.DownloadData(myURL);
}

WebClient有一些局限,例如无法设置下载超时,而且进度事件的触发并不稳定。我编写了自己的子类来改进它,代码如下,供参考。请注意,这段代码中有一个我仍在排查的错误(请参阅我的另一个问题"WebClient Subclass Disposed During Event Handler, Result is ObjectDisposedException")。一个简单的变通办法是在那个问题中提到的那一行代码外面加上try / catch,但我想弄清楚这个问题的根本原因。

/// <summary>
/// A <see cref="WebClient"/> with a configurable request timeout and a watchdog timer
/// that aborts a file download when no data arrives within
/// <see cref="TimeUntilFirstByte"/> milliseconds, or when more than
/// <see cref="TimeBetweenProgressChanges"/> milliseconds pass between progress events.
/// </summary>
/// <remarks>
/// The first-byte watchdog is armed by <see cref="DownloadFileWithEvents"/>; the
/// between-progress watchdog is re-armed by every download progress event.
/// </remarks>
public class MyWebClient : WebClient, IDisposable
{
    /// <summary>Timeout, in milliseconds, applied to every request created by this client.</summary>
    public int Timeout { get; set; }

    /// <summary>Milliseconds to wait for the first progress event before aborting.</summary>
    public int TimeUntilFirstByte { get; set; }

    /// <summary>Maximum milliseconds allowed between two progress events before aborting.</summary>
    public int TimeBetweenProgressChanges { get; set; }

    /// <summary>Total bytes reported by the most recent progress event.</summary>
    public long PreviousBytesReceived { get; private set; }

    /// <summary>Bytes received since <see cref="NotifyMegabyteIncrement"/> was last raised.</summary>
    public long BytesNotNotified { get; private set; }

    /// <summary>Description of the last failure, or null when no failure was recorded.</summary>
    public string Error { get; private set; }

    /// <summary>True when <see cref="Error"/> is set.</summary>
    public bool HasError { get { return Error != null; } }

    // Guards all mutable state below: it is touched from the caller's thread,
    // from timer callbacks and from WebClient event callbacks.
    private readonly object gate = new object();
    private readonly EventWaitHandle asyncWait = new ManualResetEvent(false);
    private readonly Timer abortTimer;

    private bool firstByteReceived = false;
    private bool success = true;
    private bool cancelDueToError = false;
    private bool watchdogArmed = false;
    private bool isDisposed = false;

    const long ONE_MB = 1024 * 1024;

    /// <summary>
    /// Handler for <see cref="NotifyMegabyteIncrement"/>.
    /// NOTE: despite the parameter name, the value passed is the total number of BYTES
    /// received so far; it is kept that way for compatibility with existing subscribers.
    /// </summary>
    public delegate void PerMbHandler(long totalMb);

    /// <summary>Raised each time more than one megabyte has arrived since the previous notification.</summary>
    public event PerMbHandler NotifyMegabyteIncrement;

    /// <summary>Creates the client. All durations are in milliseconds.</summary>
    /// <param name="timeout">Value assigned to the Timeout of every created request.</param>
    /// <param name="timeUntilFirstByte">Watchdog delay before the first progress event.</param>
    /// <param name="timeBetweenProgressChanges">Watchdog delay between progress events.</param>
    public MyWebClient(int timeout = 60000, int timeUntilFirstByte = 30000, int timeBetweenProgressChanges = 15000)
    {
        this.Timeout = timeout;
        this.TimeUntilFirstByte = timeUntilFirstByte;
        this.TimeBetweenProgressChanges = timeBetweenProgressChanges;

        this.DownloadFileCompleted += new System.ComponentModel.AsyncCompletedEventHandler(MyWebClient_DownloadFileCompleted);
        this.DownloadProgressChanged += new DownloadProgressChangedEventHandler(MyWebClient_DownloadProgressChanged);

        // Created disarmed: the countdown must start when a download starts,
        // not when the object happens to be constructed.
        abortTimer = new Timer(AbortDownload, null, System.Threading.Timeout.Infinite, System.Threading.Timeout.Infinite);
    }

    /// <summary>Raises <see cref="NotifyMegabyteIncrement"/>.</summary>
    /// <param name="totalMb">Value forwarded to subscribers (currently a byte count, see <see cref="PerMbHandler"/>).</param>
    protected void OnNotifyMegabyteIncrement(long totalMb)
    {
        // Copy to a local so a concurrent unsubscribe cannot null the delegate between check and call.
        PerMbHandler handler = NotifyMegabyteIncrement;
        if (handler != null) handler(totalMb);
    }

    // Starts (or restarts) the one-shot watchdog. Caller must hold 'gate' and have checked isDisposed.
    private void ArmWatchdog(int dueTime)
    {
        watchdogArmed = true;
        abortTimer.Change(dueTime, System.Threading.Timeout.Infinite);
    }

    // Stops the watchdog. Caller must hold 'gate' and have checked isDisposed.
    private void DisarmWatchdog()
    {
        watchdogArmed = false;
        abortTimer.Change(System.Threading.Timeout.Infinite, System.Threading.Timeout.Infinite);
    }

    // Timer callback: the download stalled, so cancel it and release the waiting caller.
    void AbortDownload(object state)
    {
        lock (gate)
        {
            // A callback queued just before the watchdog was stopped or the client was
            // disposed must not report a failure.
            if (isDisposed || !watchdogArmed) return;

            watchdogArmed = false;
            cancelDueToError = true;
            success = false;
            Error = firstByteReceived ? "Download aborted due to >" + TimeBetweenProgressChanges + "ms between progress change updates." : "No data was received in " + TimeUntilFirstByte + "ms";
        }

        // Outside the lock so that callbacks triggered by the cancellation cannot contend with us.
        this.CancelAsync();

        lock (gate)
        {
            if (!isDisposed) asyncWait.Set();
        }
    }

    // Tracks received bytes, raises the per-megabyte notification and restarts the stall watchdog.
    void MyWebClient_DownloadProgressChanged(object sender, DownloadProgressChangedEventArgs e)
    {
        bool notify = false;

        lock (gate)
        {
            if (cancelDueToError || isDisposed) return;

            long additionalBytesReceived = e.BytesReceived - PreviousBytesReceived;
            PreviousBytesReceived = e.BytesReceived;
            BytesNotNotified += additionalBytesReceived;

            if (BytesNotNotified > ONE_MB)
            {
                notify = true;
                BytesNotNotified = 0;
            }
            firstByteReceived = true;

            // A progress event delivered after the operation already finished must not
            // restart the watchdog, otherwise it would later "abort" a completed download.
            if (this.IsBusy) ArmWatchdog(TimeBetweenProgressChanges);
            else DisarmWatchdog();
        }

        // Raised outside the lock so a subscriber may safely call back into this object
        // (including disposing it).
        if (notify) OnNotifyMegabyteIncrement(e.BytesReceived);
    }

    /// <summary>
    /// Downloads <paramref name="url"/> to <paramref name="outputPath"/> and blocks until the
    /// download completes, fails, is cancelled or is aborted by the watchdog.
    /// </summary>
    /// <param name="url">Absolute URL of the resource to download.</param>
    /// <param name="outputPath">Path of the local file to write.</param>
    /// <returns>True when the download completed without error; otherwise false, with
    /// <see cref="Error"/> describing the failure.</returns>
    /// <exception cref="ObjectDisposedException">The client has been disposed.</exception>
    public bool DownloadFileWithEvents(string url, string outputPath)
    {
        // Parse first so an invalid URL throws before any state is modified.
        Uri uri = new Uri(url);

        lock (gate)
        {
            if (isDisposed) throw new ObjectDisposedException(GetType().FullName);

            // Reset per-download state so the client can be reused after a failure.
            success = true;
            Error = null;
            cancelDueToError = false;
            firstByteReceived = false;
            PreviousBytesReceived = 0;
            BytesNotNotified = 0;

            asyncWait.Reset();
            ArmWatchdog(TimeUntilFirstByte);
        }

        try
        {
            this.DownloadFileAsync(uri, outputPath);
        }
        catch
        {
            // The operation never started; do not leave the watchdog running.
            lock (gate)
            {
                if (!isDisposed) DisarmWatchdog();
            }
            throw;
        }

        asyncWait.WaitOne();

        return success;
    }

    // Records the outcome of the download and releases the caller blocked in DownloadFileWithEvents.
    void MyWebClient_DownloadFileCompleted(object sender, System.ComponentModel.AsyncCompletedEventArgs e)
    {
        lock (gate)
        {
            if (isDisposed) return;

            // AbortDownload has already recorded the error and signals the waiter itself.
            if (cancelDueToError) return;

            DisarmWatchdog();

            if (e.Cancelled)
            {
                success = false;
                Error = "Download was cancelled.";
            }
            else if (e.Error != null)
            {
                success = false;
                Error = e.Error.Message;
            }

            asyncWait.Set();
        }
    }

    /// <summary>Creates the request and applies <see cref="Timeout"/> to it.</summary>
    protected override WebRequest GetWebRequest(Uri address)
    {
        var result = base.GetWebRequest(address);
        if (result != null) result.Timeout = this.Timeout;
        return result;
    }

    /// <summary>
    /// Releases the watchdog timer and the wait handle. Overriding Dispose(bool) rather than
    /// re-implementing IDisposable.Dispose ensures the cleanup runs no matter how the object
    /// is disposed (using block, direct Dispose() call, or via a base-class reference).
    /// </summary>
    protected override void Dispose(bool disposing)
    {
        if (disposing)
        {
            lock (gate)
            {
                if (!isDisposed)
                {
                    isDisposed = true;
                    watchdogArmed = false;

                    this.DownloadFileCompleted -= MyWebClient_DownloadFileCompleted;
                    this.DownloadProgressChanged -= MyWebClient_DownloadProgressChanged;

                    if (abortTimer != null) abortTimer.Dispose();
                    if (asyncWait != null) asyncWait.Dispose();
                }
            }
        }

        base.Dispose(disposing);
    }
}

答案 1 :(得分:1)

如果您只是想抓取HTML内容(用于缓存等),Eric J的回答是对的。如果您打算把网页的某些部分提取为数据,可以看看HTML Agility Pack:http://htmlagilitypack.codeplex.com/wikipage?title=Examples