如何从HTTP中提取属性

时间:2017-12-01 09:18:43

标签: c# http

我试图从HTTP捕获团队的日程安排。 但到目前为止我所做的是这样的:

2018/1/23 Tuesday   AM4:00  Swansea Ci      Liverpool 

但我需要这样做:

2018/1/23 Tuesday   AM4:00  Swansea City    Liverpool 

因为我将字符串长度设置为10,所以它不会打印出#34; ty" 那么我怎样才能在这个代码结构中做到正确呢? THX!

using System.IO;
using System.Net;
using System;
using System.Text;
using System.Text.RegularExpressions;
public class Crawler
{
    static void Main()
    {
        string web = "http://www.goal.com/hk/fixtures/team/liverpool/663";
        WebRequest request = WebRequest.Create(web);
        request.Credentials = CredentialCache.DefaultCredentials;
        HttpWebResponse response = null;
        response = (HttpWebResponse)request.GetResponse();
        Stream dataStream = response.GetResponseStream();
        StreamReader reader = new StreamReader(dataStream);
        string line0 = reader.ReadToEnd();
        string path = @"C:\Users\UserName\Desktop\LiverpoolSchedule.txt";
        using (FileStream fileStream = File.OpenWrite(path))
        {
            using (StreamWriter writer = new StreamWriter(fileStream))
            {
                string SS0 = @"2...年.?.?月.?.?日星期.?</th>\n<th colspan=";
                foreach (Match match0 in Regex.Matches(line0, SS0))
                {
                    writer.Write(match0.ToString().Remove(match0.ToString().IndexOf("</th>")));
                    writer.Write("\t");

                    string SS1 = @"<td class=.status.>\n.午.?.?:.?.?";
                    string line1 = line0.Substring(match0.Index);
                    Match match1 = Regex.Match(line1, SS1);
                    writer.Write(match1.ToString().Substring(match1.ToString().IndexOf("\n") + 1));
                    writer.Write("\t");

                    string SS2 = @".png. alt=.";
                    string line2 = line1.Substring(match1.Index);
                    Match match2 = Regex.Match(line2, SS2);
                    writer.Write(line2.Substring(match2.Index + 11, 10));
                    writer.Write("\t");

                    string SS3 = @".png. alt=.";
                    string line3 = line2.Substring(match2.Index + 30);
                    Match match3 = Regex.Match(line3, SS3);
                    writer.Write(line3.Substring(match3.Index + 11, 10));
                    writer.WriteLine();
                }
                writer.Close();
            }
        }
        reader.Close();
        dataStream.Close();
    }
}

1 个答案:

答案 0 :(得分:0)

您可以尝试在RegEx中使用.+,而不是.?.?.?.?.?.?.?.?.?.?.?.?.?.?.?.?.?.?

有关详细信息,请查看此website