我试图从HTTP捕获团队的日程安排。 但到目前为止我所做的是这样的:
2018/1/23 Tuesday AM4:00 Swansea Ci Liverpool
但我需要这样做:
2018/1/23 Tuesday AM4:00 Swansea City Liverpool
因为我将字符串长度设置为10,所以它不会打印出#34; ty" 那么我怎样才能在这个代码结构中做到正确呢? THX!
using System.IO;
using System.Net;
using System;
using System.Text;
using System.Text.RegularExpressions;
public class Crawler
{
static void Main()
{
string web = "http://www.goal.com/hk/fixtures/team/liverpool/663";
WebRequest request = WebRequest.Create(web);
request.Credentials = CredentialCache.DefaultCredentials;
HttpWebResponse response = null;
response = (HttpWebResponse)request.GetResponse();
Stream dataStream = response.GetResponseStream();
StreamReader reader = new StreamReader(dataStream);
string line0 = reader.ReadToEnd();
string path = @"C:\Users\UserName\Desktop\LiverpoolSchedule.txt";
using (FileStream fileStream = File.OpenWrite(path))
{
using (StreamWriter writer = new StreamWriter(fileStream))
{
string SS0 = @"2...年.?.?月.?.?日星期.?</th>\n<th colspan=";
foreach (Match match0 in Regex.Matches(line0, SS0))
{
writer.Write(match0.ToString().Remove(match0.ToString().IndexOf("</th>")));
writer.Write("\t");
string SS1 = @"<td class=.status.>\n.午.?.?:.?.?";
string line1 = line0.Substring(match0.Index);
Match match1 = Regex.Match(line1, SS1);
writer.Write(match1.ToString().Substring(match1.ToString().IndexOf("\n") + 1));
writer.Write("\t");
string SS2 = @".png. alt=.";
string line2 = line1.Substring(match1.Index);
Match match2 = Regex.Match(line2, SS2);
writer.Write(line2.Substring(match2.Index + 11, 10));
writer.Write("\t");
string SS3 = @".png. alt=.";
string line3 = line2.Substring(match2.Index + 30);
Match match3 = Regex.Match(line3, SS3);
writer.Write(line3.Substring(match3.Index + 11, 10));
writer.WriteLine();
}
writer.Close();
}
}
reader.Close();
dataStream.Close();
}
}