文件A B包含百万个网址。
1,逐个浏览文件A中的url。
2,提取subdomain.com(http://subdomain.com/path/file)
3,如果subdomain.com存在文件B,则将其保存到文件C.
使用c#获取文件C的最快捷方式是什么?
感谢。
当我使用readline时,它没有太大的不同。
// stat
DateTime start = DateTime.Now;
int totalcount = 0;
int n1;
if (!int.TryParse(num1.Text, out n1))
n1 = 0;
// memory
dZLinklist = new Dictionary<string, string>();
// read file
string fileName = openFileDialog1.FileName; // get file name
textBox1.Text = fileName;
StreamReader sr = new StreamReader(textBox1.Text);
string fullfile = File.ReadAllText(@textBox1.Text);
string[] sArray = fullfile.Split( '\n');
//IEnumerable<string> sArray = tool.GetSplit(fullfile, '\n');
//string sLine = "";
//while (sLine != null)
foreach ( string sLine in sArray)
{
totalcount++;
//sLine = sr.ReadLine();
if (sLine != null)
{
//string reg = "http[s]*://.*?/";
//Regex R = new Regex(reg, RegexOptions.Compiled);
//Match m = R.Match(sLine);
//if(m.Success)
int length = sLine.IndexOf(' ', n1); // default http://
if(length > 0)
{
//string urls = sLine.Substring(0, length);
dZLinklist[sLine.Substring(0,length)] = sLine;
}
}
}
TimeSpan time = DateTime.Now - start;
int count = dZLinklist.Count;
double sec = Math.Round(time.TotalSeconds,2);
label1.Text = "(" + totalcount + ")" + count.ToString() + " / " + sec + " = " + (Math.Round(count / sec,2)).ToString();
sr.Close();