类 ParserWorker 的
WorkerHTMLFile()
方法遍历列表。
列表 ListUrlActive
包含 4 行。
ParsingPage(string source)
方法将页面发送到 SiteParser
类进行解析。
在 SiteParser 类中
执行 Parsing(IHtmlDocument document)
方法,
case "Type_2":
之后的 break 中断了代码的执行。
如果我理解正确,代码的执行应返回到 ParserWorker
类,回到 WorkerHTMLFile()
方法中的 for
循环,
并处理 ListUrlActive 列表的下一行,但这并没有发生。
代码只处理了第一行,并在
case "Type_2" 的 break 处终止。
问题:
在执行
case "Type_2" 的 break
之后,如何使代码继续处理列表中的其余行?
/// <summary>
/// Reads every file listed in <c>ListUrlActive</c> and hands its HTML to the site parser.
/// </summary>
class ParserWorker
{
    /// <summary>
    /// Walks <c>ListUrlActive</c> from the first to the last entry, reads each file
    /// (entries are passed to <see cref="File.ReadAllText(string)"/>, i.e. used as paths)
    /// and starts parsing its content.
    /// </summary>
    public void WorkerHTMLFile()
    {
        // The upper bound is exclusive: the last valid index is Count - 1.
        // Using "<=" would index one element past the end of the list.
        for (int i = 0; i < ListUrlActive.Count; i++)
        {
            string source = File.ReadAllText(ListUrlActive[i]);

            // NOTE(review): ParsingPage is "async void", so this call returns as soon as
            // it reaches its first incomplete await; the loop does not wait for the page
            // to be fully processed before moving on to the next file.
            ParsingPage(source);
        }
    }

    /// <summary>
    /// Parses raw HTML into a document and passes it to <c>siteParser.Parsing</c>.
    /// </summary>
    /// <param name="source">HTML text of one page.</param>
    public async void ParsingPage(string source)
    {
        var domParser = new HtmlParser();
        IHtmlDocument document = await domParser.ParseAsync(source);
        siteParser.Parsing(document);
    }
}
/// <summary>
/// Extracts data from an already parsed page; the strategy is selected by <c>objectParsing</c>.
/// </summary>
public class SiteParser
{
    /// <summary>
    /// Dispatches on <c>objectParsing</c>. For "Type_2" every element matching
    /// <c>div.flexRoot &gt; div.view.main</c> is re-parsed as a document of its own
    /// and passed to <c>ParsingPoster</c>.
    /// </summary>
    /// <param name="document">The parsed page to inspect.</param>
    public async void Parsing(IHtmlDocument document)
    {
        switch (objectParsing)
        {
            case "Type_1":
                /// ...
                /// ... Code
                /// ...
                break;

            case "Type_2":
                var announcementParser = new HtmlParser();
                foreach (var block in document.QuerySelectorAll("div.flexRoot > div.view.main"))
                {
                    // Each block is parsed on its own so ParsingPoster sees it as a whole document.
                    IHtmlDocument blockDocument = await announcementParser.ParseAsync(block.OuterHtml);
                    ParsingPoster(blockDocument);
                }
                break;
        }
    }
}
/// <summary>
/// Reads the e-mail value out of an announcement document and stores it in <c>email</c>.
/// </summary>
/// <param name="document">Document holding a single announcement block.</param>
public async void ParsingPoster(IHtmlDocument document)
{
    try
    {
        try
        {
            // The value sits inside a script element as eval(unescape('...')).
            var scriptNodes = document.QuerySelectorAll("#start_widget > div:nth-child(3) > div.form-line.view-form-line > div.adv-point.view-adv-point > script:nth-child(3)");
            email = scriptNodes[0].TextContent.Trim();

            // Keep only the payload between eval(unescape(' and ')).
            email = wordProcessing.FindRegularExpression(email, @"(?<=eval\(unescape\(').*(?='\)\))");

            // presumably turns the escaped payload back into markup; verify against wordProcessing
            email = wordProcessing.DecodeResult(email);

            // The decoded text is parsed as HTML; the first anchor's text is the final value.
            IHtmlDocument htmlDocumentEmail = await domParser.ParseAsync(email);
            email = htmlDocumentEmail.QuerySelectorAll("a")[0].TextContent.Trim();
        }
        catch (Exception fieldError)
        {
            InfoMessageErrorEvent?.Invoke("Поле: 'email'. Error !!!" + fieldError.Message);
        }
    }
    catch (Exception unexpected)
    {
        // Values are only captured in locals; nothing is reported or rethrown.
        string message = unexpected.Message;
        string stackTrace = unexpected.StackTrace;
        // throw;
    }
}
更新_1
ParserWorker
类
WorkerHTMLFile ()
方法
原来是:public void WorkerHTMLFile ()
现在是:public async void WorkerHTMLFile ()
方法体也做了修改。
ParsingPage (string source)
方法
原来是:public async void ParsingPage (string source)
现在是:public async Task <IHtmlDocument> ParsingPage (string source)
方法体也做了修改。
SiteParser 类
Parsing (IHtmlDocument document) 方法
原来是:public async void Parsing (IHtmlDocument document)
现在是:public async Task Parsing (IHtmlDocument document)
原来是:ParsingPoster (documentCur);
现在是:await ParsingPoster (documentCur);
代码
/// <summary>
/// Reads every file listed in <c>ListUrlActive</c>, parses it into a document and
/// runs the site parser on it, one file after another.
/// </summary>
class ParserWorker
{
    /// <summary>
    /// Processes the entries of <c>ListUrlActive</c> sequentially: each file is read,
    /// parsed and fully handled by <c>siteParser.Parsing</c> before the next one starts.
    /// </summary>
    public async void WorkerHTMLFile()
    {
        // Exclusive upper bound: "<=" would read one element past the end of the list.
        for (int i = 0; i < ListUrlActive.Count; i++)
        {
            // An exception escaping an "async void" method cannot be observed by the
            // caller, so a failure is reported here and the loop moves on to the next file.
            try
            {
                string source = File.ReadAllText(ListUrlActive[i]);
                IHtmlDocument document = await ParsingPage(source);
                if (document == null)
                {
                    // ParsingPage already reported why this page could not be parsed.
                    continue;
                }

                await siteParser.Parsing(document);
            }
            catch (Exception ex)
            {
                System.Diagnostics.Trace.TraceError("Processing of '{0}' failed: {1}", ListUrlActive[i], ex);
            }
        }
    }

    /// <summary>
    /// Parses raw HTML into a document. Running the site parser on the result is left
    /// to the caller, so each page is handled exactly once.
    /// </summary>
    /// <param name="source">HTML text of one page.</param>
    /// <returns>The parsed document, or <c>null</c> when parsing threw.</returns>
    public async Task<IHtmlDocument> ParsingPage(string source)
    {
        try
        {
            var domParser = new HtmlParser();
            return await domParser.ParseAsync(source);
        }
        catch (Exception ex)
        {
            // Best effort: report the failure and let the caller skip this page.
            System.Diagnostics.Trace.TraceError("ParsingPage failed: {0}", ex);
            return null;
        }
    }
}
/// <summary>
/// Extracts data from an already parsed page; the strategy is selected by
/// <c>settingOper.objectParsing</c>.
/// </summary>
public class SiteParser
{
    /// <summary>
    /// Dispatches on <c>settingOper.objectParsing</c>. For "Type_2" every element matching
    /// <c>div.flexRoot &gt; div.view.main</c> is re-parsed as a document of its own and
    /// handed to <c>ParsingPoster</c>, one block at a time.
    /// </summary>
    /// <param name="document">The parsed page to inspect.</param>
    /// <returns>A task that completes once all blocks have been handled.</returns>
    public async Task Parsing(IHtmlDocument document)
    {
        switch (settingOper.objectParsing)
        {
            case "Type_1":
                /// ...
                /// ... Code
                /// ...
                break;

            case "Type_2":
                var announcementParser = new HtmlParser();
                foreach (var block in document.QuerySelectorAll("div.flexRoot > div.view.main"))
                {
                    // Each block is parsed on its own so ParsingPoster sees it as a whole document.
                    IHtmlDocument blockDocument = await announcementParser.ParseAsync(block.OuterHtml);
                    await ParsingPoster(blockDocument);
                }
                break;
        }
    }
}
/// <summary>
/// Reads the e-mail value out of an announcement document and stores it in <c>email</c>.
/// </summary>
/// <remarks>
/// Returns <see cref="Task"/> instead of <c>void</c>: an <c>async void</c> method cannot be
/// awaited, so a caller would have no way to wait for one block before starting the next.
/// </remarks>
/// <param name="document">Document holding a single announcement block.</param>
/// <returns>A task that completes when the block has been processed.</returns>
public async Task ParsingPoster(IHtmlDocument document)
{
    try
    {
        try
        {
            // The value sits inside a script element as eval(unescape('...')).
            email = document.QuerySelectorAll("#start_widget > div:nth-child(3) > div.form-line.view-form-line > div.adv-point.view-adv-point > script:nth-child(3)")[0].TextContent.Trim();
            // Keep only the payload between eval(unescape(' and ')).
            email = wordProcessing.FindRegularExpression(email, @"(?<=eval\(unescape\(').*(?='\)\))");
            // presumably turns the escaped payload back into markup; verify against wordProcessing
            email = wordProcessing.DecodeResult(email);
            // The decoded text is parsed as HTML; the first anchor's text is the final value.
            IHtmlDocument htmlDocumentEmail = await domParser.ParseAsync(email);
            var itemsAttr = htmlDocumentEmail.QuerySelectorAll("a");
            email = itemsAttr[0].TextContent.Trim();
        }
        catch (Exception ex)
        {
            // A missing or malformed field is reported; processing of the block continues.
            InfoMessageErrorEvent?.Invoke("Поле: 'email'. Error !!!" + ex.Message);
        }
        //...
        // ... Parsing additional fields ...
        //...
    }
    catch (Exception ex)
    {
        // Still best effort (no rethrow), but the failure is no longer dropped silently.
        System.Diagnostics.Trace.TraceError("ParsingPoster failed: {0}", ex);
    }
}
答案 0 :(得分:-1)
这是可以帮助您的更新代码:
/// <summary>
/// Reads every file listed in <c>ListUrlActive</c> and hands its HTML to the site parser,
/// one file after another.
/// </summary>
class ParserWorker
{
    /// <summary>
    /// Processes the entries of <c>ListUrlActive</c> in order, waiting for each page's
    /// <see cref="ParsingPage"/> call to finish before reading the next file.
    /// </summary>
    /// <returns>A task that completes after the last entry has been handled.</returns>
    public async Task WorkerHTMLFile()
    {
        // Exclusive upper bound: "<=" would read one element past the end of the list.
        for (int i = 0; i < ListUrlActive.Count; i++)
        {
            string source = File.ReadAllText(ListUrlActive[i]);
            await ParsingPage(source);
        }
    }

    /// <summary>
    /// Parses raw HTML into a document and passes it to <c>siteParser.Parsing</c>.
    /// </summary>
    /// <param name="source">HTML text of one page.</param>
    /// <returns>A task the caller can await; "async void" would make that impossible.</returns>
    public async Task ParsingPage(string source)
    {
        var domParser = new HtmlParser();
        IHtmlDocument document = await domParser.ParseAsync(source);

        // NOTE(review): if SiteParser.Parsing returns a Task, await this call as well;
        // otherwise the loop above still moves on before the page is fully processed.
        siteParser.Parsing(document);
    }
}
/// <summary>
/// Extracts data from an already parsed page; the strategy is selected by
/// <c>settingOper.objectParsing</c>.
/// </summary>
public class SiteParser
{
    /// <summary>
    /// Dispatches on <c>settingOper.objectParsing</c>. For "Type_2" every element matching
    /// <c>div.flexRoot &gt; div.view.main</c> is re-parsed as a document of its own and
    /// handed to <c>ParsingPoster</c>, one block at a time.
    /// </summary>
    /// <remarks>
    /// Returns <see cref="Task"/> instead of <c>void</c>. With <c>async void</c> the caller
    /// regains control at the first incomplete await and can neither wait for the page to
    /// finish nor observe an exception thrown while processing it.
    /// </remarks>
    /// <param name="document">The parsed page to inspect.</param>
    /// <returns>A task that completes once all blocks have been handled.</returns>
    public async Task Parsing(IHtmlDocument document)
    {
        switch (settingOper.objectParsing)
        {
            case "Type_1":
                /// ...
                /// ... Code
                /// ...
                break;

            case "Type_2":
                var domParserAnnounc = new HtmlParser();
                var htmlBlockAnnounc = document.QuerySelectorAll("div.flexRoot > div.view.main");
                foreach (var item in htmlBlockAnnounc)
                {
                    // Each block is parsed on its own so ParsingPoster sees it as a whole document.
                    IHtmlDocument documentCur = await domParserAnnounc.ParseAsync(item.OuterHtml);
                    await ParsingPoster(documentCur);
                }
                break;
        }
    }
}
/// <summary>
/// Reads the e-mail value out of an announcement document and stores it in
/// <c>settingOper.email</c>.
/// </summary>
/// <remarks>
/// Returns <see cref="Task"/> instead of <c>void</c>: an <c>async void</c> method cannot be
/// awaited, so a caller would have no way to wait for one block before starting the next.
/// </remarks>
/// <param name="document">Document holding a single announcement block.</param>
/// <returns>A task that completes when the block has been processed.</returns>
public async Task ParsingPoster(IHtmlDocument document)
{
    try
    {
        try
        {
            // QuerySelector yields null when nothing matches, which makes the second,
            // looser selector a real fallback. Indexing QuerySelectorAll(...)[0] throws on
            // an empty result, so a null check placed after it could never trigger.
            var scriptNode =
                document.QuerySelector("#start_widget > div:nth-child(3) > div.form-line.view-form-line > div.adv-point.view-adv-point > script:nth-child(3)")
                ?? document.QuerySelector("div[class='adv-point view-adv-point']>script[type*='text/javascript']");

            if (scriptNode == null)
            {
                InfoMessageErrorEvent?.Invoke("Поле: 'email'. Error !!!" + "script element with the encoded value was not found");
            }
            else
            {
                // The value sits inside the script as eval(unescape('...')).
                settingOper.email = scriptNode.TextContent.Trim();
                // Keep only the payload between eval(unescape(' and ')).
                settingOper.email = wordProcessing.FindRegularExpression(settingOper.email, @"(?<=eval\(unescape\(').*(?='\)\))");
                // presumably turns the escaped payload back into markup; verify against wordProcessing
                settingOper.email = wordProcessing.DecodeResult(settingOper.email);
                // The decoded text is parsed as HTML; the first anchor's text is the final value.
                IHtmlDocument htmlDocumentEmail = await domParser.ParseAsync(settingOper.email);
                var itemsAttr = htmlDocumentEmail.QuerySelectorAll("a");
                settingOper.email = itemsAttr[0].TextContent.Trim();
            }
        }
        catch (Exception ex)
        {
            // A malformed field is reported; processing of the block continues.
            InfoMessageErrorEvent?.Invoke("Поле: 'email'. Error !!!" + ex.Message);
        }
    }
    catch (Exception ex)
    {
        // Still best effort (no rethrow), but the failure is no longer dropped silently.
        System.Diagnostics.Trace.TraceError("ParsingPoster failed: {0}", ex);
    }
}
我主要修正了代码中几处 async/await 的用法问题。