// Downloads the Pizza Hut pizzas page, pulls item names and prices out of the
// HTML with regular expressions, and accumulates "name, price ,PizzaHut" lines
// in a StringBuilder.
// NOTE(review): this excerpt is truncated - the try block opened below is not
// closed (no catch/finally) and the method body has no closing brace here.
private static void PizzaHutPizzaScrapper()
{
try
{
MatchCollection mclName;
MatchCollection mclPrice;
WebClient webClient = new WebClient();
string strUrl = "http://www.pizzahut.com.pk/pizzas.html";
byte[] reqHTML;
// Fetch the raw page bytes and decode them as UTF-8 text.
reqHTML = webClient.DownloadData(strUrl);
UTF8Encoding objUTF8 = new UTF8Encoding();
string pageContent = objUTF8.GetString(reqHTML);
// Name pattern: the MenuDescHead opening tag, then letters/whitespace/'*'
// (inside a character class '*' is a literal), then optional trailing digits.
// The parentheses wrap the whole pattern, so the match value includes the tag.
Regex r = new Regex("(<p class=\"MenuDescHead\">[A-Za-z\\s*]+[0-9]*)");
// Regex r1 = new Regex("(<p class=\"MenuDescPrice\">[A-Za-z.\\s?]+[0-9]*[A-Za-z\\s?]*[0-9]*[A-Za-z.\\s?]*)");
// Price pattern: the MenuDescPrice opening tag followed by zero or more digits.
// The digits must start immediately after '>', so when any other character
// follows the tag the digit part of the match is empty.
Regex r1 = new Regex("(<p class=\"MenuDescPrice\">[0-9]*)");
mclName = r.Matches(pageContent);
mclPrice = r1.Matches(pageContent);
StringBuilder strBuilder = new StringBuilder();
string name = "";
string price = "";
List<string> menuPriceList = new List<string>();
foreach (Match ml in mclPrice)
{
// Drop everything up to and including the first '>' to leave only the text after the tag.
price = ml.Value.Remove(0, ml.Value.IndexOf(">") + 1).Trim();
if (price != "")
{
// The full match (tag included) is stored, not the stripped price;
// the tag is stripped again when the output lines are built.
menuPriceList.Add(ml.Value);
}
}
// j walks menuPriceList independently of the name index i.
int j = 0;
for (int i = 0; i < mclName.Count; i++)
{
name = mclName[i].Value.Remove(0, mclName[i].Value.IndexOf(">") + 1);
// Names at index 0 and 4 produce two output lines and consume two prices
// (presumably those items list two prices on the page - confirm against the markup).
if (i == 0 || i == 4)
{
price = menuPriceList[j].Remove(0, menuPriceList[j].IndexOf(">") + 1);
strBuilder.Append(name.Trim() + ", " + price.Trim() + " , PizzaHut\r\n");
j++;
}
// NOTE(review): menuPriceList[j] is not bounds-checked; the List indexer throws
// ArgumentOutOfRangeException if fewer prices were collected than are consumed here.
price = menuPriceList[j].Remove(0, menuPriceList[j].IndexOf(">") + 1);
strBuilder.Append(name.Trim() + ", " + price.Trim() + " ,PizzaHut\r\n");
j++;
}`
我只想提取数值……但它也会把字母一起提取出来
我只想从 HTML 中提取数值,并使用了 [0-9]* 作为正则表达式,但它不起作用,结果中仍然包含字母。我只想要数值,正确的正则表达式应该怎么写?有什么想法吗?
答案 0 :(得分:0)
在这里,你要找的是分组构造(Grouping Constructs):用括号只捕获数字部分,然后通过 Groups[1] 取出捕获的内容,而不是使用包含标签的整个匹配值:
// Downloads the Pizza Hut pizzas page, extracts item names and numeric prices
// through regex capture groups, and prints one "name, price ,PizzaHut" line
// per name/price pairing.
string strUrl = "http://www.pizzahut.com.pk/pizzas.html";
string pageContent;

// WebClient is IDisposable; release it as soon as the download has finished.
using (WebClient webClient = new WebClient())
{
    byte[] reqHTML = webClient.DownloadData(strUrl);
    pageContent = new UTF8Encoding().GetString(reqHTML);
}

// Group 1 captures only the text after the opening tag, so the tag itself
// never has to be stripped from the match afterwards.
Regex nameRegex = new Regex("<p class=\"MenuDescHead\">([A-Za-z\\s]+[0-9]*)");
// [^0-9]* skips any non-digit characters after the tag; group 1 is the first digit run.
// NOTE(review): [^0-9]* is unbounded and also matches '<' and newlines, so for a
// price element without digits it can run on into later markup - confirm this is acceptable.
Regex priceRegex = new Regex("<p class=\"MenuDescPrice\">[^0-9]*([0-9]*)");
MatchCollection mclName = nameRegex.Matches(pageContent);
MatchCollection mclPrice = priceRegex.Matches(pageContent);

// Keep only usable prices: empty captures and "0" are discarded.
List<string> menuPriceList = new List<string>();
foreach (Match ml in mclPrice)
{
    string price = ml.Groups[1].Value;
    if (price != "" && price != "0")
    {
        menuPriceList.Add(price);
    }
}

StringBuilder strBuilder = new StringBuilder();
// j walks menuPriceList independently of the name index i.
int j = 0;
for (int i = 0; i < mclName.Count; i++)
{
    // Because prices are filtered above, there may be fewer of them than the
    // names need; stop instead of indexing past the end of the list.
    if (j >= menuPriceList.Count)
    {
        break;
    }

    string name = mclName[i].Groups[1].Value.Trim();
    // Names at index 0 and 4 consume two consecutive prices and emit two lines
    // (presumably those items list two prices on the page - confirm against the markup).
    int priceCount = (i == 0 || i == 4) ? 2 : 1;
    for (int k = 0; k < priceCount && j < menuPriceList.Count; k++, j++)
    {
        // The first of a two-line pair uses " , PizzaHut"; every other line uses
        // " ,PizzaHut". Both spellings are kept exactly as the output format had them.
        string suffix = (priceCount == 2 && k == 0) ? " , PizzaHut\r\n" : " ,PizzaHut\r\n";
        strBuilder.Append(name + ", " + menuPriceList[j].Trim() + suffix);
    }
}
Console.WriteLine(strBuilder.ToString());