C# 控制台应用程序中的正则表达式不起作用

时间:2014-02-01 07:51:28

标签: c# regex

// Downloads the Pizza Hut menu page and builds "name, price, PizzaHut" lines
// in a StringBuilder from regex matches on the page markup.
// NOTE(review): this snippet is truncated - the try block below has no
// catch/finally and the method's closing brace is not shown.
private static void PizzaHutPizzaScrapper()
    {
        try
        {
            MatchCollection mclName;
            MatchCollection mclPrice;
            WebClient webClient = new WebClient();
            string strUrl = "http://www.pizzahut.com.pk/pizzas.html";

            // Fetch the raw bytes of the page and decode them as UTF-8.
            byte[] reqHTML;
            reqHTML = webClient.DownloadData(strUrl);

            UTF8Encoding objUTF8 = new UTF8Encoding();
            string pageContent = objUTF8.GetString(reqHTML);

            // Matches the MenuDescHead opening tag followed by letters/whitespace and
            // optional trailing digits. Inside the character class the '*' is a
            // literal asterisk, not a quantifier.
            Regex r = new Regex("(<p class=\"MenuDescHead\">[A-Za-z\\s*]+[0-9]*)");
           // Regex r1 = new Regex("(<p class=\"MenuDescPrice\">[A-Za-z.\\s?]+[0-9]*[A-Za-z\\s?]*[0-9]*[A-Za-z.\\s?]*)");

            // Matches the MenuDescPrice opening tag followed by zero or more digits.
            // Because [0-9]* may match nothing, the match stops right after the tag
            // whenever the tag is not immediately followed by a digit.
            Regex r1 = new Regex("(<p class=\"MenuDescPrice\">[0-9]*)");           


            mclName = r.Matches(pageContent);
            mclPrice = r1.Matches(pageContent);
            StringBuilder strBuilder = new StringBuilder();
            string name = "";
            string price = "";
            List<string> menuPriceList = new List<string>();



            // Keep only the price matches that have some text after the tag.
            foreach (Match ml in mclPrice)
            {
                // Drop everything up to and including the first '>' (the opening tag).
                price = ml.Value.Remove(0, ml.Value.IndexOf(">") + 1).Trim();
                if (price != "")
                {

                    // The full match (tag included) is stored, not the stripped price;
                    // the tag is stripped again when the entry is read below.
                    menuPriceList.Add(ml.Value);
                }

            }



            // j walks menuPriceList independently of the name index i.
            // NOTE(review): j is never checked against menuPriceList.Count, so the
            // indexer throws if there are fewer stored prices than the loop consumes.
            int j = 0;
            for (int i = 0; i < mclName.Count; i++)
            {


                // Text after the opening tag of the name match.
                name = mclName[i].Value.Remove(0, mclName[i].Value.IndexOf(">") + 1);
                // Names at index 0 and 4 are emitted twice, each with its own price -
                // presumably those items list two prices on the page; verify against the HTML.
                if (i == 0 || i == 4)
                {
                    price = menuPriceList[j].Remove(0, menuPriceList[j].IndexOf(">") + 1);
                    strBuilder.Append(name.Trim() + ", " + price.Trim() + " , PizzaHut\r\n");
                    j++;

                }
                price = menuPriceList[j].Remove(0, menuPriceList[j].IndexOf(">") + 1);
                strBuilder.Append(name.Trim() + ", " + price.Trim() + " ,PizzaHut\r\n");
                j++;




            }`

我只想提取数值……但它也会把字母提取出来。

我只想从 HTML 中提取数值,并使用 `[0-9]*` 作为正则表达式,但它不起作用,结果中仍然包含字母。我只想要数值,正确的正则表达式应该怎么写?有什么想法吗?

1 个答案:

答案 0 :(得分:0)

你需要的是分组构造(Grouping Constructs):用括号把数字部分放进捕获组,然后通过 `Groups[1]` 只读取该组的内容,而不是整个匹配结果。

        // Downloads the menu page, extracts item names and numeric prices with
        // capturing groups, and prints one "name, price, PizzaHut" line per price.
        string strUrl = "http://www.pizzahut.com.pk/pizzas.html";

        // WebClient is IDisposable: the using block releases it as soon as the
        // page has been downloaded, even if the download throws.
        string pageContent;
        using (WebClient webClient = new WebClient())
        {
            byte[] reqHTML = webClient.DownloadData(strUrl);
            pageContent = Encoding.UTF8.GetString(reqHTML);
        }

        // Group 1 captures only the text after the opening tag, so the tag never
        // has to be stripped from the match afterwards.
        Regex nameRegex = new Regex("<p class=\"MenuDescHead\">([A-Za-z\\s]+[0-9]*)");
        // [^0-9]* skips any non-digit text after the tag; group 1 then captures
        // just the run of digits that follows (possibly empty).
        Regex priceRegex = new Regex("<p class=\"MenuDescPrice\">[^0-9]*([0-9]*)");

        MatchCollection mclName = nameRegex.Matches(pageContent);
        MatchCollection mclPrice = priceRegex.Matches(pageContent);

        // Keep only the usable prices: captures that are empty or exactly "0" are dropped.
        List<string> menuPriceList = new List<string>();
        foreach (Match ml in mclPrice)
        {
            string price = ml.Groups[1].Value;
            if (price != "" && price != "0")
            {
                menuPriceList.Add(price);
            }
        }

        StringBuilder strBuilder = new StringBuilder();

        // j walks the price list independently of the name index i.
        int j = 0;
        for (int i = 0; i < mclName.Count; i++)
        {
            string name = mclName[i].Groups[1].Value.Trim();

            // Names at index 0 and 4 consume two prices each - presumably those
            // items list two prices on the page; verify against the HTML.
            if (i == 0 || i == 4)
            {
                // Stop instead of throwing when the page yielded fewer prices than expected.
                if (j >= menuPriceList.Count)
                {
                    break;
                }

                strBuilder.Append(name).Append(", ").Append(menuPriceList[j]).Append(" , PizzaHut\r\n");
                j++;
            }

            if (j >= menuPriceList.Count)
            {
                break;
            }

            strBuilder.Append(name).Append(", ").Append(menuPriceList[j]).Append(" ,PizzaHut\r\n");
            j++;
        }

        Console.WriteLine(strBuilder.ToString());