Question

我是法国人，抱歉我的英语不好。代码中的 `search` 是一个输入字段（InputField）。

/// <summary>
/// Coroutine that downloads the page for the text currently held in <c>search</c>,
/// extracts the content of every <c>&lt;li class="a-class"&gt;</c> element and
/// writes the extracted strings to <c>azaaac.txt</c> (debug output).
/// Stops early, without extracting anything, when the download fails or the
/// page reports that no search results were found.
/// </summary>
/// <returns>An enumerator intended to be driven by <c>StartCoroutine</c>.</returns>
IEnumerator GetHtml()
{
    // Escape the user-entered text so spaces and reserved characters still yield a valid URL.
    string url = "https://www.random-site.com/" + Uri.EscapeDataString(search.text ?? string.Empty);

    using (WWW www = new WWW(url)) // Get the html of the site
    {
        yield return www; // Wait for the end of the operation

        // A failed request has no usable text; report it instead of parsing garbage.
        if (!string.IsNullOrEmpty(www.error))
        {
            Debug.LogError("Erreur lors du téléchargement : " + www.error);
            yield break;
        }

        if (www.text.Contains("No search results were found for"))
        {
            Debug.LogError("Aucun résultat pour \"" + search.text + "\".");
            // NOTE(review): AllMusic() creates a brand-new enumerator here, so this call
            // presumably cannot stop an AllMusic coroutine that is already running.
            // Keep the Coroutine returned by StartCoroutine and pass that instead - confirm.
            StopCoroutine(AllMusic());
            // Leave this coroutine; previously execution fell through and still wrote the file.
            yield break;
        }

        // Extract string between "<li class="a-class">" and "</li>"
        string[] str = Extract(www.text, "<li class=\"a-class\">", "</li>");
        File.WriteAllLines(@"azaaac.txt", str); // Debug only
    }
}

/// <summary>
/// Returns every substring of <paramref name="data"/> that lies between an occurrence of
/// <paramref name="startString"/> and the next occurrence of <paramref name="endString"/>.
/// Both delimiters are treated as literal text, and the enclosed content may span
/// several lines (as is usual in real HTML).
/// </summary>
/// <param name="data">Text to search.</param>
/// <param name="startString">Literal opening delimiter (not included in the results).</param>
/// <param name="endString">Literal closing delimiter (not included in the results).</param>
/// <returns>
/// The enclosed substrings in order of appearance; an empty array when nothing matches
/// or when an exception occurs (the exception is logged, not rethrown).
/// </returns>
string[] Extract(string data, string startString, string endString)
{
    try
    {
        // Regex.Escape: the delimiters are plain text, so regex metacharacters in them
        // must not be interpreted as pattern syntax.
        string pattern = "(?<=" + Regex.Escape(startString) + ")(.*?)(?=" + Regex.Escape(endString) + ")";

        // Singleline makes '.' match '\n' too; without it, content containing a line
        // break between the delimiters is never matched.
        Regex regex = new Regex(pattern, RegexOptions.Singleline);
        MatchCollection matches = regex.Matches(data); // Apply the pattern

        List<string> res = new List<string>(matches.Count);
        foreach (Match m in matches) // Convert the match collection to a list of strings
        {
            res.Add(m.Value);
        }
        return res.ToArray();
    }
    catch (Exception e)
    {
        // Best-effort helper: log the failure and return an empty result to the caller.
        Debug.LogError("Erreur lors de l'extraction : " + e);
        return new string[] { };
    }
}

`Extract` 方法对实际的 HTML 代码不起作用……而对于下面这个简单的输入：

data="<li class=\"a-class\">aaaa</li> ab <li class=\"a-class\">aaaa</li>"

`Extract` 能正确返回 { "aaaa", "aaaa" }。所以，问题出在 HTML 代码本身……我尝试过使用 HttpUtility.HtmlDecode，但没有效果……

MatchCollection不起作用（Regex）

0 个答案: