C#Regex删除C样式注释并在括号之间提取文本

时间:2011-11-30 09:11:19

标签: c# javascript .net regex

问题:

我需要从这段 JavaScript 中自动提取所有 name 属性(分别针对 providers_large 和 providers_small):

/*
    Simple OpenID Plugin
    http://code.google.com/p/openid-selector/

    This code is licensed under the New BSD License.
*/

var providers_large = {
    google : {
        name : 'Google',
        url : 'https://www.google.com/accounts/o8/id'
    },
    yahoo : {
        name : 'Yahoo',
        url : 'http://me.yahoo.com/'
    },
    aol : {
        name : 'AOL',
        label : 'Enter your AOL screenname.',
        url : 'http://openid.aol.com/{username}'
    },
    myopenid : {
        name : 'MyOpenID',
        label : 'Enter your MyOpenID username.',
        url : 'http://{username}.myopenid.com/'
    },
    openid : {
        name : 'OpenID',
        label : 'Enter your OpenID.',
        url : null
    }
};

var providers_small = {
    livejournal : {
        name : 'LiveJournal',
        label : 'Enter your Livejournal username.',
        url : 'http://{username}.livejournal.com/'
    },
    /* flickr: {
        name: 'Flickr',        
        label: 'Enter your Flickr username.',
        url: 'http://flickr.com/{username}/'
    }, */
    /* technorati: {
        name: 'Technorati',
        label: 'Enter your Technorati username.',
        url: 'http://technorati.com/people/technorati/{username}/'
    }, */
    wordpress : {
        name : 'Wordpress',
        label : 'Enter your Wordpress.com username.',
        url : 'http://{username}.wordpress.com/'
    },
    blogger : {
        name : 'Blogger',
        label : 'Your Blogger account',
        url : 'http://{username}.blogspot.com/'
    },
    verisign : {
        name : 'Verisign',
        label : 'Your Verisign username',
        url : 'http://{username}.pip.verisignlabs.com/'
    },
    /* vidoop: {
        name: 'Vidoop',
        label: 'Your Vidoop username',
        url: 'http://{username}.myvidoop.com/'
    }, */
    /* launchpad: {
        name: 'Launchpad',
        label: 'Your Launchpad username',
        url: 'https://launchpad.net/~{username}'
    }, */
    claimid : {
        name : 'ClaimID',
        label : 'Your ClaimID username',
        url : 'http://claimid.com/{username}'
    },
    clickpass : {
        name : 'ClickPass',
        label : 'Enter your ClickPass username',
        url : 'http://clickpass.com/public/{username}'
    },
    google_profile : {
        name : 'Google Profile',
        label : 'Enter your Google Profile username',
        url : 'http://www.google.com/profiles/{username}'
    }
};

openid.locale = 'en';
openid.sprite = 'en'; // reused in german& japan localization
openid.demo_text = 'In client demo mode. Normally would have submitted OpenID:';
openid.signin_text = 'Sign-In';
openid.image_title = 'log in with {provider}';

所以我需要: A)删除所有C-Style注释 和B)获取[providers_large,providers_small]的所有名称值(删除注释后)

到目前为止,我尝试过用正则表达式删除 C 风格的注释(失败了),也尝试过用正则表达式获取大括号之间的所有内容(同样失败了)。

随后我尝试把它当作 JSON 读取,但这当然也失败了,报错为“无效的 JSON primitive: what”。

这是我使用的stackoverflow-sites 这是我到目前为止尝试的例子

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;


namespace ConsoleExperiments
{

    public class Program
    {

        // http://stackoverflow.com/questions/2538279/strip-out-c-style-multi-line-comments
        // NOT working
        /// <summary>
        /// Removes every C-style block comment from <paramref name="strInput"/> and echoes the result to the console.
        /// </summary>
        /// <remarks>
        /// The earlier attempt ran the same lazy pattern with <c>RegexOptions.Multiline</c>. That option only
        /// changes where <c>^</c> and <c>$</c> anchor; <c>.</c> still stops at a line break, so any comment
        /// spanning more than one line was never matched. <c>RegexOptions.Singleline</c> makes <c>.</c> match
        /// newlines too, which is what a multi-line comment needs.
        /// The match is purely textual: a comment-like sequence inside a string literal is removed as well.
        /// </remarks>
        /// <param name="strInput">Text to strip. Passing null makes Regex.Replace throw ArgumentNullException.</param>
        /// <returns>The input with all block comments deleted.</returns>
        static string RemoveCstyleComments(string strInput)
        {
            // Lazy quantifier: stop at the first closing marker so two separate comments are not merged into one match.
            string strPattern = @"/\*.*?\*/";

            string strOutput = System.Text.RegularExpressions.Regex.Replace(strInput, strPattern, string.Empty, System.Text.RegularExpressions.RegexOptions.Singleline);
            Console.WriteLine(strOutput);
            return strOutput;
        }


        // http://stackoverflow.com/questions/413071/regex-to-get-string-between-curly-braces-i-want-whats-between-the-curly-brace
        // http://stackoverflow.com/questions/5337166/regular-expression-get-string-between-curly-braces
        // http://stackoverflow.com/questions/1904617/regex-for-removing-curly-brackets-with-nested-curly-brackets
        // http://stackoverflow.com/questions/378415/how-do-i-extract-a-string-of-text-that-lies-between-two-brackets-using-net
        /// <summary>
        /// Finds every outermost <c>{ ... }</c> block in <paramref name="strInput"/>, prints the text between
        /// the braces and returns those texts joined by <see cref="Environment.NewLine"/>.
        /// </summary>
        /// <remarks>
        /// Fixes over the previous version: of the many patterns that were assigned only the last one took
        /// effect, and it matched parentheses rather than curly braces; only the first match was inspected;
        /// and the method always returned an empty string.
        /// Nested braces are handled with a .NET balancing group: every inner "{" pushes onto the
        /// <c>depth</c> stack, every inner "}" pops it, and the match is accepted only when the stack is empty.
        /// Braces are counted textually, so a brace inside a string literal takes part in the balance.
        /// </remarks>
        /// <param name="strInput">Text to search. Passing null makes Regex.Matches throw ArgumentNullException.</param>
        /// <returns>The contents of all outermost brace blocks, or an empty string when there is none.</returns>
        static string GetCurlyValues(string strInput)
        {
            string strPattern = @"\{(?<inner>(?>[^{}]+|\{(?<depth>)|\}(?<-depth>))*(?(depth)(?!)))\}";

            System.Text.RegularExpressions.Regex rex = new System.Text.RegularExpressions.Regex(strPattern);
            System.Collections.Generic.List<string> lsBlocks = new System.Collections.Generic.List<string>();

            foreach (System.Text.RegularExpressions.Match mMatch in rex.Matches(strInput))
            {
                string strInner = mMatch.Groups["inner"].Value;
                Console.WriteLine("Group: " + strInner);
                lsBlocks.Add(strInner);
            }

            return string.Join(Environment.NewLine, lsBlocks.ToArray());
        }


        /// <summary>
        /// Echoes the file at the relative path <c>TestFile.txt</c> to the console, one line at a time.
        /// </summary>
        /// <remarks>
        /// A missing file is reported on the console instead of being read. Any exception raised while
        /// checking or reading is caught and its message is printed; nothing propagates to the caller.
        /// </remarks>
        static void ReadFile()
        {
            try
            {
                string strFilePath = @"TestFile.txt";

                // Guard clause: nothing to read, so say so and leave.
                if (!System.IO.File.Exists(strFilePath))
                {
                    Console.WriteLine("File \"" + strFilePath + "\" does not exist.");
                    return;
                }

                // Leaving the using block disposes (and thereby closes) the reader.
                using (System.IO.StreamReader reader = new System.IO.StreamReader(strFilePath))
                {
                    // ReadLine returns null once the end of the file has been reached.
                    for (string strLine = reader.ReadLine(); strLine != null; strLine = reader.ReadLine())
                    {
                        Console.WriteLine(strLine);
                    }
                }
            }
            catch (Exception ex)
            {
                // Report the failure to the user rather than crashing.
                Console.WriteLine("The file could not be read:");
                Console.WriteLine(ex.Message);
            }
        } // End Sub

        /// <summary>
        /// Plain data holder for one provider entry. The fields are public and start out with
        /// placeholder values so that a freshly created instance can be serialized as sample output.
        /// </summary>
        public class cProvider
        {
            public string name;
            public string label;
            public string url;

            /// <summary>Fills the three fields with their placeholder defaults.</summary>
            public cProvider()
            {
                name = "abc";
                label = "def";
                url = "url";
            }
        }


        /// <summary>
        /// Container exposing a list of <see cref="cProvider"/> entries through the public field <c>foo</c>.
        /// The list is created empty on construction.
        /// </summary>
        public class cProviders_large
        {
            public List<cProvider> foo;

            /// <summary>Creates the container with an empty provider list.</summary>
            public cProviders_large()
            {
                foo = new List<cProvider>();
            }
        }


        /// <summary>
        /// Experiment driver: prints the provider script, then serializes a sample object graph with
        /// JavaScriptSerializer and prints the resulting JSON before waiting for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            string strScriptPath = @"D:\UserName\Downloads\openid-selector-1.3\openid-selector\js\openid-en - Kopie.js.txt";
            string strContent = System.IO.File.ReadAllText(strScriptPath);
            Console.WriteLine(strContent);

            // The regex experiments (RemoveCstyleComments / GetCurlyValues) and the direct
            // DeserializeObject call on the raw script are currently switched off.

            // Sample graph: the same provider instance is added to the list twice.
            cProvider sample = new cProvider();
            cProviders_large container = new cProviders_large();
            for (int i = 0; i < 2; ++i)
            {
                container.foo.Add(sample);
            }

            System.Web.Script.Serialization.JavaScriptSerializer serializer = new System.Web.Script.Serialization.JavaScriptSerializer();
            Console.WriteLine(serializer.Serialize(container));


            Console.WriteLine(Environment.NewLine);
            Console.WriteLine(" --- Press any key to continue --- ");
            Console.ReadKey();
        } // End Sub Main

    } // End Class Program


} // End namespace ConsoleExperiments

任何比我更了解正则表达式的人能为我提供必要的正则表达式吗? 现在,看起来每次文件更改时我都会手工完成, 我真的很讨厌这个...

修改 在旁注中,v8包装器使用C ++ .NET,因此在Linux上不起作用,尽管v8引擎在Linux上运行良好。

所以我坚持通过JSON转换解决问题。

3 个答案:

答案 0 :(得分:4)

您可以使用javascript engine

using System;
using System.IO;
using Noesis.Javascript;

/// <summary>
/// Runs the provider script inside an embedded JavaScript engine and prints the name and url
/// of every entry in <c>providers_large</c>.
/// </summary>
class Program
{
    static void Main()
    {
        // Fix: the context was never disposed. JavascriptContext is disposable, so it is
        // wrapped in a using block to release it deterministically.
        using (var context = new JavascriptContext())
        {
            // The script assigns properties on a global "openid" object, so one has to exist
            // before the script runs.
            context.SetParameter("openid", new object());
            context.Run(File.ReadAllText("test.js"));

            // The script's global variable comes back as a dynamic key/value collection.
            dynamic providers_large = context.GetParameter("providers_large");
            foreach (var provider in providers_large)
            {
                Console.WriteLine(
                    "name: {0}, url: {1}",
                    provider.Value["name"],
                    provider.Value["url"]
                );
            }
        }
    }
}

在我的控制台上打印以下内容:

name: Google, url: https://www.google.com/accounts/o8/id
name: Yahoo, url: http://me.yahoo.com/
name: AOL, url: http://openid.aol.com/{username}
name: MyOpenID, url: http://{username}.myopenid.com/
name: OpenID, url:

答案 1 :(得分:0)

为此考虑JavaScriptSerializer,提供json反序列化,如果删除变量和注释,它应该能够创建对象图。

答案 2 :(得分:0)

达林迪米特洛夫的回答当然是最简单的 但是,Noesis.Javascript最令人讨厌的是用C ++ .NET编写,这意味着它无法在Linux上编译,尽管C#/ .NET(通过mono)和v8引擎在Linux上都运行良好。

所以下面是通过转换为 JSON 再反序列化的变通办法:

/// <summary>
/// Deletes all C-style block comments from <paramref name="strInput"/>, prints the stripped text
/// to the console and returns it.
/// </summary>
/// <param name="strInput">Text to strip.</param>
/// <returns>The input without block comments.</returns>
static string RemoveCstyleComments(string strInput)
        {
            // Pattern taken from
            // http://stackoverflow.com/questions/462843/improving-fixing-a-regex-for-c-style-block-comments
            // The atomic groups consume runs of non-asterisk characters, or an asterisk that is not
            // followed by a slash, until the closing marker is reached.
            System.Text.RegularExpressions.Regex rexBlockComment = new System.Text.RegularExpressions.Regex(
                @"/\*(?>(?:(?>[^*]+)|\*(?!/))*)\*/",
                System.Text.RegularExpressions.RegexOptions.Multiline);

            string strStripped = rexBlockComment.Replace(strInput, string.Empty);
            Console.WriteLine(strStripped);
            return strStripped;
        } // End Function RemoveCstyleComments




        /// <summary>
        /// Rewrites the (already comment-free) provider script into a single JSON-like object literal
        /// whose members are "providers_large" and "providers_small".
        /// </summary>
        /// <remarks>
        /// The rewrite is purely textual and order-dependent: the two variable declarations become quoted
        /// member names, the "};" terminators lose their semicolon, nested object keys and the
        /// name/url/label keys are quoted, every apostrophe becomes a double quote, everything from
        /// "openid.locale" to the end of the text is dropped, and the result is wrapped in braces.
        /// </remarks>
        /// <param name="strInput">Provider script without comments.</param>
        /// <returns>The rewritten text, enclosed in "{" and "}".</returns>
        static string ReplaceVariables(string strInput)
        {
            string strNewLine = Environment.NewLine;

            // { pattern, replacement } pairs; they must be applied in exactly this order.
            string[][] astrRewrites = new string[][]
            {
                new string[] { @"var\s+providers_large(\s+)?=(\s+)?{(\s+)?", "\"providers_large\" : {" + strNewLine },
                new string[] { @"(\s+)?var\s+providers_small(\s+)?=(\s+)?{(\s+)?", ",   \"providers_small\" : {" + strNewLine },
                new string[] { @"}(\s+)?;(\s+)?", "}" + strNewLine },
                // $2 is the captured key name; the line break and indentation in front of it are consumed.
                new string[] { @"$(\s+)?(\w+)(\s+)?:(\s+)?{", "\"$2\" : {" },
                new string[] { @"name(\s+)?:(\s+)?'", "\"name\" : '" },
                new string[] { @"url(\s+)?:(\s+)?'", "\"url\" : '" },
                new string[] { @"label(\s+)?:(\s+)?'", "\"label\" : '" }
            };

            string strJson = strInput;
            foreach (string[] astrRule in astrRewrites)
            {
                strJson = System.Text.RegularExpressions.Regex.Replace(
                    strJson, astrRule[0], astrRule[1], System.Text.RegularExpressions.RegexOptions.Multiline);
            }

            // Switch the string delimiters from apostrophes to double quotes.
            strJson = strJson.Replace("'", "\"");

            // Singleline lets ".*" run across line breaks, so the whole trailing
            // openid.* assignment section is cut off in one go.
            strJson = System.Text.RegularExpressions.Regex.Replace(
                strJson, "openid\\.locale.*", "", System.Text.RegularExpressions.RegexOptions.Singleline);

            return "{" + strJson + "}";
        } // End Function ReplaceVariables


        /// <summary>
        /// Strips the comments from the provider script, rewrites it to JSON, deserializes it and collects
        /// the member keys found under <c>providers_large</c> and <c>providers_small</c>.
        /// </summary>
        /// <remarks>
        /// Changes over the previous version: the inner loop over each provider's properties was removed
        /// because it produced nothing and threw for any member that was not an object; the deserialized
        /// graph is accessed through typed dictionaries instead of <c>dynamic</c>; the group-name comparison
        /// is done once per group instead of once per member; and sections that are not objects are skipped
        /// instead of raising a binder exception.
        /// Note that the collected values are the member keys of each group (the identifier in front of
        /// each provider object), not the value of the provider's <c>name</c> property.
        /// </remarks>
        /// <param name="strInput">Raw content of the provider script.</param>
        /// <returns>
        /// A case-insensitive collection whose keys "providers_large" and "providers_small" hold the member
        /// keys of the respective group. A key is absent when its group is missing or empty.
        /// </returns>
        static System.Collections.Specialized.NameValueCollection TrySerialize(string strInput)
        {
            strInput = RemoveCstyleComments(strInput);
            strInput = ReplaceVariables(strInput);

            System.Collections.Specialized.NameValueCollection nvc = new System.Collections.Specialized.NameValueCollection(StringComparer.OrdinalIgnoreCase);

            System.Web.Script.Serialization.JavaScriptSerializer js = new System.Web.Script.Serialization.JavaScriptSerializer();

            // A JSON object is deserialized into a string-keyed dictionary; anything else leaves nothing to collect.
            System.Collections.Generic.IDictionary<string, object> dictRoot =
                js.DeserializeObject(strInput) as System.Collections.Generic.IDictionary<string, object>;
            if (dictRoot == null)
            {
                return nvc;
            }

            foreach (System.Collections.Generic.KeyValuePair<string, object> kvp in dictRoot)
            {
                // Decide once per section which result key (if any) it feeds.
                string strGroup = null;
                if (StringComparer.OrdinalIgnoreCase.Equals(kvp.Key, "providers_small"))
                {
                    strGroup = "providers_small";
                }
                else if (StringComparer.OrdinalIgnoreCase.Equals(kvp.Key, "providers_large"))
                {
                    strGroup = "providers_large";
                }

                System.Collections.Generic.IDictionary<string, object> dictValues =
                    kvp.Value as System.Collections.Generic.IDictionary<string, object>;

                // Skip unrelated sections and sections whose value is not an object.
                if (strGroup == null || dictValues == null)
                {
                    continue;
                }

                foreach (string strMemberVariable in dictValues.Keys)
                {
                    nvc.Add(strGroup, strMemberVariable);
                } // Next strMemberVariable

            } // Next kvp

            return nvc;
        } // End Function TrySerialize


        /// <summary>
        /// Reads the provider script, extracts the provider keys via <c>TrySerialize</c> and prints the
        /// "providers large" and "providers small" lists to the console.
        /// </summary>
        /// <remarks>
        /// Changes over the previous version: the script was read twice (openid-en.js, then openid-ru.js)
        /// and the first result was overwritten immediately, so only the file that was actually used is
        /// read now. In addition, <c>NameValueCollection.GetValues</c> returns null for a key that has no
        /// values, which made the print loops throw; a missing group is now printed as an empty list.
        /// </remarks>
        public static void GetProviders()
        {
            string strContent = System.IO.File.ReadAllText(@"D:\UserName\Downloads\openid-selector-1.3\openid-selector\js\openid-ru.js");

            System.Collections.Specialized.NameValueCollection nvc = TrySerialize(strContent);

            foreach (string strGroup in new string[] { "providers_large", "providers_small" })
            {
                Console.WriteLine(Environment.NewLine);
                // Heading is the group key with the underscore shown as a space, e.g. "providers large: ".
                Console.WriteLine(strGroup.Replace('_', ' ') + ": ");

                // GetValues yields null when the key is absent; treat that as "no entries".
                string[] astrValues = nvc.GetValues(strGroup) ?? new string[0];
                foreach (string strValue in astrValues)
                {
                    Console.WriteLine("    " + strValue);
                } // Next strValue
            } // Next strGroup

        } // End Sub GetProviders