如何避免 Lucene QueryParser.Parse 抛出异常?

时间:2010-12-24 00:42:30

标签: lucene lucene.net

在下面代码的第3条语句(parser.Parse 调用)处,我得到异常 'IOException: read past eof' 和 'LookaheadSuccess: 应用程序中的错误'。有什么方法可以避免这种情况吗?我不想每次执行搜索时都被调试器中断,还要按两次“继续”。

注意:只有在我让 Visual Studio 在异常抛出时就中断(即使这些异常随后会被捕获)的情况下,我才会注意到这一点。应用本身并没有出现未处理的异常,我只是看到每次搜索都会因为这些被抛出的异常而中断两次(或三次)。该应用运行良好。

var searcher = new IndexSearcher(directory, true);
var parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "all", analyzer);
var query = parser.Parse(text); // here: the first-chance IOException / LookaheadSuccess exceptions are reported on this call

3 个答案:

答案 0 :(得分:3)

这些是在 Lucene 内部抛出并被捕获的第一次机会异常(first-chance exception)。您已将 Visual Studio 配置为在所有异常抛出时中断,而不仅仅是未处理的异常。请打开“异常”(Exceptions)对话框(如果我没记错,快捷键是 Ctrl+Alt+E)并更改该设置。

答案 1 :(得分:3)

Lucene.NET(撰写本文时为 3.0.3 版本)使用 IOException 来控制解析器中若干部分的执行流程。这对性能产生了不良影响(在我的开发机器上最多增加 90 毫秒)。

好消息是,目前位于http://lucenenet.apache.org/community.html的源代码存储库中的版本似乎已删除导致此问题的特定异常。当然对我来说,这已经提高了很多性能。希望这会有所帮助。

答案 2 :(得分:3)

在Lucene 3.0.3中修补QueryParser以避免抛出LookaheadSuccess异常:

--- a/src/core/QueryParser/QueryParser.cs
+++ b/src/core/QueryParser/QueryParser.cs
@@ -1708,16 +1708,13 @@ namespace Lucene.Net.QueryParsers
         }

         private bool Jj_2_1(int xla)
-        {
+        {
+            bool lookaheadSuccess = false;
             jj_la = xla;
             jj_lastpos = jj_scanpos = token;
             try
             {
-                return !Jj_3_1();
-            }
-            catch (LookaheadSuccess)
-            {
-                return true;
+                return !Jj_3_1(out lookaheadSuccess);
             }
             finally
             {
@@ -1725,29 +1722,31 @@ namespace Lucene.Net.QueryParsers
             }
         }

-        private bool Jj_3R_2()
-        {
-            if (jj_scan_token(TermToken)) return true;
-            if (jj_scan_token(ColonToken)) return true;
+        private bool Jj_3R_2(out bool lookaheadSuccess)
+        {
+            if (jj_scan_token(TermToken, out lookaheadSuccess)) return true;
+            if (lookaheadSuccess) return false;
+            if (jj_scan_token(ColonToken, out lookaheadSuccess)) return true;
             return false;
         }

-        private bool Jj_3_1()
+        private bool Jj_3_1(out bool lookaheadSuccess)
         {
             Token xsp;
             xsp = jj_scanpos;
-            if (Jj_3R_2())
+            if (Jj_3R_2(out lookaheadSuccess))
             {
                 jj_scanpos = xsp;
-                if (Jj_3R_3()) return true;
+                if (Jj_3R_3(out lookaheadSuccess)) return true;
             }
             return false;
         }

-        private bool Jj_3R_3()
-        {
-            if (jj_scan_token(StarToken)) return true;
-            if (jj_scan_token(ColonToken)) return true;
+        private bool Jj_3R_3(out bool lookaheadSuccess)
+        {
+            if (jj_scan_token(StarToken, out lookaheadSuccess)) return true;
+            if (lookaheadSuccess) return false;
+            if (jj_scan_token(ColonToken, out lookaheadSuccess)) return true;
             return false;
         }

@@ -1861,14 +1860,9 @@ namespace Lucene.Net.QueryParsers
             throw GenerateParseException();
         }

-        [Serializable]
-        private sealed class LookaheadSuccess : System.Exception
-        {
-        }
-
-        private LookaheadSuccess jj_ls = new LookaheadSuccess();
-        private bool jj_scan_token(int kind)
-        {
+        private bool jj_scan_token(int kind, out bool lookaheadSuccess)
+        {
+            lookaheadSuccess = false;
             if (jj_scanpos == jj_lastpos)
             {
                 jj_la--;
@@ -1896,8 +1890,8 @@ namespace Lucene.Net.QueryParsers
                 }
                 if (tok != null) Jj_add_error_token(kind, i);
             }
-            if (jj_scanpos.kind != kind) return true;
-            if (jj_la == 0 && jj_scanpos == jj_lastpos) throw jj_ls;
+            if (jj_scanpos.kind != kind) return true;
+            if (jj_la == 0 && jj_scanpos == jj_lastpos) lookaheadSuccess = true;
             return false;
         }

@@ -2029,32 +2023,34 @@ namespace Lucene.Net.QueryParsers
         }

         private void Jj_rescan_token()
-        {
+        {
+            bool lookaheadSuccess = false;
             jj_rescan = true;
             for (int i = 0; i < 1; i++)
             {
-                try
+                JJCalls p = jj_2_rtns[i];
+                do
                 {
-                    JJCalls p = jj_2_rtns[i];
-                    do
+                    if (p.gen > jj_gen)
                     {
-                        if (p.gen > jj_gen)
+                        jj_la = p.arg;
+                        jj_lastpos = jj_scanpos = p.first;
+                        switch (i)
                         {
-                            jj_la = p.arg;
-                            jj_lastpos = jj_scanpos = p.first;
-                            switch (i)
-                            {
-                                case 0:
-                                    Jj_3_1();
-                                    break;
-                            }
+                            case 0:
+                                Jj_3_1(out lookaheadSuccess);
+                                if (lookaheadSuccess)
+                                {
+                                    goto Jj_rescan_token_after_while_label;
+                                }
+                                break;
                         }
-                        p = p.next;
-                    } while (p != null);
-                }
-                catch (LookaheadSuccess)
-                {
-                }
+                    }
+                    p = p.next;
+                } while (p != null);
+
+            Jj_rescan_token_after_while_label:
+                lookaheadSuccess = false;
             }
             jj_rescan = false;
         }
-- 

在Lucene 3.0.3中修补QueryParser以避免大量的System.IO.IOException异常抛出:

CharStream.cs:

--- CharStream.cs
+++ CharStream.cs
@@ -44,6 +44,7 @@
        /// implementing this interface.  Can throw any java.io.IOException.
        /// </summary>
        char ReadChar();
+       char ReadChar(ref bool? systemIoException);

        /// <summary> Returns the column position of the character last read.</summary>
        /// <deprecated>
@@ -93,6 +94,7 @@
        /// to this method to implement backup correctly.
        /// </summary>
        char BeginToken();
+       char BeginToken(ref bool? systemIoException);

        /// <summary> Returns a string made up of characters from the marked token beginning
        /// to the current buffer position. Implementations have the choice of returning

FastCharStream.cs:

--- FastCharStream.cs
+++ FastCharStream.cs
@@ -48,12 +48,35 @@

        public char ReadChar()
        {
+           bool? systemIoException = null;
            if (bufferPosition >= bufferLength)
-               Refill();
+           {
+               Refill(ref systemIoException);
+           }
+           return buffer[bufferPosition++];
+       }
+       
+       public char ReadChar(ref bool? systemIoException)
+       {
+           if (bufferPosition >= bufferLength)
+           {
+               Refill(ref systemIoException);
+               // If using this Nullable as System.IO.IOException signal and is signaled.
+               if (systemIoException.HasValue && systemIoException.Value == true)
+               {
+                   return '\0';
+               }
+           }
            return buffer[bufferPosition++];
        }

-       private void  Refill()
+       // You may ask to be signaled of a System.IO.IOException through the systemIoException parameter.
+       // Set it to false if you are interested, it will be set to true to signal a System.IO.IOException.
+       // Set it to null if you are not interested.
+       // This is used to avoid having a lot of System.IO.IOExceptions thrown while running the code under a debugger.
+       // Having a lot of exceptions thrown under a debugger causes the code to execute a lot more slowly.
+       // So use this if you are experiencing a lot of slow parsing at runtime under a debugger.
+       private void Refill(ref bool? systemIoException)
        {
            int newPosition = bufferLength - tokenStart;

@@ -86,7 +109,18 @@

            int charsRead = input.Read(buffer, newPosition, buffer.Length - newPosition);
            if (charsRead <= 0)
-               throw new System.IO.IOException("read past eof");
+           {
+               // If interested in using this Nullable to signal a System.IO.IOException
+               if (systemIoException.HasValue)
+               {
+                   systemIoException = true;
+                   return;
+               }
+               else
+               {
+                   throw new System.IO.IOException("read past eof");
+               }
+           }
            else
                bufferLength += charsRead;
        }
@@ -96,6 +130,12 @@
            tokenStart = bufferPosition;
            return ReadChar();
        }
+
+       public char BeginToken(ref bool? systemIoException)
+       {
+           tokenStart = bufferPosition;
+           return ReadChar(ref systemIoException);
+       }

        public void  Backup(int amount)
        {
@@ -156,4 +196,4 @@
            get { return 1; }
        }
    }
-}
\ No newline at end of file
+}

QueryParserTokenManager.cs:

--- QueryParserTokenManager.cs
+++ QueryParserTokenManager.cs
@@ -1341,9 +1341,16 @@

            for (; ; )
            {
+               bool? systemIoException = false;
                try
                {
-                   curChar = input_stream.BeginToken();
+                   curChar = input_stream.BeginToken(ref systemIoException);
+                   if (systemIoException != null && systemIoException.HasValue && systemIoException.Value == true)
+                   {
+                       jjmatchedKind = 0;
+                       matchedToken = JjFillToken();
+                       return matchedToken;
+                   }
                }
                catch (System.IO.IOException)
                {
@@ -1459,4 +1466,4 @@
            while (start++ != end);
        }
    }
-}
\ No newline at end of file
+}

您也可以使用我的github版本 https://github.com/franckspike/lucenenet.git