JNI分段错误bug

时间:2009-09-22 12:12:30

标签: java c java-native-interface segmentation-fault

有这种形态分析器(开源,用OCml编写),名为ocamorph。 download and make instructions here

java绑定是错误的,我将不得不修复它,经过几个小时的努力,现在在我看来它需要几天时间来解决它,因为我不熟悉C,JNI,OCml和这个特殊的软件。

在这里你可以看到,对于一个小文件(subtitles_136.hu.tok)它可以工作,但是对于一个更大的文件(Tolkien_1.hu.tok),会抛出“分段错误”:

bpgergo@krusovice:~/hunglish_tools/ocamorph/ocamorph/src/bindings/java $ java -Djava.library.path=./output/ -cp output mokk.nlp.ocamorph.FileStemmer $HULEXICON src/java/mokk/nlp/ocamorph/cache2.txt > src/java/mokk/nlp/ocamorph/subtitles_136.hu.stem < src/java/mokk/nlp/ocamorph/subtitles_136.hu.tok
bpgergo@krusovice:~/hunglish_tools/ocamorph/ocamorph/src/bindings/java $ java -Djava.library.path=./output/ -cp output mokk.nlp.ocamorph.FileStemmer $HULEXICON src/java/mokk/nlp/ocamorph/cache.txt > src/java/mokk/nlp/ocamorph/Tolkien_1.en.stem < src/java/mokk/nlp/ocamorph/Tolkien_1.en.tok
Segmentation fault
bpgergo@krusovice:~/hunglish_tools/ocamorph/ocamorph/src/bindings/java $ ls -l src/java/mokk/nlp/ocamorph/
total 2116
-rw-rw-r-- 1 bpgergo breka    8505 2009-09-22 13:53 cache2.txt
-rw-rw-r-- 1 bpgergo breka      65 2009-07-07 18:48 Compounds.java
drwxrwxr-x 2 bpgergo breka    4096 2009-09-22 13:54 CVS
-rw-rw-r-- 1 bpgergo breka    5888 2009-09-18 17:19 FileStemmer.java
-rw-rw-r-- 1 bpgergo breka      77 2009-07-07 18:48 Guess.java
-rw-rw-r-- 1 bpgergo breka     953 2009-08-31 18:58 IOcamorphStemmer.java
-rw-rw-r-- 1 bpgergo breka    5419 2009-08-31 18:58 OcamorphCachedStemmer.java
-rw-rw-r-- 1 bpgergo breka    2836 2009-08-03 16:00 OcamorphStemmer.java
-rw-rw-r-- 1 bpgergo breka    4612 2009-09-22 12:51 OcamorphWrapper.java
-rw-rw-r-- 1 bpgergo breka    6731 2009-09-22 13:53 subtitles_136.hu.stem
-rw-rw-r-- 1 bpgergo breka    7356 2009-09-20 21:12 subtitles_136.hu.tok
-rw-rw-r-- 1 bpgergo breka    2907 2009-09-18 17:22 Tester.java
-rw-rw-r-- 1 bpgergo breka       0 2009-09-22 13:53 Tolkien_1.en.stem
-rw-rw-r-- 1 bpgergo breka 1033059 2009-09-17 16:09 Tolkien_1.en.tok
-rw-rw-r-- 1 bpgergo breka       0 2009-09-22 13:14 Tolkien_1.hu.stem
-rw-rw-r-- 1 bpgergo breka 1041968 2009-09-17 16:09 Tolkien_1.hu.tok
bpgergo@krusovice:~/hunglish_tools/ocamorph/ocamorph/src/bindings/java $

这是Java绑定的C部分(/ocamorph/src/bindings/java/src/c/hunmorph_jnistub.c)。这可能是有缺陷的部分,感谢您找到错误的任何提示或帮助:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "mokk_nlp_ocamorph_OcamorphWrapper.h"

#include "ocamorph.h"
#define MAX_ANALYSIS 100
#define ANALYSIS_MAXLEN 100

 // initialize the analysis string
  char analysis[ANALYSIS_MAXLEN];
  // initialize input buffer
  char buffer[500];
  char* analyses[MAX_ANALYSIS];

jmethodID MID_InstanceMethodCall_callback;


JNIEXPORT void JNICALL Java_mokk_nlp_ocamorph_OcamorphWrapper_initIDs
  (JNIEnv *env, jclass cls) {

  MID_InstanceMethodCall_callback =
         (*env)->GetMethodID(env, cls, "callback", "([B)V");

}
JNIEXPORT jlong JNICALL Java_mokk_nlp_ocamorph_OcamorphWrapper_init
  (JNIEnv * env, jobject obj, jstring bin_arg) {

  /* Convert to UTF8 */
  const char *bin_file  = (*env)->GetStringUTFChars(env, bin_arg, JNI_FALSE);

  ocamorph_startup();
  ocamorph_engine engine = init_from_bin(bin_file,0/*Don't pass the stupid no_caps argument*/);

  /* Release created UTF8 string */
  (*env)->ReleaseStringUTFChars(env, bin_arg, bin_file);

  int i;
  for (i=0; i<MAX_ANALYSIS;i++) {
    analyses[i] = (char *) malloc(ANALYSIS_MAXLEN * sizeof(char));
  };

  return  (jlong) engine;

}

JNIEXPORT jlong JNICALL Java_mokk_nlp_ocamorph_OcamorphWrapper_make_1analyzer
  (JNIEnv *env, jobject obj, jlong engine , jint blocking, jint compunds, jint stop_at_first, jint guess) {

  ocamorph_engine analyzer = make_analyzer((ocamorph_engine) engine, blocking, compunds, stop_at_first, guess);

  return (jlong) analyzer;

}

JNIEXPORT void JNICALL Java_mokk_nlp_ocamorph_OcamorphWrapper_analyze
  (JNIEnv * env, jobject obj, jlong analyzer, jbyteArray word) {

  ocamorph_engine analyzerc = (ocamorph_engine) analyzer;

  /* Convert to UTF8 */
  // const char *wordc  = (*env)->GetStringUTFChars(env, word, JNI_FALSE);

  //char *wordc =  (char *) (*env)->GetByteArrayElements( env, word, 0);

  const int maxInputLength = 1000;
  char wordc[maxInputLength];
  jsize len = (*env)->GetArrayLength(env,word);
  if (len>=maxInputLength) { len = maxInputLength-1; }

  if (len!=0)
  {
    (*env)->GetByteArrayRegion(env,word,0,len,(jbyte*)wordc);
  }
  wordc[len] = '\0';

  int n = analyze(analyzerc,wordc,analyses,MAX_ANALYSIS, ANALYSIS_MAXLEN);

  int i;
    for (i=0; i < n; ++i) {
      //  jstring ana = (*env)->NewStringUTF(env, analyses[i]);
      char* ana = analyses[i];
     jbyteArray jb=(*env)->NewByteArray(env, strlen(ana));
  (*env)->SetByteArrayRegion(env, jb, 0, strlen(ana), (jbyte *)ana);
     (*env)->CallVoidMethod(env, obj, MID_InstanceMethodCall_callback, jb);

     }
//  (*env)->ReleaseStringUTFChars(env, word, wordc);
}

这是Java部分(/ocamorph/src/bindings/java/src/java/mokk/nlp/ocamorph/OcamorphWrapper.java):

package mokk.nlp.ocamorph;

import java.io.UnsupportedEncodingException;
import java.util.LinkedList;
import java.util.List;

/**
 * JNI interface for Ocamorph. Constructor loads ocamorph engine and a specified binary resource.
 * 
 * @author bpgergo
 *
 */
public class OcamorphWrapper {

 private long analyzerId;
 private long engineId;


 private native static void initIDs();

 private native long init(String bin);

 // const ocamorph_engine engine, const int blocking, const int compounds,
 // const int stop_at_first, const int guess
 // valami hiba van az ocamorph-ban, mert a stop_at_first vezerli az
 // osszetettszosagot
 private native long make_analyzer(long engine, int blocking, int compounds,
   int stop_at_first, int guess);

 private native void analyze(long analyzer, byte[] word);

 static {
  //TODO FIXME how to define the library dynamically?
  System.loadLibrary("ocamorph");
  initIDs();
 }

 /**
  * the encoding required by the ocamorph lib
  */
 private static String encoding = "ISO-8859-2";

 //private static boolean debug = false;

 /**
  * analyze result (the callback will add the result strings)
  */
 private List<String> analyzeResult = null;

 /**
  * Loads a new Ocamorph engine, using the given binary resource and the arguments.
  *  
  * @param bin
  * @param blocking
  * @param stopAtFirst
  * @param compounds
  * @param guess
  */
 public OcamorphWrapper(String bin, boolean blocking, boolean stopAtFirst, 
   Compounds compounds, Guess guess) {
  super();
  engineId = init(bin);
  int comp = compounds2Code(compounds);
  int gu = guessToCode(guess);
  analyzerId = make_analyzer(engineId, boolean2Code(blocking), boolean2Code(stopAtFirst),
    comp, gu);
  //debug("engineId:"+engineId);
  //debug("analyzerId:"+analyzerId);
  //debug = false;
 }


 /**
  * This is the interface method for ocamorph analysis for the java side.
  * @param ba
  */
 public List<String> analyze(String word) {
  //debug("analyze:");
  analyzeResult = new LinkedList<String>();
  byte[] ba = null;
  try {
   ba = word.getBytes(encoding);
  } catch (UnsupportedEncodingException e1) {
   System.err
     .println("Ocamorph analyze UnsupportedEncodingException: ");
   e1.printStackTrace();
  }
  if (ba != null){
   //debug //printBytes(ba, "analizze:");
   analyze(analyzerId, ba);
  }
  return analyzeResult;
 }

 /**
  * The C interface will call this method to return analysis results
  */
 private void callback(byte[] ana) {

  String s = null;
  try {
   // bpgergo 20090618 this was a bug
   // s = new String(ana);
   s = new String(ana, encoding);
  } catch (UnsupportedEncodingException e) {
   System.err.println("callback new String(ana, encoding) UnsupportedEncodingException:");
   e.printStackTrace();
  }

  analyzeResult.add(s);

  //if (s != null) {
   //debug("!callback recieved: ");
   // debug //printBytes(ana, s);
  //} else {
   //debug("callback s == null");
  //}
 }

 /* static argument conversion methods */

 private static int boolean2Code(boolean bool){
  if (bool){
   return 1; 
  } else {
   return 0;
  }

 }
 private static int compounds2Code(Compounds compounds){
  int comp = 0;
  switch (compounds) {
  case No:
   comp = 0;
   break;
  case Allow:
   comp = 1;
   break;
  }
  return comp;
 }

 private static int guessToCode(Guess guess){
  int gu = 0;
  switch (guess) {
  case NoGuess:
   gu = 0;
   break;
  case Fallback:
   gu = 1;
   break;
  case Global:
   gu = 2;
   break;
  }
  return gu;
 }

 public String getEncoding() {
  return encoding;
 }
 public long getAnalyzerId() {
  return analyzerId;
 }


 /*private static void debug(String string) {
  if (debug) {
   System.out.println(string);
  }
 }*/

 /* getter/setter methods */

 public boolean isDebug() {
  return false; //debug;
 }

 public void setDebug(boolean debug) {
  //OcamorphWrapper.debug = debug;
 }

 /* static debug methods */

 /*public static void printBytes(byte[] array, String name) {
  if (debug) {
   for (int k = 0; k < array.length; k++) {
    debug(name + "[" + k + "] = " + "0x" + byteToHex(array[k]));
   }
  }
 }*/

 /*static public String byteToHex(byte b) {
  // Returns hex String representation of byte b
  char hexDigit[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    'a', 'b', 'c', 'd', 'e', 'f' };
  char[] array = { hexDigit[(b >> 4) & 0x0f], hexDigit[b & 0x0f] };
  return new String(array);
 }*/

 /*static public String charToHex(char c) {
  // Returns hex String representation of char c
  byte hi = (byte) (c >>> 8);
  byte lo = (byte) (c & 0xff);
  return byteToHex(hi) + byteToHex(lo);
 }*/



}

2 个答案:

答案 0 :(得分:1)

系统崩溃时是否创建了任何类型的hs_pid ###。日志文件?他们可以偶尔帮助解决这些问题。

我的猜测是它与设置MID_InstanceMethodCall_callback方法id的古怪方式有关。 id存储为全局值,只有在调用initIDs静态方法时才会设置它,这在您的示例代码中似乎不会发生。如果没有设置,那么在尝试调用回调方法时,analyze会barf。确保获得回调方法ID的方法如下:

jclass cls = (*env)->GetObjectClass(env, obj);
if(cls == NULL){
  //Handle any errors
}
jmethodID mid = (*env)->GetMethodID(env, cls, "callback", "([B)V");
if(mid == NULL){
  //Handle any more errors
}
int i;
for (i=0; i < n; ++i) {
  //  jstring ana = (*env)->NewStringUTF(env, analyses[i]);
  char* ana = analyses[i];
  jbyteArray jb=(*env)->NewByteArray(env, strlen(ana));
  (*env)->SetByteArrayRegion(env, jb, 0, strlen(ana), (jbyte *)ana);
  (*env)->CallVoidMethod(env, obj, mid, jb);

}

答案 1 :(得分:-1)

尝试使用调试信息构建C代码,并查找如何在您的(看似类Unix)操作系统上启用核心转储。这应该给你一个起点。