使用OpenNLP在文本文件上进行情感分析

时间:2018-02-22 16:04:50

标签: java analysis sentiment-analysis fileinputstream opennlp

我有100个测试数据句子,想对它们进行情感分析。但无论输入什么字符串,得到的始终是正面的分类结果:每个句子的返回值都是1.0。有人知道为什么会这样吗?即使我使用.txt文件中的负面示例作为输入,结果仍然是正面的。

public class StartSentiment 
{

// Trained categorizer model; stays null until trainModel() assigns it, and is read by classifyNewText().
public static DoccatModel model = null;
// Texts that main() passes, in array order, to classifyNewText().
public static String[] analyzedTexts = {"Good win"};

/**
 * Entry point: trains the categorizer once, then classifies every entry of
 * {@code analyzedTexts} in array order.
 *
 * @param args command-line arguments (not used)
 * @throws IOException if classifying a text fails with an I/O error
 */
public static void main(String[] args) throws IOException {
    // Sentiment analysis starts here: the model must exist before any text is classified.
    trainModel();

    for (String text : analyzedTexts) {
        classifyNewText(text);
    }
}


  /**
   * Reads a whole text file into a single string.
   *
   * <p>The file is decoded as UTF-8, the same charset the training data stream in this
   * class is opened with. Every line of the file, including the last one, is terminated
   * in the result with the platform line separator.
   *
   * @param pathname path of the file to read
   * @return the file contents, one platform line separator appended after each line
   * @throws IOException if the file cannot be opened, or if reading it fails part-way
   */
  private static String readFile(String pathname) throws IOException {
        File file = new File(pathname);
        // The file size is only a capacity hint; cap it so the long-to-int cast can
        // never produce a negative capacity for very large files.
        int capacityHint = (int) Math.min(file.length(), 1L << 20);
        StringBuilder fileContents = new StringBuilder(capacityHint);
        String lineSeparator = System.getProperty("line.separator");

        try (Scanner scanner = new Scanner(file, StandardCharsets.UTF_8.name())) {
            while (scanner.hasNextLine()) {
                fileContents.append(scanner.nextLine()).append(lineSeparator);
            }
            // Scanner swallows read errors and just reports "no more lines"; surface them
            // so a truncated read is not mistaken for the complete file.
            IOException readFailure = scanner.ioException();
            if (readFailure != null) {
                throw readFailure;
            }
            return fileContents.toString();
        }
    }

  /**
   * Trains the document categorizer from {@code src/sentiment/Results.txt} and stores the
   * result in the static {@code model} field.
   *
   * <p>The file is read line by line as UTF-8 and each line is turned into a training
   * sample by {@code DocumentSampleStream}. Training uses a feature cutoff of 1 and 100
   * iterations with language code {@code "en"}.
   *
   * <p>Failures are reported on standard error and otherwise ignored, in which case
   * {@code model} keeps whatever value it had before the call.
   */
  public static void trainModel() {
      try {
          // Forward slashes resolve on every platform; a backslash-only literal is treated
          // as one odd file name outside Windows.
          MarkableFileInputStreamFactory dataIn = new MarkableFileInputStreamFactory(
                  new File("src/sentiment/Results.txt"));

          TrainingParameters tp = new TrainingParameters();
          tp.put(TrainingParameters.CUTOFF_PARAM, "1");
          tp.put(TrainingParameters.ITERATIONS_PARAM, "100");

          // try-with-resources closes both streams (and the file behind them) even when
          // training throws; previously nothing was ever closed.
          try (ObjectStream<String> lineStream =
                       new PlainTextByLineStream(dataIn, StandardCharsets.UTF_8);
               ObjectStream<DocumentSample> sampleStream =
                       new DocumentSampleStream(lineStream)) {
              model = DocumentCategorizerME.train("en", sampleStream, tp, new DoccatFactory());
          }
      } catch (Exception e) {
          // Best-effort by design: this method declares no exceptions, so report and return.
          e.printStackTrace();
      }
  }

  /**
   * Classifies one text with the trained {@code model} and prints the verdict to standard
   * output.
   *
   * <p>The text is split on single spaces, the tokens are scored by a
   * {@code DocumentCategorizerME}, and the best-scoring category decides the message:
   * category {@code "1"} (compared ignoring case) prints "The text is positive", any other
   * category prints "The text is negative". No line break is written after the message.
   *
   * @param text the text to classify
   * @throws IOException declared by the signature; nothing in this body throws it directly
   */
  public static void classifyNewText(String text) throws IOException{
      final DocumentCategorizerME categorizer = new DocumentCategorizerME(model);

      final String[] tokens = text.split(" ");
      final double[] scores = categorizer.categorize(tokens);
      final String bestCategory = categorizer.getBestCategory(scores);

      final boolean positive = bestCategory.equalsIgnoreCase("1");
      System.out.print(positive ? "The text is positive" : "The text is negative");
  }

0 个答案:

没有答案