UIMA管道与斯坦福NER

时间:2015-07-21 06:45:06

标签: java stanford-nlp named-entity-recognition uima

我在UIMA中整合了Stanford NER并开发了一条管道。管道包含一个FileSystemCollectionReader、一个NERAnnotator和一个CasConsumer,但输出不符合预期。我的输入目录中有两个文件,运行管道后得到了两个输出文件,但在第二个输出文件中,第二个输入文件的内容与第一个输入文件的内容合并在了一起。我不知道这里发生了什么。

CasConsumer的代码:

`

 package org.gds.uima;


import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.UUID;

import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.AnalysisComponent_ImplBase;
import org.apache.uima.analysis_component.CasAnnotator_ImplBase;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.fit.component.CasConsumer_ImplBase;
import org.apache.uima.fit.component.JCasConsumer_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.util.CasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;


public class CasConsumer extends JCasConsumer_ImplBase
{
    public final static String PARAM_OUTPUT="outputDir";

     @ConfigurationParameter(name = PARAM_OUTPUT)
     private String outputDirectory;

     public final static String PARAM_ANNOTATION_TYPES = "annotationTypes";


    enter code here
     @ConfigurationParameter(name = PARAM_ANNOTATION_TYPES,defaultValue="String")
     public List<String> annotationTypes;

     public void initialize(final UimaContext context) throws ResourceInitializationException 
     {
         super.initialize(context);
     }

     @Override
     public void process(JCas jcas)
     {

            String original = jcas.getDocumentText();

            try
            {
                String onlyText="";
                JCas sofaText = jcas.getView(NERAnnotator.SOFA_NAME);
                onlyText = sofaText.getDocumentText();
                String name = UUID.randomUUID().toString().substring(20);   
                    File outputDir = new File(this.outputDirectory+"/"+name);
                    System.out.print("Saving file to "+outputDir.getAbsolutePath());
                    FileOutputStream fos = new FileOutputStream(outputDir.getAbsoluteFile());
                    PrintWriter pw = new PrintWriter(fos);
                    pw.println(onlyText);
                    pw.close();

            }
            catch(CASException cae)
            {
                System.out.println(cae);
            }
            catch(FileNotFoundException fne)
            {
                System.out.print(fne);
            }               
    }



}
`

0 个答案:

没有答案