从本体中检索同义词

时间:2018-03-21 05:36:26

标签: java iterator ontology similarity protege

我的问题与我的本体(Ontology)有关:我通过 isSynonymOf 对象属性从本体中检索同义词。(请注意,我的本体文件目前为 260 KB,预计会增长到 500 KB。)我使用下面的代码来检索同义词,但响应速度非常慢,显示同义词需要花费好几分钟。我该如何改进?注意:问题似乎出在相似度算法和迭代器上。

这是我的完整代码

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;

import org.apache.jena.ontology.Individual;
import org.apache.jena.ontology.OntModel;
import org.apache.jena.ontology.OntModelSpec;
import org.apache.jena.ontology.SymmetricProperty;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.rdf.model.NodeIterator;
import org.apache.jena.rdf.model.RDFNode;
import org.apache.jena.util.FileManager;

import xyzWordAnalyzer;
import cde.model.SimilarityModel;
/**
 * Loads the ontology at {@link #SOURCE_FILE} into an inference-backed Jena model and answers
 * synonym queries against it.
 *
 * <p>A synonym of a keyword is any value of the {@code isSynonymOf} symmetric property on an
 * individual whose local name equals the keyword, ignoring case, after spaces in the keyword have
 * been replaced by underscores.
 *
 * <p>The model is read once in the constructor and is never modified by this class, so the
 * name-to-individual index is built on the first query and reused by every later one instead of
 * rescanning all individuals per call.
 */
public class ontologyConnector {
    private static ontologyConnector instance;

    protected static final String SOURCE_FILE = "http://abc.owl";
    protected static final String NS = SOURCE_FILE + "abc";

    private static final String SYNONYM_PROPERTY_URI = "http://www.semanticweb.org/abc#isSynonymOf";

    /** Placeholder similarity for synonyms whose score came back as NaN; replaced by the average. */
    private static final int UNSCORED = -200;

    // Per-instance rather than static: a second "new ontologyConnector()" must not replace the
    // model that other instances are already using.
    private final OntModel ontModel;

    /** Individuals grouped by case-folded local name; null until the first query builds it. */
    private Map<String, List<Individual>> individualsByName;

    /**
     * Returns the shared connector, creating it (and loading the ontology) on first use.
     *
     * @return the lazily created shared instance
     */
    public static synchronized ontologyConnector getInstance() {
        if (instance == null) {
            instance = new ontologyConnector();
        }
        return instance;
    }

    /**
     * Creates a connector and reads the ontology from {@link #SOURCE_FILE}.
     *
     * @throws IllegalStateException if the ontology source cannot be opened
     */
    public ontologyConnector() {
        ontModel = ModelFactory.createOntologyModel(OntModelSpec.OWL_DL_MEM_RULE_INF);
        try (InputStream in = FileManager.get().open(SOURCE_FILE)) {
            if (in == null) {
                // FileManager.open signals "not found" with null rather than an exception.
                throw new IllegalStateException("Ontology source not found: " + SOURCE_FILE);
            }
            ontModel.read(in, "");
        } catch (IOException ignored) {
            // Only close() can raise this here; the model has already been read in full, so a
            // failure to close the input does not affect the loaded ontology.
        }
    }

    /**
     * Finds the synonyms of {@code keyword} and scores each one against the keyword.
     *
     * <p>Each score returned by {@code WordAnalyzer.getSimilarity} is multiplied by 100 and
     * truncated to an int. Synonyms whose score is NaN receive the truncated average instead.
     *
     * @param keyword the term to look up; spaces are treated as underscores when matching local names
     * @return one {@code SimilarityModel} per synonym (synonym text with underscores shown as
     *         spaces); empty when nothing matches
     * @throws NullPointerException if {@code keyword} is null
     */
    public ArrayList<SimilarityModel> getRelatums(String keyword) {
        Objects.requireNonNull(keyword, "keyword");

        String localName = keyword.replace(' ', '_');
        ArrayList<String> synonyms = findSynonyms(localName);

        // Synonyms are stored in display form (spaces), so the keyword must be compared in the
        // same form; comparing the underscore form never matched multi-word keywords.
        removeDuplicationfromList(localName.replace('_', ' '), synonyms);

        return scoreSynonyms(localName, synonyms);
    }

    /** Collects the display-form local names of all isSynonymOf values for the given local name. */
    private ArrayList<String> findSynonyms(String localName) {
        ArrayList<String> synonyms = new ArrayList<>();

        SymmetricProperty isSynonymOf = ontModel.getSymmetricProperty(SYNONYM_PROPERTY_URI);
        if (isSynonymOf == null) {
            // The ontology does not declare the property, so there is nothing to list.
            return synonyms;
        }

        List<Individual> matches = individualsByName().get(caseFold(localName));
        if (matches == null) {
            return synonyms;
        }

        for (Individual individual : matches) {
            NodeIterator values = individual.listPropertyValues(isSynonymOf);
            while (values.hasNext()) {
                RDFNode value = values.nextNode();
                if (!value.isURIResource()) {
                    // Literals and anonymous nodes have no local name to report.
                    continue;
                }
                String synonymName = value.asResource().getLocalName();
                if (synonymName != null) {
                    synonyms.add(synonymName.replace("_", " "));
                }
            }
        }
        return synonyms;
    }

    /** Returns the name index, scanning the model's individuals once on first use. */
    private synchronized Map<String, List<Individual>> individualsByName() {
        if (individualsByName == null) {
            Map<String, List<Individual>> index = new HashMap<>();
            Iterator<Individual> individuals = ontModel.listIndividuals();
            while (individuals.hasNext()) {
                Individual individual = individuals.next();
                String name = individual.getLocalName();
                if (name == null) {
                    // Anonymous individuals cannot match a keyword.
                    continue;
                }
                String key = caseFold(name);
                List<Individual> bucket = index.get(key);
                if (bucket == null) {
                    bucket = new ArrayList<>();
                    index.put(key, bucket);
                }
                bucket.add(individual);
            }
            individualsByName = index;
        }
        return individualsByName;
    }

    /**
     * Folds case per char the same way {@link String#equalsIgnoreCase(String)} compares chars
     * (upper-case, then lower-case), so two strings share a folded form exactly when
     * {@code equalsIgnoreCase} would report them equal.
     */
    private static String caseFold(String text) {
        char[] chars = text.toCharArray();
        for (int i = 0; i < chars.length; i++) {
            chars[i] = Character.toLowerCase(Character.toUpperCase(chars[i]));
        }
        return new String(chars);
    }

    /** Scores every synonym against the keyword and back-fills NaN scores with the average. */
    private ArrayList<SimilarityModel> scoreSynonyms(String localName, List<String> synonyms) {
        ArrayList<SimilarityModel> scored = new ArrayList<>(synonyms.size());
        WordAnalyzer analyzer = new WordAnalyzer();

        // NOTE(review): the analyzer receives the underscore form of the keyword but the space
        // form of each synonym, as before - confirm this is what WordAnalyzer expects.
        String query = localName.toLowerCase();

        double total = 0;
        for (String synonym : synonyms) {
            double score = analyzer.getSimilarity(query, synonym);

            int percent;
            if (Double.isNaN(score)) {
                percent = UNSCORED;
            } else {
                double scaled = score * 100;
                total += scaled;
                percent = (int) scaled;
            }
            scored.add(new SimilarityModel(synonym, percent));
        }

        // NOTE(review): the divisor counts unscored entries too, so they pull the average toward
        // zero; kept as-is because the intended weighting is not evident from this file.
        double average = total / scored.size();
        if (!Double.isNaN(average)) { // NaN only when there are no synonyms at all (0.0 / 0)
            int fallback = (int) average;
            for (SimilarityModel model : scored) {
                if (model.getSimilarity() == UNSCORED) {
                    model.setSimilarity(fallback);
                }
            }
        }
        return scored;
    }

    /**
     * Removes every entry equal to {@code str}, ignoring case, from {@code list} in place.
     *
     * @param str the value to remove
     * @param list the list to filter; modified in place
     * @return the same {@code list} instance
     */
    private ArrayList<String> removeDuplicationfromList(String str, ArrayList<String> list) {
        Iterator<String> entries = list.iterator();
        while (entries.hasNext()) {
            if (entries.next().equalsIgnoreCase(str)) {
                entries.remove();
            }
        }
        return list;
    }
}

0 个答案:

没有答案