通过代码调用DBpedia Spotlight与直接使用DBpedia Spotlight端点得到的结果不同

时间:2015-03-11 14:29:51

标签: java github dbpedia spotlight-dbpedia

这是发起查询的主要类

package extractKeyword;

import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.methods.GetMethod;
import org.dbpedia.spotlight.exceptions.AnnotationException;
import org.dbpedia.spotlight.model.DBpediaResource;
import org.dbpedia.spotlight.model.Text;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;

import java.io.File;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.LinkedList;
import java.util.List;

public class db extends AnnotationClient {

    //private final static String API_URL = "http://jodaiber.dyndns.org:2222/";
    private static String  API_URL    = "http://spotlight.dbpedia.org/";
    private static  double  CONFIDENCE = 0.0;
   private static  int     SUPPORT    = 0;
   // private static  String  powered_by ="non";
   // private static  String  spotter ="CoOccurrenceBasedSelector";//"LingPipeSpotter"=Annotate all spots 
                                                //AtLeastOneNounSelector"=No verbs and adjs.    
                                                //"CoOccurrenceBasedSelector" =No 'common words'
                                                //"NESpotter"=Only Per.,Org.,Loc.
    //private static String  disambiguator ="Default";//Default ;Occurrences=Occurrence-centric;Document=Document-centric
    //private static String  showScores ="yes";

@SuppressWarnings("static-access")
public void configiration(double CONFIDENCE,int SUPPORT)
//, String powered_by,String spotter,String disambiguator,String showScores)
{
    this.CONFIDENCE=CONFIDENCE;
    this.SUPPORT=SUPPORT;
   // this.powered_by=powered_by;
    //this.spotter=spotter;
    //this.disambiguator=disambiguator;
    //showScores=showScores;

}
    public List<DBpediaResource> extract(Text text) throws AnnotationException {
       // LOG.info("Querying API.");
        String spotlightResponse;
        try {
            String Query=API_URL + "rest/annotate/?" +
                    "confidence=" + CONFIDENCE
                 + "&support=" + SUPPORT
                 // + "&spotter=" + spotter
                 // + "&disambiguator=" + disambiguator
                 // + "&showScores=" + showScores
                 // + "&powered_by=" + powered_by
                  + "&text=" + URLEncoder.encode(text.text(), "utf-8");
            //LOG.info(Query);

            GetMethod getMethod = new GetMethod(Query);
            getMethod.addRequestHeader(new Header("Accept", "application/json"));
            spotlightResponse = request(getMethod);

        } catch (UnsupportedEncodingException e) {
            throw new AnnotationException("Could not encode text.", e);
        }
        assert     spotlightResponse != null;
        JSONObject resultJSON         = null;
        JSONArray  entities           = null;

        try {                   
            resultJSON = new JSONObject(spotlightResponse);
            entities = resultJSON.getJSONArray("Resources");

        } catch (JSONException e) {
            //throw new AnnotationException("Received invalid response from DBpedia Spotlight API.");
        }

        LinkedList<DBpediaResource> resources = new LinkedList<DBpediaResource>();
        if(entities!=null) 
        for(int i = 0; i < entities.length(); i++) {
            try {
                JSONObject entity = entities.getJSONObject(i);
                resources.add(
                        new DBpediaResource(entity.getString("@URI"),
                        Integer.parseInt(entity.getString("@support"))));
            } catch (JSONException e) {
                //((Object) LOG).error("JSON exception "+e);
            }
        }
        return resources;
    }

}

扩展类

package extractKeyword;
import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpMethodBase;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;
import org.dbpedia.spotlight.exceptions.AnnotationException;
import org.dbpedia.spotlight.model.DBpediaResource;
import org.dbpedia.spotlight.model.Text;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.logging.Logger;

import javax.ws.rs.HttpMethod;

/**
 * @author pablomendes
 */
public abstract class AnnotationClient {

    //public Logger LOG = Logger.getLogger(this.getClass());
    private List<String> RES = new ArrayList<String>();

    // Create an instance of HttpClient.
    private static HttpClient client = new HttpClient();
    public List<String> getResu(){
        return RES;     
    }

    public String request(GetMethod getMethod) throws AnnotationException {
        String response = null;
        // Provide custom retry handler is necessary
        ( getMethod).getParams().setParameter(HttpMethodParams.RETRY_HANDLER,
                new DefaultHttpMethodRetryHandler(3, false));
        try {
            // Execute the method.
            int statusCode = client.executeMethod((org.apache.commons.httpclient.HttpMethod) getMethod);
            if (statusCode != HttpStatus.SC_OK) {
               // LOG.error("Method failed: " + ((HttpMethodBase) method).getStatusLine());
            }

            // Read the response body.
            byte[] responseBody = ((HttpMethodBase) getMethod).getResponseBody(); //TODO Going to buffer response body of large or unknown size. Using getResponseBodyAsStream instead is recommended.

            // Deal with the response.
            // Use caution: ensure correct character encoding and is not binary data
            response = new String(responseBody);

        } catch (HttpException e) {
           // LOG.error("Fatal protocol violation: " + e.getMessage());
            throw new AnnotationException("Protocol error executing HTTP request.",e);
        } catch (IOException e) {
            //((Object) LOG).error("Fatal transport error: " + e.getMessage());
            //((Object) LOG).error(((HttpMethodBase) method).getQueryString());
            throw new AnnotationException("Transport error executing HTTP request.",e);
        } finally {
            // Release the connection.
            ((HttpMethodBase) getMethod).releaseConnection();
        }
        return response;

    }

    protected static String readFileAsString(String filePath) throws java.io.IOException{
        return readFileAsString(new File(filePath));
    }

    protected static String readFileAsString(File file) throws IOException {
        byte[] buffer = new byte[(int) file.length()];
        @SuppressWarnings("resource")
        BufferedInputStream f = new BufferedInputStream(new FileInputStream(file));
        f.read(buffer);
        return new String(buffer);
    }

    static abstract class LineParser {

        public abstract String parse(String s) throws ParseException;

        static class ManualDatasetLineParser extends LineParser {
            public String parse(String s) throws ParseException {
                return s.trim();
            }
        }

        static class OccTSVLineParser extends LineParser {
            public String parse(String s) throws ParseException {
                String result = s;
                try {
                    result = s.trim().split("\t")[3];
                } catch (ArrayIndexOutOfBoundsException e) {
                    throw new ParseException(e.getMessage(), 3);
                }
                return result; 
            }
        }
    }

    public void saveExtractedEntitiesSet(String Question, LineParser parser, int restartFrom) throws Exception {
        String text = Question;
        int i=0;
        //int correct =0 ; int error = 0;int sum = 0;

        for (String snippet: text.split("\n")) {
            String s = parser.parse(snippet);
            if (s!= null && !s.equals("")) {
                i++;

                if (i<restartFrom) continue;

                List<DBpediaResource> entities = new ArrayList<DBpediaResource>();

                try {
                    entities = extract(new Text(snippet.replaceAll("\\s+"," ")));
                    System.out.println(entities.get(0).getFullUri());

                } catch (AnnotationException e) {
                   // error++;
                    //LOG.error(e);
                    e.printStackTrace();
                }
                for (DBpediaResource e: entities) {
                    RES.add(e.uri());
                }
            }
        }
    }


    public abstract List<DBpediaResource> extract(Text text) throws AnnotationException;

    public void evaluate(String Question) throws Exception {
        evaluateManual(Question,0);
    }

    public void evaluateManual(String Question, int restartFrom) throws Exception {
         saveExtractedEntitiesSet(Question,new LineParser.ManualDatasetLineParser(), restartFrom);
    }

}

主类

package extractKeyword;

/**
 * Entry point: annotates one sample question with a {@code db} client and
 * prints the collected resource URIs.
 */
public class startAnnonation {

    /**
     * Configures the client with confidence 0.25 and support 0, runs the
     * annotation and prints the result.
     *
     * @param args unused
     * @throws Exception propagated from {@code evaluate}
     */
    public static void main(String[] args) throws Exception {
        final db client = new db();
        // Other options formerly passed here: 0, "non", "AtLeastOneNounSelector", "Default", "yes"
        client.configiration(0.25, 0);

        final String sampleQuestion = "What is the winning chances of BJP in New Delhi elections?";
        client.evaluate(sampleQuestion);

        System.out.println("resource : " + client.getResu());
    }
}

主要问题在于:当我通过Spotlight的jar(上面的代码)使用DBpedia Spotlight时,得到的结果与DBpedia Spotlight端点(dbpedia-spotlight.github.io/demo/)给出的结果不同。

使用上述代码的结果:文本:BJP在新德里选举中获胜的机会是什么? 置信度:0.35 资源:[Election]。DBpedia Spotlight端点(dbpedia-spotlight.github.io/demo/)的结果:文本:BJP在新德里选举中获胜的机会是什么? 置信度:0.35 资源:[Bharatiya_Janata_Party, New_Delhi, Election]

另外,为什么Spotlight现在不再提供support(支持度)这个参数?

0 个答案:

没有答案