Question

我有一句话

/**
 * Small blocking HTTP helper built on the Apache HttpClient API.
 *
 * <p>Typical use: construct with a target URL, add parameters and headers,
 * call {@link #ExecuteGetRequest()} or {@link #ExecutePostRequest()}, then read
 * the outcome through {@link #getResponseCode()}, {@link #getErrorMessage()}
 * and {@link #getResponse()}.
 *
 * <p>I/O failures during execution are printed to standard error and not
 * rethrown; in that case the result fields keep whatever value they had
 * before the call.
 */
public class HttpServiceClass {
    /** Connect and socket-read timeout handed to HttpConnectionParams. */
    private static final int TIMEOUT_MILLIS = 10000;

    private final ArrayList<NameValuePair> params;
    private final ArrayList<NameValuePair> headers;
    /** Raw string sent as the POST body when no form parameters were added. */
    private final String ansi;
    private final String url;
    private int responseCode;
    private String message;
    private String response;

    /**
     * Creates a client for a single endpoint.
     *
     * @param url  target URL; GET parameters are appended to it as a query string
     * @param ansi raw request body used by POST when no parameters are set; may be null
     */
    public HttpServiceClass(String url, String ansi) {
        this.url = url;
        this.ansi = ansi;
        params = new ArrayList<NameValuePair>();
        headers = new ArrayList<NameValuePair>();
    }

    /** @return the response body read by the last successful request, or null if none was read */
    public String getResponse() {
        return response;
    }

    /** @return the reason phrase of the last status line received, or null if none was received */
    public String getErrorMessage() {
        return message;
    }

    /** @return the status code of the last status line received, or 0 if none was received */
    public int getResponseCode() {
        return responseCode;
    }

    /** Adds a request parameter (query string for GET, form field for POST). */
    public void AddParam(String name, String value) {
        params.add(new BasicNameValuePair(name, value));
    }

    /** Adds a header that is sent with every request made by this instance. */
    public void AddHeader(String name, String value) {
        headers.add(new BasicNameValuePair(name, value));
    }

    /**
     * Sends a GET request to the URL, with the added parameters as a
     * UTF-8 URL-encoded query string.
     *
     * @throws Exception if a parameter cannot be encoded or the request cannot be built
     */
    public void ExecuteGetRequest() throws Exception {
        StringBuilder query = new StringBuilder();
        for (NameValuePair p : params) {
            query.append(query.length() == 0 ? '?' : '&');
            // Encode the name as well as the value so that reserved characters
            // in either one cannot corrupt the query string.
            query.append(URLEncoder.encode(p.getName(), "UTF-8")).append('=');
            // A null value is sent as an empty value instead of failing with a
            // NullPointerException inside URLEncoder.
            String value = p.getValue();
            if (value != null) {
                query.append(URLEncoder.encode(value, "UTF-8"));
            }
        }

        HttpGet request = new HttpGet(url + query);
        for (NameValuePair h : headers) {
            request.addHeader(h.getName(), h.getValue());
        }

        executeRequest(request);
    }

    /**
     * Sends a POST request to the URL. When parameters were added they are
     * sent as a UTF-8 URL-encoded form; otherwise the raw body string given to
     * the constructor is sent, if it is not null.
     *
     * @throws Exception if the request entity cannot be built
     */
    public void ExecutePostRequest() throws Exception {
        HttpPost request = new HttpPost(url);
        for (NameValuePair h : headers) {
            request.addHeader(h.getName(), h.getValue());
        }

        if (!params.isEmpty()) {
            request.setEntity(new UrlEncodedFormEntity(params, HTTP.UTF_8));
        } else if (ansi != null) {
            // Only build the raw-body entity when it is actually used; building
            // it unconditionally failed for a null body even when form
            // parameters were going to replace it.
            request.setEntity(new StringEntity(ansi));
        }

        executeRequest(request);
    }

    /**
     * Executes the request with a fresh client and stores the status code,
     * reason phrase and body in this instance. I/O errors are printed, not
     * rethrown.
     */
    private void executeRequest(HttpUriRequest request) {
        HttpParams httpParameters = new BasicHttpParams();
        HttpConnectionParams.setConnectionTimeout(httpParameters, TIMEOUT_MILLIS);
        HttpConnectionParams.setSoTimeout(httpParameters, TIMEOUT_MILLIS);

        HttpClient client = new DefaultHttpClient(httpParameters);
        try {
            HttpResponse httpResponse = client.execute(request);
            responseCode = httpResponse.getStatusLine().getStatusCode();
            message = httpResponse.getStatusLine().getReasonPhrase();

            HttpEntity entity = httpResponse.getEntity();
            if (entity != null) {
                // convertStreamToString closes the stream when it is done.
                response = convertStreamToString(entity.getContent());
            }
        } catch (IOException e) {
            // ClientProtocolException is an IOException, so one handler covers both.
            e.printStackTrace();
        } finally {
            // Release the client's connections on success as well as on
            // failure; previously this only happened when an exception was thrown.
            client.getConnectionManager().shutdown();
        }
    }

    /**
     * Reads the stream line by line into a string, terminating every line
     * with {@code '\n'}, and closes the stream afterwards. Read errors are
     * printed and whatever was read so far is returned.
     */
    private String convertStreamToString(InputStream is) {
        // NOTE(review): decodes with the platform default charset, as before;
        // confirm whether the response's declared charset should be used instead.
        BufferedReader reader = new BufferedReader(new InputStreamReader(is));
        StringBuilder sb = new StringBuilder();

        try {
            String line;
            while ((line = reader.readLine()) != null) {
                sb.append(line).append('\n');
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                // Closing the reader also closes the wrapped input stream.
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return sb.toString();
    }

}

我对它应用了 NLTK 分块（chunking），并得到一棵树作为输出。

# Sample review text to run the NLTK chunker on.
text  = '''If you're in construction or need to pass fire inspection, or just want fire resistant materials for peace of mind, this is the one to use. Check out 3rd party sellers as well Skylite'''

代码如下：

# Split the sample text into sentences, tokenize each sentence into words,
# then POS-tag the words, so every sentence becomes a list of (word, tag) pairs.
# The text is bound to `text` above; the original referenced an undefined `d`.
sentences = nltk.sent_tokenize(text)
sentences = [nltk.word_tokenize(sent) for sent in sentences]
sentences = [nltk.pos_tag(sent) for sent in sentences]

# Chunk grammar for RegexpParser:
#   NP       - optional determiner, any adjectives, one or more noun tags
#   RELATION - any verb tag (plus a second rule repeating the NP pattern)
#   ENTITY   - any single noun tag
# NOTE(review): the sample output contains only NP and RELATION chunks, so
# nouns are presumably all claimed by NP before the later rules see them.
grammar = """NP: {<DT>?<JJ>*<NN.*>+}
       RELATION: {<V.*>}
                 {<DT>?<JJ>*<NN.*>+}
       ENTITY: {<NN.*>}"""

cp = nltk.RegexpParser(grammar)
for tagged_sentence in sentences:
    result = cp.parse(tagged_sentence)
    print(result)        # bracketed text form of the chunk tree
    print(type(result))
    result.draw()        # displays the tree graphically

输出如下。我能否以字符串列表的格式获得其中的名词短语：

(S If/IN you/PRP (RELATION 're/VBP) in/IN (NP construction/NN) or/CC (NP need/NN) to/TO (RELATION pass/VB) (NP fire/NN inspection/NN) ,/, or/CC just/RB (RELATION want/VB) (NP fire/NN) (NP resistant/JJ materials/NNS) for/IN (NP peace/NN) of/IN (NP mind/NN) ,/, this/DT (RELATION is/VBZ) (NP the/DT one/NN) to/TO (RELATION use/VB) ./.)

请给一些建议……

Answer 1

可以这样做：

# Build one list of noun-phrase strings per POS-tagged sentence.
noun_phrases_list = []
for sent in sentences:
    phrases = []
    for tree in cp.parse(sent).subtrees():
        if tree.label() != 'NP':
            continue
        # Leaves are (word, tag) pairs; keep only the words of the chunk.
        words = [leaf[0] for leaf in tree.leaves()]
        phrases.append(' '.join(words))
    noun_phrases_list.append(phrases)
# Result for the sample text:
#[['construction', 'need', 'fire inspection', 'fire', 'resistant materials', 
#  'peace', 'mind', 'the one'], 
# ['party sellers', 'Skylite']]

Answer 2

可以像下面这样对子树使用过滤器：

# Chunk grammar: optional determiner, any adjectives, then one NN-tagged noun.
grammar = "NP: {<DT>?<JJ>*<NN>}"
cp = nltk.RegexpParser(grammar)
# Chunk the second POS-tagged sentence.
result = cp.parse(sentences[1])
# Keeps only the subtrees labelled NP; iterate over the returned generator
# (e.g. wrap it in list(...)) to actually obtain the chunks.
result.subtrees(filter =lambda t: t.label() == 'NP') # gives you generator

在名词短语列表中解析NLTK树输出

2 个答案: