Question

我创建了一个使用 twitter4j 接收推文的管道。推文通过 Redis 的 push 命令写入列表；之后再用 pop 命令取出推文做进一步处理，即删除“#”、“@”并分析情绪。

我遇到的问题是：以流方式接收推文时，有些推文（较长的推文）不完整。例如：

原创推文：“莫迪政府的另一场无声革命 - 街道灯被LED灯泡取代，节省能源消耗，并阻止有毒物质排放。”

显示推文：“RT @ BJP4India：Modi govt的另一场无声革命 - 街道灯被LED灯泡取代，节省能源消耗......”

我不明白为什么会出现这种情况。这给情绪分析带来了问题，因为“......”也会被当作一个单词来处理。

以下是推送和弹出推文的代码：

import analytics.twitter.filter.Abbreviations;
import analytics.twitter.filter.TwitterFilter;
import redis.clients.jedis.Jedis;
/**
 * Pushes raw tweets onto the Redis list named "list" and pops them back off
 * for filtering and abbreviation expansion.
 *
 * NOTE(review): tweets are written with RPUSH and read with RPOP, which both
 * operate on the tail of the list, so tweets come back in last-in-first-out
 * order. If first-in-first-out processing is intended, LPOP would be needed --
 * confirm against the caller.
 */
public class RedisJava {

/** Client connected to the Redis server on "localhost". */
Jedis jedis = new Jedis("localhost");

/**
 * Appends a tweet to the Redis list "list".
 *
 * @param text the tweet text to store
 */
public void RedisTweets(String text)
{
    jedis.rpush("list", text);
}

/**
 * Pops one tweet from the Redis list "list", runs it through
 * {@code TwitterFilter.filter} and {@code Abbreviations.tweetAbbr}, prints the
 * result and returns it.
 *
 * @param text retained for backward compatibility with existing callers; it is
 *             not used, because the tweet to process is the one popped from Redis
 * @return the processed tweet, or {@code null} when the list is empty
 */
public String RedisPop(String text)
{
    Abbreviations abbrv = new Abbreviations();
    TwitterFilter twitterfilter = new TwitterFilter();

    String pop = jedis.rpop("list");
    if (pop == null)
    {
        // Nothing queued at the moment.
        return null;
    }

    // Process the popped value. The original filtered the `text` argument,
    // which silently discarded the tweet that had just been removed from Redis.
    String filtered = twitterfilter.filter(pop);
    String tweet = abbrv.tweetAbbr(filtered);
    System.out.println("Popped Content: " + tweet);
    return tweet;
}
}

以下是执行过滤的代码：

import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Strips Twitter-specific noise from a tweet: mention/hashtag/colon tokens,
 * URLs, and a leading "RT" retweet marker.
 */
public class TwitterFilter {

/** An '@', '#' or ':' together with the word or whitespace run that follows it. */
private static final Pattern MARKER_PATTERN = Pattern.compile("(@|#|:)(\\w+|\\s+)");

/** URLs for the listed schemes, matched case-insensitively. */
private static final Pattern URL_PATTERN = Pattern.compile(
        "((https?|ftp|gopher|telnet|file|Unsure|http):((//)|(\\\\))+[\\w\\d:#@%/;$()~_?\\+-=\\\\\\.&]*)",
        Pattern.CASE_INSENSITIVE);

/**
 * Removes marker tokens, URLs and a leading "RT" from the given tweet.
 *
 * @param text the raw tweet text; must not be {@code null}
 * @return the filtered text
 * @throws NullPointerException if {@code text} is {@code null}
 */
public String filter(String text)
{
    String filtered = MARKER_PATTERN.matcher(text).replaceAll("");

    // Remove every URL in a single pass through the matcher. The previous loop
    // used the match counter as a capture-group index and passed each URL to
    // String.replaceAll as a regex, so it mangled tweets with several URLs,
    // missed URLs containing regex metacharacters such as '?', and could throw
    // IndexOutOfBoundsException.
    Matcher urlMatcher = URL_PATTERN.matcher(filtered);
    if (urlMatcher.find())
    {
        // Surrounding whitespace is trimmed only when a URL was removed,
        // as before.
        filtered = urlMatcher.replaceAll("").trim();
    }

    if (filtered.startsWith("RT"))
    {
        // Drop only the leading retweet marker; occurrences of "RT" inside
        // later words (e.g. "SMART") must be left alone.
        return filtered.substring(2);
    }
    return filtered;
}
}

以下是分析情绪的代码：

import java.util.Properties;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.neural.rnn.RNNCoreAnnotations;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.sentiment.SentimentCoreAnnotations;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.util.CoreMap;


/**
 * Classifies the sentiment of a line of text with a Stanford CoreNLP pipeline
 * and reports it as one of five labels from "very negative" to "very positive".
 */
public class TwitterSentiment {

/** Annotators run by the pipeline, in order. */
private static final String ANNOTATORS = "tokenize, ssplit, pos, lemma, ner, parse, sentiment";

/**
 * Pipeline shared by all calls on this instance. It is created on first use
 * because constructing it is far more expensive than annotating one tweet.
 * The lazy initialisation is not synchronized.
 */
private StanfordCoreNLP pipeline;

/**
 * Determines the sentiment label of the given line.
 *
 * @param line the text to analyse; must be neither {@code null} nor empty
 * @return "very negative", "negative", "neutral", "positive" or
 *         "very positive"; {@code null} when no sentiment class in the range
 *         0..4 could be determined (for example when no sentence was found)
 * @throws IllegalArgumentException if {@code line} is {@code null} or empty
 */
public String findSentiment(String line) {

    if (line == null || line.isEmpty()) {
        throw new IllegalArgumentException("The line must not be null or empty.");
    }

    Annotation annotation = processLine(line);

    int mainSentiment = findMainSentiment(annotation);

    if (mainSentiment < 0 || mainSentiment > 4) {
        return null;
    }

    return toCss(mainSentiment);

}

/**
 * Maps a sentiment class index to its label.
 *
 * @param sentiment the class index; 0..4 are the known classes
 * @return the label for the class, or "default" for any other value
 */
private String toCss(int sentiment) {
    switch (sentiment) {
    case 0:
        return "very negative";
    case 1:
        return "negative";
    case 2:
        return "neutral";
    case 3:
        return "positive";
    case 4:
        return "very positive";
    default:
        return "default";
    }

}

/**
 * Returns the predicted sentiment class of the longest sentence in the
 * annotation, printing the word, part of speech, named-entity tag and lemma
 * of every token along the way.
 *
 * @param annotation the annotated text
 * @return the predicted class of the longest sentence, or
 *         {@link Integer#MIN_VALUE} when the annotation has no sentences
 */
private int findMainSentiment(Annotation annotation) {

    int mainSentiment = Integer.MIN_VALUE;
    int longest = Integer.MIN_VALUE;

    for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {

        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {

            String word = token.get(CoreAnnotations.TextAnnotation.class);
            String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
            String ne = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
            String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);

            System.out.println("word: " + word);
            System.out.println("pos: " + pos);
            System.out.println("ne: " + ne);
            System.out.println("Lemmas: " + lemma);

        }

        // Sentence length is measured on the sentence's string form.
        int sentenceLength = String.valueOf(sentence).length();

        // Strictly greater: among equally long sentences the first one wins.
        if (sentenceLength > longest) {

            Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);

            mainSentiment = RNNCoreAnnotations.getPredictedClass(tree);

            longest = sentenceLength;

        }
    }

    return mainSentiment;

}

/**
 * Runs the pipeline over the given line.
 *
 * @param line the text to annotate
 * @return the resulting annotation
 */
private Annotation processLine(String line) {

    return getPipeline().process(line);

}

/**
 * Returns this instance's pipeline, building it on the first call so that
 * later calls reuse it instead of constructing a new one per tweet.
 */
private StanfordCoreNLP getPipeline() {

    if (pipeline == null) {
        pipeline = new StanfordCoreNLP(createPipelineProperties());
    }

    return pipeline;

}

/**
 * Builds the configuration for the pipeline.
 *
 * @return properties whose "annotators" entry lists the annotators to run
 */
private Properties createPipelineProperties() {

    Properties props = new Properties();
    props.setProperty("annotators", ANNOTATORS);

    return props;

}
}

流式传输时推文不完整

0 个答案: