我有100个测试数据句子。我试图对它们进行情绪分析,但无论我使用什么输入字符串,我只得到输入字符串的正面估计。每个句子的返回值为1.0。知道为什么会这样吗?即使我使用.txt文件中的负示例输入,结果也是正值。
public class StartSentiment
{
public static DoccatModel model = null;
public static String[] analyzedTexts = {"Good win"};
public static void main(String[] args) throws IOException {
// begin of sentiment analysis
trainModel();
for(int i=0; i<analyzedTexts.length;i++){
classifyNewText(analyzedTexts[i]);}
}
private static String readFile(String pathname) throws IOException {
File file = new File(pathname);
StringBuilder fileContents = new StringBuilder((int)file.length());
Scanner scanner = new Scanner(file);
String lineSeparator = System.getProperty("line.separator");
try {
while(scanner.hasNextLine()) {
fileContents.append(scanner.nextLine() + lineSeparator);
}
return fileContents.toString();
} finally {
scanner.close();
}
}
public static void trainModel() {
MarkableFileInputStreamFactory dataIn = null;
try {
dataIn = new MarkableFileInputStreamFactory(
new File("src\\sentiment\\Results.txt"));
ObjectStream<String> lineStream = null;
lineStream = new PlainTextByLineStream(dataIn, StandardCharsets.UTF_8);
ObjectStream<DocumentSample> sampleStream = new DocumentSampleStream(lineStream);
TrainingParameters tp = new TrainingParameters();
tp.put(TrainingParameters.CUTOFF_PARAM, "1");
tp.put(TrainingParameters.ITERATIONS_PARAM, "100");
DoccatFactory df = new DoccatFactory();
model = DocumentCategorizerME.train("en", sampleStream, tp, df);
} catch (Exception e) {
e.printStackTrace();
} finally {
if (dataIn != null) {
try {
} catch (Exception e2) {
e2.printStackTrace();
}
}
}
}
public static void classifyNewText(String text) throws IOException{
DocumentCategorizerME myCategorizer = new DocumentCategorizerME(model);
double[] outcomes = myCategorizer.categorize(text.split(" ") );
String category = myCategorizer.getBestCategory(outcomes);
if (category.equalsIgnoreCase("1")){
System.out.print("The text is positive");
} else {
System.out.print("The text is negative");
}
}