如何从文件中打印标记值?

时间:2015-11-28 04:50:16

标签: java tagging

我必须编写一个代码来识别推文的语言并打印出某种语言的推文。我已经编写了语言识别部分,但无法仅打印必要的行。

以下是代码:

import java.io.*;
import java.util.*;

import weka.classifiers.bayes.NaiveBayes;
import weka.classifiers.functions.SMO;
import weka.classifiers.trees.RandomForest;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class Lang_Detect 
{
    public static weka.classifiers.Classifier c;
    public static HashMap<String,String> trigram=new HashMap<String,String>();

    public static void initiate() throws Exception
    {
        c = loadModel("C:\\Users\\DIV\\ff\\Maithili\\nb.model"); // loads nb model
    }
    public static NaiveBayes loadModel(String path) throws Exception 
    {
        NaiveBayes classifier;
        FileInputStream fis = new FileInputStream(path);
        ObjectInputStream ois = new ObjectInputStream(fis);

        classifier = (NaiveBayes) ois.readObject();
        ois.close();
        return classifier;
    }
    public static void read_trigram()
    {
        try
        {
            FileInputStream fis = new FileInputStream("C:\\Users\\DIV\\ff\\Maithili\\Trigram.txt");
            BufferedReader br = new BufferedReader(new InputStreamReader(fis,"UTF-8"));
            String line;
            while((line = br.readLine())!=null)
            {
                String words[]=line.split(":");
                trigram.put(words[0].trim(), "");
            }
            fis.close();
        }catch(IOException f){}    
    }
    public static String feature_vector(String line)
    {
        String vector="";

        String words[]=line.split(" ");
        HashMap<String,String> local_word=new HashMap<String,String>();

        for(int i=0;i<words.length;i++)
        {
            char ch[]=words[i].toCharArray();
            for(int j=0;j<ch.length-2;j++)
            {
                local_word.put(ch[j]+""+ch[j+1]+""+ch[j+2], "");    
            }
        }

        for (Map.Entry<String, String> entry : trigram.entrySet()) 
        {
            if(local_word.containsKey(entry.getKey()))
            {
                vector+="1,";
            }
            else
            {
                vector+="0,";
            }
        }

        return vector;
    }
    public static String lang_tag(String file) throws Exception
    {

        String tagged_sentence="";
        int l=0,cntr=0;;
        //String words[]=sentence.toLowerCase().split(" ");
        StringBuffer str=new StringBuffer();

        read_trigram();

        // TODO Auto-generated method stub
        int count=1;
        str.append("@relation Language\n");
        for (Map.Entry<String, String> entry : trigram.entrySet()) 
        {
            str.append("@attribute Trigram"+count+" numeric\n");
            count++;
        }
        str.append("@attribute class {HN,NP,MT}\n");
        str.append("@DATA\n");

        try
        {
            FileInputStream fis = new FileInputStream(file);
            BufferedReader br = new BufferedReader(new InputStreamReader(fis,"UTF-8"));
            String line;
            while((line = br.readLine())!=null)
            {
                str.append(feature_vector(line)+"?\n");
            }
            fis.close();
        }catch(IOException f){}   

        Global.file_update("C:\\Users\\DIV\\ff\\Maithili\\HN_NP_MT_Unlabelled.arff", str.toString());

        Instances unlabeled = new Instances(
                new BufferedReader(
                  new FileReader("HN_NP_MT_Unlabelled.arff")));

        // set class attribute
        unlabeled.setClassIndex(unlabeled.numAttributes() - 1);

        Instances labeled = new Instances(unlabeled);

        // label instances
        for (int i = 0; i < unlabeled.numInstances(); i++) 
        {
          double clsLabel = c.classifyInstance(unlabeled.instance(i));
          String tag="";

          if(clsLabel==0.0)
              tag="HN";
          else if(clsLabel==1.0)
              tag="NP";
          else if(clsLabel==2.0)
          {
              tag="MT";
              Global.file_append("C:\\Users\\DIV\\ff\\Maithili\\Detected_Maithili_Tweets.txt", tag);
          }

          System.out.println(tag);

        }
        return tagged_sentence.trim();
    }

    public static void main(String[] args) throws Exception 
    {
        initiate();
        lang_tag("C:\\Users\\DIV\\ff\\Maithili\\tweets.txt");

    }

}

正如你在lang_tag()中看到的,我想打印标记为MT的行,但我无法获取任何特定变量中的行。 有人能帮助我吗?

0 个答案:

没有答案