使用文本文件中的参数调用 replaceAll() 方法

时间:2012-06-11 16:24:49

标签: java regex file

我在数据库的一张表中存有一批原始文本,需要用一组替换词替换其中的某些单词。我把所有待替换的术语及其替换词写在一个文本文件中,格式如下:

min=admin
lelet=lambat
lemot=lambat
nii=nih
ntu=itu

等等。我已经成功初始化了 File 和 Scanner 变量,用来读取这些术语及其替换词。

我遍历数据集中的每一行,把原始文本保存到一个字符串中。在同一个循环里,我再遍历术语集合:把每一行读入名为 'pattern' 的字符串,并将其拆分为 'term' 和 'replacer' 两个字符串;然后创建一个新字符串,其值为对数据集中的字符串调用 replaceAll(term, replacer) 的结果。术语集合的循环结束后,我把新字符串插入数据库中的另一张表,随后数据集的循环结束。

我也试过像 replaceAll("min", "admin") 这样直接硬编码,这样是可行的,但要手动为将近 2000 个术语逐一编写替换代码,实在不现实。

有人遇到过类似的问题吗?我现在真的非常需要帮助 :(

 package sentimenrepo;
    import javax.swing.*;
    import java.sql.*;
    import java.io.*;
    //import java.util.HashMap;
    import java.util.Scanner;
    //import java.util.Map;
    /**
     * Background task (first version from the question) that reads every row of
     * {@code synonimtweet}, passes the tweet text through
     * {@link clean2#cleanTweet(String)} and inserts the result into
     * {@code synonimtweet_v2}, reporting progress through the static
     * {@code txtAreaResult} / {@code lblStat} fields of
     * {@code skripsisentimen.sentimenttwitter}.
     *
     * NOTE(review): kept exactly as posted; the inline notes below mark the
     * defects visible in this version.
     *
     * @author herman
     */
    public class synonimReplaceV2 extends SwingWorker {
        /**
         * Processes all rows of {@code synonimtweet}.
         *
         * @return the number of rows read (boxed {@code Integer})
         * @throws Exception declared by the signature; failures inside the
         *         try blocks are caught and only shown in {@code lblStat}
         */
        protected Object doInBackground() throws Exception {
                 new skripsisentimen.sentimenttwitter().setVisible(true);

                  Integer row = 0;
                  File synonimV2 = new File("synV2/catatan_kata_sinonim.txt");
                  String newTweet = "";
                DB db = new DB();
            Connection conn = db.dbConnect("jdbc:mysql://localhost:3306/tweet", "root", "");
            try{
              Statement select = conn.createStatement();
              select.executeQuery("select * from synonimtweet");
              ResultSet RS = select.getResultSet();
              // NOTE(review): this Scanner is never read in this method;
              // cleanTweet() opens its own Scanner on the same file.
              Scanner scSynV2 = new Scanner(synonimV2);
              while(RS.next()){
                   row++;

                       String no = RS.getString("no");
                  // A space is prepended to the tweet text before cleaning.
                  String tweet = " "+ RS.getString("tweet");
                  String published = RS.getString("published");
                  String label = RS.getString("label");
                  clean2 cleanv2 = new clean2();

                  newTweet = cleanv2.cleanTweet(tweet);
                    try{
                         Statement insert = conn.createStatement();
                         // NOTE(review): the SQL text is built by concatenating the column
                         // values, so a value containing a single quote changes the statement.
                         insert.executeUpdate("INSERT INTO synonimtweet_v2(no,tweet,published,label) values('"
                                 +no+"','"+newTweet+"','"+published+"','"+label+"')");
                        // Append this row's before/after text to the result area and move the caret to the end.
                        String current = skripsisentimen.sentimenttwitter.txtAreaResult.getText();
                  skripsisentimen.sentimenttwitter.txtAreaResult.setText(current+"\n"+row+"original : "+tweet+"\n"+newTweet+"\n______________________\n");
                  skripsisentimen.sentimenttwitter.lblStat.setText(row+" tweet read");
                  skripsisentimen.sentimenttwitter.txtAreaResult.setCaretPosition(skripsisentimen.sentimenttwitter.txtAreaResult.getText().length() - 1);

                    }catch(Exception e){
                         skripsisentimen.sentimenttwitter.lblStat.setText(e.getMessage());

                    }

                 // NOTE(review): `e` is not in scope here (the catch block above has
                 // already closed), so this statement does not compile.
                 skripsisentimen.sentimenttwitter.lblStat.setText(e.getMessage());

              }
            }catch(Exception e){
                skripsisentimen.sentimenttwitter.lblStat.setText(e.getMessage());

            }
           return row;
        }
        /** Helper that applies the term=replacer pairs from the synonym file to one tweet. */
        class clean2{

            public clean2(){}

            /**
             * Reads the synonym file token by token and applies each pair to {@code tweet}.
             *
             * NOTE(review): every iteration assigns {@code newTweet} from the
             * unmodified {@code tweet}, so only the replacement of the last pair
             * read survives; if no pair is read, the empty string is returned.
             *
             * @param tweet text to clean
             * @return the value of {@code newTweet} after the loop
             */
            public String cleanTweet(String tweet){
               File synonimV2 = new File("synV2/catatan_kata_sinonim.txt");

                String pattern = "";
              String term = "";
              String replacer = "";
              String newTweet="";
             try{
                Scanner scSynV2 = new Scanner(synonimV2);
                 // Scanner.next() returns whitespace-delimited tokens, each expected to look like term=replacer.
                 while(scSynV2.hasNext()){
                  pattern = scSynV2.next();
                  term = pattern.split("=")[0];
                  replacer = pattern.split("=")[1];
                  // String.replace performs a literal (non-regex) substitution.
                  newTweet = tweet.replace(term, replacer);
                 }
             }catch(Exception e){
                 e.printStackTrace();
             }

                System.out.println(newTweet+"\n"+tweet);
              return newTweet;

            }
        }

    }

更新


我刚刚意识到代码其实是有效的,但只对数据库中的第一行生效,第二行及之后的行仍然没有被替换。下面是我更新后的最新代码:

/**
 * Background task (updated version from the question) that reads the rows
 * returned by {@code select * from synonimtweet limit 2,10}, applies the
 * term=substitute pairs from the synonym file to each tweet and inserts the
 * result into {@code synonimtweet_v2}.
 *
 * NOTE(review): kept exactly as posted; the inline notes explain why only the
 * first row ends up with replacements.
 */
public class synonimReplaceV2 extends SwingWorker {

    /**
     * Processes the selected rows.
     *
     * @return the number of rows read (boxed {@code Integer})
     * @throws Exception declared by the signature; the trailing
     *         {@code Thread.sleep} is outside every try block
     */
    protected Object doInBackground() throws Exception {
             new skripsisentimen.sentimenttwitter().setVisible(true);

              Integer row = 0;

              // NOTE(review): never reassigned below, so the "new tweet" part of the
              // progress text is always empty.
              String newTweet = "";
            DB db = new DB();
        Connection conn = db.dbConnect("jdbc:mysql://localhost:3306/tweet", "root", "");
        try{
          Statement select = conn.createStatement();
          select.executeQuery("select * from synonimtweet limit 2,10");
          ResultSet RS = select.getResultSet();
          // NOTE(review): the reader is opened once, before the row loop. The inner
          // readLine() loop consumes it completely while handling the first row, so
          // for every later row readLine() returns null at once and nothing is replaced.
          FileReader readSyn = new FileReader("synV2/catatan_kata_sinonim.txt");
          BufferedReader buffSyn = new BufferedReader(readSyn);
          while(RS.next()){
               row++;
                   String no = RS.getString("no");
              String tweet = " "+ RS.getString("tweet");
              String published = RS.getString("published");
              String label = RS.getString("label");
              String pattern = "";
             while((pattern=buffSyn.readLine())!=null){
                 String patternTerm = pattern.split("=")[0];
                 String patternSubs = pattern.split("=")[1];
                 // NOTE(review): the regex match includes the whitespace character in
                 // front of the term, so that character is replaced as well.
                 tweet = tweet.replaceAll("\\s"+patternTerm, patternSubs);
             }

                try{
                     Statement insert = conn.createStatement();
                     // The INSERT text is built by concatenating the column values.
                     insert.executeUpdate("INSERT INTO synonimtweet_v2(no,tweet,published,label) values('"
                             +no+"','"+tweet+"','"+published+"','"+label+"')");
                    // NOTE(review): `tweet` already holds the replaced text at this point,
                    // although it is printed after the "original : " label.
                    String current = skripsisentimen.sentimenttwitter.txtAreaResult.getText();
              skripsisentimen.sentimenttwitter.txtAreaResult.setText(current+"\n"+row+"original : "+tweet+"\n"+newTweet+"\n______________________\n");
              skripsisentimen.sentimenttwitter.lblStat.setText(row+" tweet read");
              skripsisentimen.sentimenttwitter.txtAreaResult.setCaretPosition(skripsisentimen.sentimenttwitter.txtAreaResult.getText().length() - 1);

                }catch(Exception e){
                     skripsisentimen.sentimenttwitter.lblStat.setText(e.getMessage());
                }


          }
        }catch(Exception e){
            skripsisentimen.sentimenttwitter.lblStat.setText(e.getMessage());
           // System.out.println(e.getMessage());
        }
        // Pause 100 ms before handing the row count back.
        Thread.sleep(100);
       return row;
    }
}

4 个答案:

答案 0 :(得分:2)

为 ResultSet 中的每一行都重新打开同义词文件并遍历 2,000 多行,有点浪费。

只需把同义词一次性加载到内存中的 Map 里,以(唯一的)拼写错误的术语作为键;然后对结果集中的每一行在该 Map 中查找,并按需进行替换。

答案 1 :(得分:1)

让我们使用这两种解决方案为您构建单一解决方案:

首先,创建一个包含所有键的 HashMap:

/**
 * Returns a fresh, mutable map from each misspelled term to its replacement.
 *
 * The pairs are hard-coded sample data; your version would read them from the file.
 *
 * @return a new {@code HashMap} holding the five sample pairs
 */
public static HashMap<String, String> getMap() {
    final String[][] samplePairs = {
        {"min", "admin"},
        {"lelet", "lambat"},
        {"lemot", "lambat"},
        {"nii", "nih"},
        {"ntu", "itu"},
    };
    HashMap<String, String> replacements = new HashMap<String, String>();
    for (String[] pair : samplePairs) {
        replacements.put(pair[0], pair[1]);
    }
    return replacements;
}

其次,您创建一个包含hashmap中所有键的模式:

/**
 * Builds a regular expression that matches any key of the map as a whole word.
 *
 * Every key is quoted, so characters such as {@code +} or {@code .} in a term
 * are matched literally. The alternation is wrapped in
 * {@code (?<!\w)...(?!\w)} so that a term is not matched inside a longer word
 * (for example "min" inside "admin").
 *
 * @param mapReplacement map whose keys are the terms to look for
 * @return the pattern source, or the empty string when the map has no keys
 */
public static String getPattern(HashMap<String,String> mapReplacement) {
    if (mapReplacement.isEmpty()) {
        return "";
    }
    StringBuilder alternatives = new StringBuilder();
    for (String key : mapReplacement.keySet()) {
        if (alternatives.length() > 0) {
            alternatives.append('|');
        }
        // Quote the key: it is data, not regex syntax.
        alternatives.append(java.util.regex.Pattern.quote(key));
    }
    return "(?<!\\w)(?:" + alternatives + ")(?!\\w)";
}

接下来,您可以创建一个使用您创建的结构的cleanTweet方法:

/**
 * Replaces every match of {@code pattern} in {@code tweet} with the value
 * stored for the matched text in {@code myMap}.
 *
 * Each match is rewritten exactly once, at the position where it was found.
 * A match that has no entry in the map is copied through unchanged.
 *
 * @param tweet   text to clean
 * @param pattern compiled pattern that matches the terms to replace
 * @param myMap   map from matched term to its replacement
 * @return the tweet with all mapped terms replaced
 */
public static String cleanTweet(String tweet, Pattern pattern,HashMap<String, String> myMap) {
    Matcher matcher = pattern.matcher(tweet);
    // StringBuffer is the type Matcher.appendReplacement accepts on every Java version.
    StringBuffer cleaned = new StringBuffer();
    while (matcher.find()) {
        String key = matcher.group();
        String replacement = myMap.get(key);
        if (replacement == null) {
            replacement = key;
        }
        // quoteReplacement keeps '$' and '\' in the replacement literal.
        matcher.appendReplacement(cleaned, Matcher.quoteReplacement(replacement));
    }
    matcher.appendTail(cleaned);
    return cleaned.toString();
}

这可能还需要一些调整才能完善(我只测试了少数几种情况),但关键在于:你只需遍历一次所有的键,之后只需遍历推文本身。

我希望它有所帮助。

答案 2 :(得分:0)

我没试过,但在我看来你几乎得到了它 - 只需更换这一行:

newTweet = tweet.replace(term, replacer);

用这个:

tweet = tweet.replaceAll(term, replacer);

由于您不再使用newTweet,请返回tweet

return tweet;

您还应删除newTweet声明。

另外,不应该用 Scanner 来读取行,请改用 FileReader。

答案 3 :(得分:0)

谢谢大家 我找到了代码无效的答案,

每次程序从数据库中读取一行时,都应重新打开(初始化)包含术语及其替换词的 txt 文件。

代码就像这样

/**
 * Background task that copies tweets from {@code synonimtweet} into
 * {@code synonimtweet_v2}, replacing every term listed in the synonym file
 * with its substitute on the way.
 */
public class synonimReplaceV2 extends SwingWorker {

    /** Synonym list with one {@code term=substitute} pair per line. */
    private static final String SYNONYM_FILE = "synV2/catatan_kata_sinonim.txt";

    /**
     * Reads the rows selected by {@code select * from synonimtweet limit 2,10},
     * applies the synonym replacements to each tweet and inserts the result
     * into {@code synonimtweet_v2}. Progress and error messages are written to
     * the static {@code txtAreaResult} / {@code lblStat} fields of
     * {@code skripsisentimen.sentimenttwitter}.
     *
     * @return the number of rows read (boxed {@code Integer})
     * @throws Exception if opening the window or the connection fails, or if
     *         the final sleep is interrupted; failures while processing rows
     *         are caught and shown in {@code lblStat}
     */
    protected Object doInBackground() throws Exception {
        // NOTE(review): this method runs on the worker thread; the Swing calls
        // are left where the original had them.
        new skripsisentimen.sentimenttwitter().setVisible(true);

        Integer row = 0;
        DB db = new DB();
        Connection conn = db.dbConnect("jdbc:mysql://localhost:3306/tweet", "root", "");
        // Statement, result set and prepared insert are closed automatically.
        try (Statement select = conn.createStatement();
             ResultSet RS = select.executeQuery("select * from synonimtweet limit 2,10");
             PreparedStatement insert = conn.prepareStatement(
                     "INSERT INTO synonimtweet_v2(no,tweet,published,label) values(?,?,?,?)")) {
            while (RS.next()) {
                row++;

                String no = RS.getString("no");
                String tweet = RS.getString("tweet");
                String published = RS.getString("published");
                String label = RS.getString("label");

                // The synonym file is opened afresh for every row, so each
                // tweet sees the complete list.
                String newTweet = replaceSynonyms(tweet);

                try {
                    // Bound parameters: quotes inside a tweet can no longer
                    // break (or alter) the INSERT statement.
                    insert.setString(1, no);
                    insert.setString(2, newTweet);
                    insert.setString(3, published);
                    insert.setString(4, label);
                    insert.executeUpdate();

                    String current = skripsisentimen.sentimenttwitter.txtAreaResult.getText();
                    skripsisentimen.sentimenttwitter.txtAreaResult.setText(current+"\n"+row+"original : "+tweet+"\n"+newTweet+"\n______________________\n");
                    skripsisentimen.sentimenttwitter.lblStat.setText(row+" tweet read");
                    skripsisentimen.sentimenttwitter.txtAreaResult.setCaretPosition(skripsisentimen.sentimenttwitter.txtAreaResult.getText().length() - 1);
                } catch (Exception e) {
                    // A failed insert is reported and the next row is still processed.
                    skripsisentimen.sentimenttwitter.lblStat.setText(e.getMessage());
                }
            }
        } catch (Exception e) {
            skripsisentimen.sentimenttwitter.lblStat.setText(e.getMessage());
        }
        Thread.sleep(100);
        return row;
    }

    /**
     * Applies every {@code term=substitute} line of the synonym file to the tweet.
     *
     * A term is replaced only where it starts a whitespace-delimited token and
     * is not followed by another word character, so "min" is replaced in
     * "halo min" and "min," but not in "minum". The surrounding whitespace is
     * kept. Lines without a term or without a substitute are skipped.
     *
     * @param tweet text to process; {@code null} is returned unchanged
     * @return the tweet with all listed terms replaced
     * @throws IOException if the synonym file cannot be read
     */
    private static String replaceSynonyms(String tweet) throws IOException {
        if (tweet == null) {
            return null;
        }
        String result = tweet;
        try (BufferedReader buffSyn = new BufferedReader(new FileReader(SYNONYM_FILE))) {
            String line;
            while ((line = buffSyn.readLine()) != null) {
                int separator = line.indexOf('=');
                if (separator <= 0 || separator == line.length() - 1) {
                    continue; // blank or malformed line
                }
                String term = line.substring(0, separator);
                String substitute = line.substring(separator + 1);
                // Quote both sides: they are plain text, not regex syntax.
                String regex = "(?<!\\S)" + java.util.regex.Pattern.quote(term) + "(?!\\w)";
                result = result.replaceAll(regex, java.util.regex.Matcher.quoteReplacement(substitute));
            }
        }
        return result;
    }
}

不过我其实更想采用上面 rlinden 给出的代码,只是还没弄清楚该如何调用 cleanTweet 函数。