Java - 许多txt文件之间的字符串比较

时间:2013-07-21 13:16:44

标签: java

我有一个简单的程序:

  • 解析java中的txt文件
  • 将每条记录的日期和时间转换为unix时间戳
  • 每秒只保留一行值

下面是代码。 txt文件采用以下格式:

  

21/03/2013 04:18:23 6890 6830 6850 6770 6830 6400 6630 6710 6770 6850 35024 34976
21/03/2013 04:18:23 6910 6800 6850 6770 6820 6410 6590 6710 6780 6820 35056 34976
21/03/2013 04:18:24 6890 6820 6860 6770 6830 6400 6580 6720 6770 6830 34912 34880
21/03/2013 04:18:24 6860 6840 6840 6770 6830 6390 6660 6700 6740 6890 35008 34880

我的程序把这些数据转换成如下格式的输出:

  

put sensor.rat.128 1364278801 7100 sensor=A

put sensor.rat.128 1364278801 6910 sensor=B

put sensor.rat.128 1364278801 6890 sensor=C

put sensor.rat.128 1364278801 6630 sensor=D

该程序在单个txt文件上运行良好,因为它每秒只保留一行值;但如果同一秒的数据出现在不同的txt文件中,程序就无法识别出它们是重复的。

所以问题是:如何让代码在跨多个文件的情况下,每秒也只保留一组值? 希望我表达清楚了。

import java.util.Scanner;
import java.util.List;
import java.util.ArrayList;
import java.io.*;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.commons.io.FileUtils;


/**
 * Downsamples whitespace-separated sensor logs to at most one record per
 * consecutive timestamp and prints the kept records as "put" lines.
 *
 * <p>Each record consists of 14 tokens: a date ({@code dd/MM/yyyy}), a time
 * ({@code HH:mm:ss}) and 12 sensor readings. Within a single file a record is
 * dropped when its timestamp equals the timestamp of the record directly
 * before it. Files are processed independently of each other, so the same
 * timestamp appearing in two different files is printed twice.
 */
public class Downsampler {

    /** Number of tokens that make up one record: date, time and 12 readings. */
    private static final int TOKENS_PER_RECORD = 14;

    /** Sensor tag attached to each of the 12 readings, in record order. */
    private static final String[] SENSOR_TAGS =
            {"A", "B", "C", "D", "E", "F", "G", "H", "I", "L", "M", "N"};

    /**
     * Scans every regular file in the data folder and prints the downsampled
     * records to standard output.
     *
     * @param args optional; when present, {@code args[0]} is the folder to scan,
     *             otherwise the built-in default folder is used
     * @throws Exception if the folder cannot be listed, a file cannot be read,
     *                   or a timestamp does not match {@code dd/MM/yyyy HH:mm:ss}
     */
    public static void main(String[] args) throws Exception{

        String path = (args != null && args.length > 0)
                ? args[0]
                : "/home/alessandro/Data128prova"; // name of path
        File folder = new File(path);

        // listFiles() returns null (not an empty array) when the path is not a
        // directory or cannot be read; fail with a clear message instead of an NPE.
        File[] files = folder.listFiles();
        if (files == null) {
            throw new FileNotFoundException("Not a readable directory: " + path);
        }
        // listFiles() gives no ordering guarantee; sort so the output is reproducible.
        java.util.Arrays.sort(files);

        // Created once and reused: the instance is only touched by this thread.
        // Timestamps are interpreted in the JVM's default time zone.
        SimpleDateFormat timestampFormat = new SimpleDateFormat("dd/MM/yyyy HH:mm:ss");
        String metric = "sensor.rat.128.riprova"; //name of the metric

        for (File file : files) {
            if (!file.isFile()) {
                continue; // sub-directories cannot be opened by Scanner
            }
            List<String> tokens = readTokens(file);

            String previousTimestamp = "";
            // "i + TOKENS_PER_RECORD <= size" guarantees a complete record, so a
            // truncated trailing record is skipped instead of overrunning the list.
            for (int i = 0; i + TOKENS_PER_RECORD <= tokens.size(); i += TOKENS_PER_RECORD) {
                String timestamp = tokens.get(i) + " " + tokens.get(i + 1);
                long epochSeconds = timestampFormat.parse(timestamp).getTime() / 1000;

                // Keep only the first record of each run of equal timestamps.
                if (!timestamp.equalsIgnoreCase(previousTimestamp)) {
                    printRecord(metric, epochSeconds, tokens, i);
                }
                previousTimestamp = timestamp;
            }
        }
    }

    /** Reads all whitespace-separated tokens of a file, always closing the scanner. */
    private static List<String> readTokens(File file) throws FileNotFoundException {
        List<String> tokens = new ArrayList<String>();
        Scanner scanner = new Scanner(file);
        try {
            while (scanner.hasNext()) {
                tokens.add(scanner.next());
            }
        } finally {
            scanner.close();
        }
        return tokens;
    }

    /** Prints one "put" line per sensor reading of the record starting at {@code start}. */
    private static void printRecord(String metric, long epochSeconds, List<String> tokens, int start) {
        for (int s = 0; s < SENSOR_TAGS.length; s++) {
            // Readings start two tokens after the record start (after date and time).
            System.out.println("put " + metric + " " + epochSeconds + " "
                    + tokens.get(start + 2 + s) + " sensor=" + SENSOR_TAGS[s]);
        }
    }
}

1 个答案:

答案 0 :(得分:0)

使用Map,并以时间作为键(key)来存储数据集。下次在您解析的任何文件中再次遇到该时间点时,您可以决定如何处理新的数据集(丢弃它、与当前数据集取平均,等等)。下面是更新后的代码,它使用Map将数据集与特定时间关联起来。此代码目前只打印一条语句,表明该时间点已经处理过;您需要在该位置插入自己的代码来处理重复的数据。

import java.util.HashMap;
import java.util.Map;
import java.util.Scanner;
import java.util.List;
import java.util.ArrayList;
import java.io.*;
import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.commons.io.FileUtils;

/**
 * Downsamples whitespace-separated sensor logs and tracks, across all files,
 * which timestamps have already been seen.
 *
 * <p>Each record consists of 14 tokens: a date ({@code dd/MM/yyyy}), a time
 * ({@code HH:mm:ss}) and 12 sensor readings. For every record the program
 * reports whether its timestamp is new or was already processed (in this or an
 * earlier file). The "put" output itself is still only deduplicated against
 * the directly preceding record of the same file; the "already processed"
 * branch is the hook where a cross-file policy (ignore, average, min/max, ...)
 * is meant to be plugged in.
 */
public class Downsampler
{

   /** Number of tokens that make up one record: date, time and 12 readings. */
   private static final int TOKENS_PER_RECORD = 14;

   /** Sensor tag attached to each of the 12 readings, in record order. */
   private static final String[] SENSOR_TAGS =
         {"A", "B", "C", "D", "E", "F", "G", "H", "I", "L", "M", "N"};

   /**
    * Scans every regular file in the data folder, reports new/duplicate
    * timestamps and prints the downsampled records to standard output.
    *
    * @param args optional; when present, {@code args[0]} is the folder to scan,
    *             otherwise the built-in default folder is used
    * @throws Exception if the folder cannot be listed, a file cannot be read,
    *                   or a timestamp does not match {@code dd/MM/yyyy HH:mm:ss}
    */
   public static void main(String[] args) throws Exception
   {

      String path = (args != null && args.length > 0) ? args[0] : "/tmp/data"; // name of path
      File folder = new File(path);

      // listFiles() returns null (not an empty array) when the path is not a
      // directory or cannot be read; fail with a clear message instead of an NPE.
      File[] files = folder.listFiles();
      if (files == null)
      {
         throw new FileNotFoundException("Not a readable directory: " + path);
      }
      // listFiles() gives no ordering guarantee; sort so that which file "wins"
      // a timestamp (and therefore the New/Already reports) is reproducible.
      java.util.Arrays.sort(files);

      // Created once and reused: the instance is only touched by this thread.
      // Timestamps are interpreted in the JVM's default time zone.
      SimpleDateFormat timestampFormat = new SimpleDateFormat("dd/MM/yyyy HH:mm:ss");
      String metric = "sensor.rat.128.riprova"; //name of the metric

      // epoch seconds -> the row (epoch, time token, 12 readings) first seen for it
      Map<Long, List<String>> timeValuesMap = new HashMap<Long, List<String>>();
      for (File file : files)
      {
         if (!file.isFile())
         {
            continue; // sub-directories cannot be opened by Scanner
         }
         List<String> list = readTokens(file);

         // rows of this file that survive the consecutive-duplicate filter
         List<List<String>> keptRows = new ArrayList<List<String>>();

         String app = "";
         // "i + TOKENS_PER_RECORD <= size" guarantees a complete record, so a
         // truncated trailing record is skipped instead of overrunning the list.
         for (int i = 0; i + TOKENS_PER_RECORD <= list.size(); i += TOKENS_PER_RECORD)
         {
            //combining the first two tokens to obtain the timestamp
            String str = list.get(i) + " " + list.get(i + 1);
            long epochlong = timestampFormat.parse(str).getTime() / 1000;

            // Private copy of this record: epoch followed by the time token and
            // the 12 readings. Stored per timestamp, never shared between keys.
            List<String> row = new ArrayList<String>(TOKENS_PER_RECORD);
            row.add(Long.toString(epochlong));
            row.addAll(list.subList(i + 1, i + TOKENS_PER_RECORD));

            if (!str.equalsIgnoreCase(app))
            {
               keptRows.add(row);
            }
            app = str;

            if (timeValuesMap.containsKey(epochlong))
            {
               System.out.println("Already processed time: " + str);
               //do something - ignore values, average across sensor, min/max, etc...
               //e.g. combine 'row' with timeValuesMap.get(epochlong) and put the result back
            }
            else
            {
               System.out.println("New time: " + str);
               timeValuesMap.put(epochlong, row);
            }
         }

         // Printed after the scan so the status lines of a file precede its put lines.
         for (List<String> row : keptRows)
         {
            printRecord(metric, row);
         }

      } //end of for
   }

   /** Reads all whitespace-separated tokens of a file, always closing the scanner. */
   private static List<String> readTokens(File file) throws FileNotFoundException
   {
      List<String> tokens = new ArrayList<String>();
      Scanner scanner = new Scanner(file);
      try
      {
         while (scanner.hasNext())
         {
            tokens.add(scanner.next());
         }
      }
      finally
      {
         scanner.close();
      }
      return tokens;
   }

   /** Prints one "put" line per sensor reading of a row laid out as epoch, time, 12 readings. */
   private static void printRecord(String metric, List<String> row)
   {
      for (int s = 0; s < SENSOR_TAGS.length; s++)
      {
         // Readings start at index 2 (index 0 is the epoch, index 1 the time token).
         System.out.println("put " + metric + " " + row.get(0) + " " + row.get(2 + s)
               + " sensor=" + SENSOR_TAGS[s]);
      }
   }
}

注意:对于非常大的数据集,这可能会导致JVM内存不足,因为Map中的项目数量会变得非常大。