我有一个简单的程序:
下面是代码。 txt文件采用以下格式:
21/03/2013 04:18:23 6890 6830 6850 6770 6830 6400 6630 6710 6770 6850 35024 34976 21/03/2013 04:18:23 6910 6800 6850 6770 6820 6410 6590 6710 6780 6820 35056 34976 21/03/2013 04:18:24 6890 6820 6860 6770 6830 6400 6580 6720 6770 6830 34912 34880 21/03/2013 04:18:24 6860 6840 6840 6770 6830 6390 6660 6700 6740 6890 35008 34880
我的程序以这种方式转换代码:
put sensor.rat.128 1364278801 7100 sensor=A
put sensor.rat.128 1364278801 6910 sensor=B
put sensor.rat.128 1364278801 6890 sensor=C
put sensor.rat.128 1364278801 6630 sensor=D
该程序在单个txt文件上运行良好,因为它每秒只保留一行值;但如果同一秒(相同时间戳)的值出现在不同的txt文件中,它就无法识别出这些重复。
所以问题是:如何让代码在跨多个文件的情况下,每秒也只保存一组值? 希望我把问题说清楚了。
import java.util.Scanner;
import java.util.List;
import java.util.ArrayList;
import java.io.*;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.commons.io.FileUtils;
/**
 * Converts whitespace-separated sensor logs into {@code put} lines on standard output.
 *
 * <p>Each record of an input file is 14 tokens: a date ({@code dd/MM/yyyy}), a time
 * ({@code HH:mm:ss}) and twelve readings. Consecutive records of one file that carry the same
 * date and time are collapsed to the first one. Every kept record yields twelve lines of the
 * form {@code put <metric> <epoch-seconds> <reading> sensor=<tag>}. Duplicates are only detected
 * inside a single file, not across files.
 */
public class Downsampler {
    /** Folder scanned when no folder is passed on the command line. */
    private static final String DEFAULT_PATH = "/home/alessandro/Data128prova";

    /** Name of the metric written on every output line. */
    private static final String METRIC = "sensor.rat.128.riprova";

    /** Tokens per record: date, time and twelve readings. */
    private static final int RECORD_SIZE = 14;

    /** Tag for each reading, in column order (the tagging skips J and K). */
    private static final String[] SENSOR_TAGS =
            {"A", "B", "C", "D", "E", "F", "G", "H", "I", "L", "M", "N"};

    /**
     * Processes every regular file of the input folder, in path order.
     *
     * @param args optional; {@code args[0]} overrides the default input folder
     * @throws FileNotFoundException if the folder cannot be listed or a file cannot be opened
     * @throws Exception if the date/time of a record cannot be parsed
     */
    public static void main(String[] args) throws Exception {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        File[] files = new File(path).listFiles();
        if (files == null) {
            // listFiles() returns null (not an empty array) when the path is not a listable folder.
            throw new FileNotFoundException("Cannot list input folder: " + path);
        }
        // listFiles() guarantees no particular order; sort so the output is reproducible.
        java.util.Arrays.sort(files);

        // Built once: the pattern is the same for every record. The JVM default time zone applies.
        SimpleDateFormat stampFormat = new SimpleDateFormat("dd/MM/yyyy HH:mm:ss");
        for (File file : files) {
            if (!file.isFile()) {
                continue; // a sub-folder cannot be opened by Scanner
            }
            // Convert the whole file before printing, so a parse error prints nothing for it.
            for (String line : toPutLines(readTokens(file), stampFormat)) {
                System.out.println(line);
            }
        }
    }

    /** Reads all whitespace-separated tokens of a file, closing it even when reading fails. */
    private static List<String> readTokens(File file) throws FileNotFoundException {
        List<String> tokens = new ArrayList<String>();
        Scanner scanner = new Scanner(file);
        try {
            while (scanner.hasNext()) {
                tokens.add(scanner.next());
            }
        } finally {
            scanner.close();
        }
        return tokens;
    }

    /** Builds the put lines for one file's tokens, dropping consecutive same-second records. */
    private static List<String> toPutLines(List<String> tokens, SimpleDateFormat stampFormat)
            throws java.text.ParseException {
        List<String> lines = new ArrayList<String>();
        String previousStamp = "";
        // Only complete records are read; a truncated trailing record is ignored instead of
        // running past the end of the token list.
        for (int start = 0; start + RECORD_SIZE <= tokens.size(); start += RECORD_SIZE) {
            String stamp = tokens.get(start) + " " + tokens.get(start + 1);
            Date parsed = stampFormat.parse(stamp);
            long epochSeconds = parsed.getTime() / 1000;
            if (!stamp.equalsIgnoreCase(previousStamp)) {
                for (int s = 0; s < SENSOR_TAGS.length; s++) {
                    // Readings start after the date and time tokens.
                    lines.add("put " + METRIC + " " + epochSeconds + " "
                            + tokens.get(start + 2 + s) + " sensor=" + SENSOR_TAGS[s]);
                }
            }
            previousStamp = stamp;
        }
        return lines;
    }
}
答案 0 :(得分:0)
使用 Map,并以时间作为键(key)来存储数据集。下次在您正在解析的任何文件中再次遇到该时间点时,您可以决定如何处理新的数据集(丢弃它、与已有数据集取平均,等等)。下面是更新后的代码,它使用 Map 将数据集与特定时间关联起来。这段代码只是打印一条语句,说明某个时间点已经处理过;您需要自行插入代码来处理重复的数据。
import java.util.HashMap;
import java.util.Map;
import java.util.Scanner;
import java.util.List;
import java.util.ArrayList;
import java.io.*;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.commons.io.FileUtils;
/**
 * Converts whitespace-separated sensor logs into {@code put} lines on standard output, keeping a
 * single set of readings per second across all files of the input folder.
 *
 * <p>Each record of an input file is 14 tokens: a date ({@code dd/MM/yyyy}), a time
 * ({@code HH:mm:ss}) and twelve readings. The first record seen for a given second wins; later
 * records for that second, in the same file or in any other file, are reported as
 * {@code Already processed time: ...} and produce no {@code put} lines.
 *
 * <p>Every distinct second stays in memory until the run ends, so very large inputs can exhaust
 * the heap.
 */
public class Downsampler
{
    /** Folder scanned when no folder is passed on the command line. */
    private static final String DEFAULT_PATH = "/tmp/data";

    /** Name of the metric written on every output line. */
    private static final String METRIC = "sensor.rat.128.riprova";

    /** Tokens per record: date, time and twelve readings. */
    private static final int RECORD_SIZE = 14;

    /** Tag for each reading, in column order (the tagging skips J and K). */
    private static final String[] SENSOR_TAGS =
            {"A", "B", "C", "D", "E", "F", "G", "H", "I", "L", "M", "N"};

    /**
     * Processes every regular file of the input folder, in path order.
     *
     * @param args optional; {@code args[0]} overrides the default input folder
     * @throws FileNotFoundException if the folder cannot be listed or a file cannot be opened
     * @throws Exception if the date/time of a record cannot be parsed
     */
    public static void main(String[] args) throws Exception
    {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        File[] files = new File(path).listFiles();
        if (files == null)
        {
            // listFiles() returns null (not an empty array) when the path is not a listable folder.
            throw new FileNotFoundException("Cannot list input folder: " + path);
        }
        // "First record wins" must not depend on the unspecified order of listFiles().
        java.util.Arrays.sort(files);

        // Built once: the pattern is the same for every record. The JVM default time zone applies.
        SimpleDateFormat stampFormat = new SimpleDateFormat("dd/MM/yyyy HH:mm:ss");
        // Epoch second -> the twelve readings kept for that second, shared by all files.
        Map<Long, List<String>> timeValuesMap = new HashMap<Long, List<String>>();
        for (File file : files)
        {
            if (!file.isFile())
            {
                continue; // a sub-folder cannot be opened by Scanner
            }
            List<Long> newTimes = collectNewRecords(readTokens(file), stampFormat, timeValuesMap);
            for (Long epoch : newTimes)
            {
                List<String> readings = timeValuesMap.get(epoch);
                for (int s = 0; s < SENSOR_TAGS.length; s++)
                {
                    System.out.println("put " + METRIC + " " + epoch + " " + readings.get(s)
                            + " sensor=" + SENSOR_TAGS[s]);
                }
            }
        }
    }

    /** Reads all whitespace-separated tokens of a file, closing it even when reading fails. */
    private static List<String> readTokens(File file) throws FileNotFoundException
    {
        List<String> tokens = new ArrayList<String>();
        Scanner scanner = new Scanner(file);
        try
        {
            while (scanner.hasNext())
            {
                tokens.add(scanner.next());
            }
        }
        finally
        {
            scanner.close();
        }
        return tokens;
    }

    /**
     * Registers the records of one file in {@code timeValuesMap} and returns, in file order, the
     * epoch seconds that were not known before. Prints one diagnostic line per record.
     */
    private static List<Long> collectNewRecords(List<String> tokens, SimpleDateFormat stampFormat,
            Map<Long, List<String>> timeValuesMap) throws java.text.ParseException
    {
        List<Long> newTimes = new ArrayList<Long>();
        // Only complete records are read; a truncated trailing record is ignored instead of
        // running past the end of the token list.
        for (int start = 0; start + RECORD_SIZE <= tokens.size(); start += RECORD_SIZE)
        {
            String stamp = tokens.get(start) + " " + tokens.get(start + 1);
            Date parsed = stampFormat.parse(stamp);
            Long epoch = Long.valueOf(parsed.getTime() / 1000);
            if (timeValuesMap.containsKey(epoch))
            {
                System.out.println("Already processed time: " + stamp);
                // Policy: keep the first record and ignore this one. To average or take min/max
                // instead, merge these readings into timeValuesMap.get(epoch) here.
            }
            else
            {
                System.out.println("New time: " + stamp);
                // Copy this record's own readings; the token list belongs to the current file.
                timeValuesMap.put(epoch,
                        new ArrayList<String>(tokens.subList(start + 2, start + RECORD_SIZE)));
                newTimes.add(epoch);
            }
        }
        return newTimes;
    }
}
注意:对于非常大的数据集,这可能会导致JVM内存不足,因为Map中的项目数量会变得非常大。