I am reading a large file with Spring Batch, using the Partitioner framework; sample code is attached below.
If the file contains more than 1,000 records, I get an OutOfMemoryError because of the ConcurrentHashMap handling in the execution contexts.
// Imports needed by the snippet:
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.springframework.batch.item.ExecutionContext;

public Map<String, ExecutionContext> partition(int gridSize) {
    System.out.println("inside partition, inboundDir=" + inboundDir);
    File file = new File(inboundDir);
    System.out.println("is directory: " + file.isDirectory());

    Map<String, ExecutionContext> queue = new HashMap<>();
    List<List<String>> trunks = new ArrayList<>();
    int chunkSize = 1;
    int totalCount = 0;

    // Read the whole file and split its lines into chunks of chunkSize lines.
    try (BufferedReader br = new BufferedReader(new FileReader(file))) {
        String line;
        List<String> items = new ArrayList<>();
        while ((line = br.readLine()) != null) {
            totalCount++;
            items.add(line);
            // Flush the chunk once it holds chunkSize lines (the original
            // check ran before the add, which produced an empty first chunk).
            if (items.size() == chunkSize) {
                trunks.add(items);
                items = new ArrayList<>();
            }
        }
        // Keep the trailing partial chunk, if any.
        if (!items.isEmpty()) {
            trunks.add(items);
        }
        System.out.println("total line count: " + totalCount);

        // One ExecutionContext per chunk. Every chunk's lines are copied into
        // its context, so the entire file is held in memory at once; this is
        // what blows up past ~1000 records. (The loop starts at index 2 as in
        // the original code, so the first two chunks are never partitioned.)
        for (int i = 2; i < trunks.size(); i++) {
            ExecutionContext value = new ExecutionContext();
            value.put("fileResource", trunks.get(i));
            value.put("totalCount", totalCount);
            queue.put("trunk" + i + file, value);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return queue;
}
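
For context, the usual way around this kind of OutOfMemoryError is to keep the data out of the ExecutionContext entirely and hand each partition only metadata describing its slice of the file. Below is a minimal sketch of that idea, assuming the same inboundDir field as above; the "startLine"/"endLine" keys are illustrative names I made up for this sketch, not part of any Spring Batch API, and the reader on the worker side would have to honor them.

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.springframework.batch.item.ExecutionContext;

public Map<String, ExecutionContext> partition(int gridSize) {
    Map<String, ExecutionContext> queue = new HashMap<>();
    File file = new File(inboundDir);

    // First pass: count the lines; no line content is retained in memory.
    int totalCount = 0;
    try (BufferedReader br = new BufferedReader(new FileReader(file))) {
        while (br.readLine() != null) {
            totalCount++;
        }
    } catch (IOException e) {
        throw new IllegalStateException("could not read " + file, e);
    }

    // Give each partition a [startLine, endLine) range instead of the data,
    // so each ExecutionContext stays a few bytes regardless of file size.
    int linesPerPartition = (totalCount + gridSize - 1) / gridSize;
    for (int i = 0; i < gridSize; i++) {
        int start = i * linesPerPartition;
        int end = Math.min(start + linesPerPartition, totalCount);
        if (start >= end) {
            break; // fewer lines than partitions
        }
        ExecutionContext ctx = new ExecutionContext();
        ctx.putString("fileResource", file.getAbsolutePath());
        ctx.putInt("startLine", start);
        ctx.putInt("endLine", end);
        ctx.putInt("totalCount", totalCount);
        queue.put("partition" + i, ctx);
    }
    return queue;
}

Each worker step can then pull the range out of its step execution context (for example with @Value("#{stepExecutionContext['startLine']}") on a step-scoped reader) and configure a FlatFileItemReader with setLinesToSkip(startLine) and setMaxItemCount(endLine - startLine), so every partition streams only its own slice of the file instead of receiving the lines through the execution context.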