In Spring Batch I am trying to read a CSV file and want to assign each row to a separate thread and process it. I tried to achieve this using a Task Executor, and it works as long as I am not taking the file name from job parameters. When I do pass the file name as a job parameter, all threads read the same row from the file because the reader has scope="step". Will changing it to scope="job" solve this? If so, please suggest how. Currently I am getting the error below:

Caused by: java.lang.IllegalStateException: No Scope registered for scope name 'job'

Please help...
My Job.xml is below:
<job id="partitionJob" xmlns="http://www.springframework.org/schema/batch" restartable="true">
<step id="step" allow-start-if-complete="true">
<partition step="step2" partitioner="partitioner">
<handler grid-size="3" task-executor="taskExecutor" />
</partition>
</step>
</job>
<bean id="partitioner" class="com.range.part.RangePartitioner">
</bean>
<bean id="taskExecutor" class="org.springframework.core.task.SimpleAsyncTaskExecutor" />
<step id="step2" xmlns="http://www.springframework.org/schema/batch">
<tasklet transaction-manager="transactionManager">
<chunk reader="itemReader" writer="cutomitemWriter" processor="itemProcessor" commit-interval="100" />
</tasklet>
</step>
<bean id="itemProcessor" class="com.range.processor.UserProcessor" scope="step">
<property name="threadName" value="#{stepExecutionContext[name]}"/>
</bean>
<bean id="itemReader" class="org.springframework.batch.item.file.FlatFileItemReader" scope="job">
<property name="resource" value="file:#{jobParameters[file]}">
</property>
<!-- <property name="linesToSkip" value="1"/> -->
<property name="lineMapper">
<bean class="org.springframework.batch.item.file.mapping.DefaultLineMapper">
<property name="lineTokenizer">
<bean class="org.springframework.batch.item.file.transform.DelimitedLineTokenizer">
<property name="delimiter" value="," />
<!-- <property name="names" value="transactionBranch,batchEntryDate,batchNo,channelID,CountryCode" />-->
</bean>
</property>
<property name="fieldSetMapper">
<bean class="com.fieldset.FieldsetMapper">
</bean>
</property>
</bean>
</property>
</bean>
<bean id="cutomitemWriter" class="com.range.processor.customitemWritter">
</bean>
Answer 0 (score: 1)
I am thinking of an approach where we can use a partitioner. At the partitioner level we can read the file (using any CSV reader; a Spring reader works too) and create one partition per line, so each line gets processed separately. Every line is added to the partitioner's queue (a Map), which meets your requirement.
I am posting the code here for your reference:
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.springframework.batch.core.partition.support.Partitioner;
import org.springframework.batch.item.ExecutionContext;
import org.springframework.beans.factory.annotation.Value;

public class LinePartitioner implements Partitioner {

    // late-bound from the job parameters, so this bean must be step-scoped
    @Value("#{jobParameters['fileName']}")
    private String fileName;

    @Override
    public Map<String, ExecutionContext> partition(int gridSize) {
        // one partition (one ExecutionContext) per line of the input file
        Map<String, ExecutionContext> queue = new HashMap<>();
        int count = 0;
        try (BufferedReader reader = new BufferedReader(new FileReader(this.fileName))) {
            String line;
            while ((line = reader.readLine()) != null) {
                count++;
                ExecutionContext value = new ExecutionContext();
                value.put("lineContent", line);
                value.put("lineCount", count);
                queue.put("line" + count, value);
            }
        } catch (IOException e) {
            throw new IllegalStateException("Failed to read " + this.fileName, e);
        }
        return queue;
    }
}
As shown in the code above, you can replace the plain reader with any CSV reader or a Spring reader, so that the mapped fields go straight into a POJO object.
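On the worker step, each partition's line can then be late-bound from the step execution context into a step-scoped bean. A minimal sketch against the asker's configuration (the lineContent property is hypothetical and assumes UserProcessor exposes a matching setter):

<!-- one instance per partition; 'lineContent' matches the key put into the ExecutionContext -->
<bean id="itemProcessor" class="com.range.processor.UserProcessor" scope="step">
    <property name="lineContent" value="#{stepExecutionContext['lineContent']}" />
</bean>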
If you need the complete program, please let me know; I will write it and upload it for you.
Thanks, Nghia
-- Update: an example of building the partitioner with 1,000 lines per reader:
@Override
public Map<String, ExecutionContext> partition(int gridSize) {
    Map<String, ExecutionContext> queue = new HashMap<>();
    List<List<String>> trunks = new ArrayList<>();
    // read the file and store the data as a list of trunks, chunkSize lines each
    int chunkSize = 1000;
    int count = 0;
    try (BufferedReader br = new BufferedReader(new FileReader("your file"))) {
        String line;
        List<String> items = null;
        while ((line = br.readLine()) != null) {
            // start a new trunk every chunkSize lines (including the very first line)
            if (count % chunkSize == 0) {
                items = new ArrayList<>();
                trunks.add(items);
            }
            items.add(line);
            count++;
        }
    } catch (IOException e) {
        throw new IllegalStateException("Failed to read input file", e);
    }
    // add each trunk to the queue to start processing
    for (int i = 0; i < trunks.size(); i++) {
        ExecutionContext value = new ExecutionContext();
        value.put("items", trunks.get(i));
        queue.put("trunk" + i, value);
    }
    return queue;
}
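Each worker can then consume its own trunk through a step-scoped reader. A minimal sketch using Spring Batch's built-in ListItemReader (the bean id is made up; the 'items' key matches what the partitioner above puts into each ExecutionContext):

<!-- each partition gets its own instance, so the non-thread-safe ListItemReader is fine here -->
<bean id="trunkItemReader" class="org.springframework.batch.item.support.ListItemReader" scope="step">
    <constructor-arg value="#{stepExecutionContext['items']}" />
</bean>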
Answer 1 (score: 0)
You can look at this example (on Github), a multi-threaded job that imports a large CSV file (around 200,000 lines) into a database and exports it from the DB to JSON files (FileReader and FileWriter are not thread-safe).
<batch:job id="transformJob">
<batch:step id="deleteDir" next="cleanDB">
<batch:tasklet ref="fileDeletingTasklet" />
</batch:step>
<batch:step id="cleanDB" next="countThread">
<batch:tasklet ref="cleanDBTasklet" />
</batch:step>
<batch:step id="countThread" next="split">
<batch:tasklet ref="countThreadTasklet" />
</batch:step>
<batch:step id="split" next="partitionerMasterImporter">
<batch:tasklet>
<batch:chunk reader="largeCSVReader" writer="smallCSVWriter"
commit-interval="#{jobExecutionContext['chunk.count']}" />
</batch:tasklet>
</batch:step>
<batch:step id="partitionerMasterImporter" next="partitionerMasterExporter">
<partition step="importChunked" partitioner="filePartitioner">
<handler grid-size="10" task-executor="taskExecutor" />
</partition>
</batch:step>
<batch:step id="partitionerMasterExporter" next="concat">
<partition step="exportChunked" partitioner="dbPartitioner">
<handler grid-size="10" task-executor="taskExecutor" />
</partition>
</batch:step>
<batch:step id="concat">
<batch:tasklet ref="concatFileTasklet" />
</batch:step>
</batch:job>
<batch:step id="importChunked">
<batch:tasklet>
<batch:chunk reader="smallCSVFileReader" writer="dbWriter"
processor="importProcessor" commit-interval="500">
</batch:chunk>
</batch:tasklet>
</batch:step>
<batch:step id="exportChunked">
<batch:tasklet>
<batch:chunk reader="dbReader" writer="jsonFileWriter"
processor="exportProcessor" commit-interval="#{jobExecutionContext['chunk.count']}">
</batch:chunk>
</batch:tasklet>
</batch:step>
<bean id="jsonFileWriter" class="com.batch.writer.PersonWriterToFile"
scope="step">
<property name="outputPath" value="csv/chunked/paged-#{stepExecutionContext[page]}.json" />
</bean>
<bean id="dbReader" class="com.batch.reader.PersonReaderFromDataBase" scope="step">
<property name="iPersonRepository" ref="IPersonRepository" />
<property name="page" value="#{stepExecutionContext[page]}"/>
<property name="size" value="#{stepExecutionContext[size]}"/>
</bean>
<bean id="countThreadTasklet" class="com.batch.tasklet.CountingTasklet"
scope="step">
<property name="input" value="file:csv/input/#{jobParameters[filename]}" />
</bean>
<bean id="cleanDBTasklet" class="com.batch.tasklet.CleanDBTasklet" />
<bean id="fileDeletingTasklet" class="com.batch.tasklet.FileDeletingTasklet">
<property name="directory" value="file:csv/chunked/" />
</bean>
<bean id="concatFileTasklet" class="com.batch.tasklet.FileConcatTasklet">
<property name="directory" value="file:csv/chunked/" />
<property name="outputFilename" value="csv/output/export.json" />
</bean>
<bean id="filePartitioner" class="com.batch.partitioner.FilePartitioner">
<property name="outputPath" value="csv/chunked/" />
</bean>
<bean id="dbPartitioner" class="com.batch.partitioner.DBPartitioner" scope="step">
<property name="pageSize" value="#{jobExecutionContext['chunk.count']}" />
</bean>
<bean id="largeCSVReader" class="com.batch.reader.LineReaderFromFile"
scope="step">
<property name="inputPath" value="csv/input/#{jobParameters[filename]}" />
</bean>
<bean id="smallCSVWriter" class="com.batch.writer.LineWriterToFile"
scope="step">
<property name="outputPath" value="csv/chunked/"></property>
</bean>
<bean id="smallCSVFileReader" class="com.batch.reader.PersonReaderFromFile"
scope="step">
<constructor-arg value="csv/chunked/#{stepExecutionContext[file]}" />
</bean>
<bean id="importProcessor" class="com.batch.processor.ImportPersonItemProcessor" />
<bean id="exportProcessor" class="com.batch.processor.ExportPersonItemProcessor" />
<bean id="dbWriter" class="com.batch.writer.PersonWriterToDataBase">
<property name="iPersonRepository" ref="IPersonRepository" />
</bean>
In both cases a partitioner is used to split the work into 10 files (one file per thread) for the import, and to export into 10 files (again one per thread); then we concatenate all of them into a single file.
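As a side note, Spring Batch also ships a built-in partitioner for this file-per-thread pattern: MultiResourcePartitioner creates one partition per matching file. A minimal sketch (it exposes each file under the 'fileName' key by default, whereas the configuration above expects 'file' in the step execution context, so the key would need adjusting; step scope defers resolving the pattern until the partitioned step runs, after the split step has produced the files):

<!-- one partition per chunked file; each worker reads #{stepExecutionContext['fileName']} -->
<bean id="filePartitioner" class="org.springframework.batch.core.partition.support.MultiResourcePartitioner" scope="step">
    <property name="resources" value="file:csv/chunked/*.csv" />
</bean>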
Hope this helps.