I have been trying to read 500,000 records from DB2 as a compressed stream and then create 2 jar files, one in fixed-width format and the other in BSON format.
I was able to get the code working, but pulling the data and writing it to these files took close to 120 minutes, so I redesigned the code to use a producer-consumer model. Somehow I am seeing performance get worse with this model; for some reason the multithreading is not working as expected.
Producer producer = new Producer(queue1, queue2, url, user, pass, driver, strQuery);
Consumer1 consumer1 = new Consumer1(queue1, outputDatFile, fileDat.getName());
Consumer2 consumer2 = new Consumer2(queue2, outputDatBSONFile, fileBSONDat.getName());

ExecutorService threadPool = Executors.newFixedThreadPool(3);
Future producerStatus = threadPool.submit(producer);
threadPool.execute(consumer1);
threadPool.execute(consumer2);

try {
    System.out.println("This will wait for the producer to wait " + producerStatus.get());
} catch (InterruptedException e) {
    e.printStackTrace();
} catch (ExecutionException e) {
    e.printStackTrace();
}
threadPool.shutdown();

long end = System.currentTimeMillis();
System.out.println("End Time: " + end);
long elapsedTimeMillis = end - start;
float elapsedTimeSec = (float) elapsedTimeMillis / 1000.0F;
System.out.println("Total Time: " + elapsedTimeSec + " seconds..");

if (!(errorStatus)) {
    System.out.println("Successful exit...");
    System.exit(0);
} else {
    System.out.println("Exiting - Fatal Errors Encountered!");
    System.exit(1);
}
}

private static JarOutputStream getJar(String outputDatFile, String fileName)
        throws FileNotFoundException, IOException {
    JarOutputStream jarOutPutStream = new JarOutputStream(
            new BufferedOutputStream(new FileOutputStream(new File(outputDatFile + ".jar"))));
    jarOutPutStream.setMethod(JarOutputStream.DEFLATED);
    JarEntry ze = new JarEntry(fileName);
    jarOutPutStream.putNextEntry(ze);
    return jarOutPutStream;
}
public class Consumer1 implements Runnable {
    private BlockingQueue<String> queue;
    private String outputDatFile;
    private String datFileName;

    public Consumer1(BlockingQueue<String> queue, String outputDatFile, String datFileName) {
        this.queue = queue;
        this.outputDatFile = outputDatFile;
        this.datFileName = datFileName;
    }

    @Override
    public void run() {
        JarOutputStream jarOutPutStreamText = null;
        try {
            jarOutPutStreamText = getJar(outputDatFile, datFileName);
            int recordsWritten = 0;
            while (true) {
                recordsWritten++;
                try {
                    String objectRetrieved = queue.take();
                    jarOutPutStreamText.write(objectRetrieved.getBytes());
                    jarOutPutStreamText.flush();
                    if (recordsWritten % 100000 == 0) {
                        System.out.println("Written Records Count Queue 1 " + recordsWritten);
                    }
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
            }
        } catch (FileNotFoundException e1) {
            e1.printStackTrace();
        } catch (IOException e1) {
            e1.printStackTrace();
        } finally {
            if (jarOutPutStreamText != null) {
                try {
                    jarOutPutStreamText.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
public class Consumer2 implements Runnable {
    private BlockingQueue<DBObject> queue2;
    private String outputDatBSONFile;
    private String bsonFileName;

    public Consumer2(BlockingQueue<DBObject> queue2, String outputDatBSONFile, String bsonFileName) {
        this.queue2 = queue2;
        this.outputDatBSONFile = outputDatBSONFile;
        this.bsonFileName = bsonFileName;
    }

    @Override
    public void run() {
        JarOutputStream jarOutPutStreamBSON = null;
        try {
            jarOutPutStreamBSON = getJar(outputDatBSONFile, bsonFileName);
            BSONFileWriter bsonWriter = new BSONFileWriter(jarOutPutStreamBSON);
            int recordsWritten = 0;
            while (true) {
                recordsWritten++;
                try {
                    DBObject objectRetrieved = queue2.take();
                    bsonWriter.write(objectRetrieved);
                    bsonWriter.flush();
                    if (recordsWritten % 100000 == 0) {
                        System.out.println("Written Records Count Queue 2 " + recordsWritten);
                    }
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
            }
        } catch (FileNotFoundException e1) {
            e1.printStackTrace();
        } catch (IOException e1) {
            e1.printStackTrace();
        } finally {
            if (jarOutPutStreamBSON != null) {
                try {
                    jarOutPutStreamBSON.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
public class Producer implements Runnable {
    private BlockingQueue<String> queue1;
    private BlockingQueue<DBObject> queue2;
    private String url;
    private String user;
    private String pass;
    private String driver;
    private String strQuery;

    public Producer(BlockingQueue<String> queue1, BlockingQueue<DBObject> queue2, String url, String user,
            String pass, String driver, String strQuery) {
        this.queue1 = queue1;
        this.queue2 = queue2;
        this.url = url;
        this.user = user;
        this.pass = pass;
        this.driver = driver;
        this.strQuery = strQuery;
    }
    @Override
    public void run() {
        Connection con = null;
        Statement st = null;
        ResultSet rs = null;
        try {
            Class.forName(driver);
            con = DriverManager.getConnection(url, user, pass);
            con.setAutoCommit(false);
            Map<String, Object> mapper = new HashMap<String, Object>();
            try {
                st = con.createStatement(ResultSet.TYPE_SCROLL_INSENSITIVE, ResultSet.CONCUR_READ_ONLY);
                st.setFetchSize(20000);
                System.out.println(
                        "Attempting to execute statement: " + System.getProperty("line.separator") + strQuery);
                strQuery = strQuery.replace(System.getProperty("line.separator"), " ");
                rs = st.executeQuery(strQuery);
                ResultSetMetaData md = rs.getMetaData();
                try {
                    int col = md.getColumnCount();
                    int resultSetCounter = 0;
                    while (rs.next()) {
                        String strQueryOutput = "";
                        resultSetCounter++;
                        for (int x = 1; x <= col; ++x) {
                            String outPut = "";
                            if (md.getColumnTypeName(x).equals("DECIMAL")) {
                                StringAlignUtils util = new StringAlignUtils(md.getColumnDisplaySize(x),
                                        Alignment.RIGHT);
                                outPut = util.format(rs.getString(x));
                                strQueryOutput = strQueryOutput + outPut;
                                mapper.put(md.getColumnName(x), outPut);
                            } else if (md.getColumnTypeName(x).equals("NUMERIC")) {
                                StringAlignUtils util = new StringAlignUtils(md.getColumnDisplaySize(x),
                                        Alignment.RIGHT);
                                outPut = util.format(rs.getString(x));
                                strQueryOutput = strQueryOutput + outPut;
                                mapper.put(md.getColumnName(x), outPut);
                            } else if (md.getColumnTypeName(x).equals("CHAR() FOR BIT DATA")) {
                                char charData = rs.getString(x).charAt(0);
                                outPut = charData + "";
                                StringAlignUtils util = new StringAlignUtils(md.getColumnDisplaySize(x),
                                        Alignment.RIGHT);
                                outPut = util.format(outPut);
                                strQueryOutput = strQueryOutput + outPut;
                                mapper.put(md.getColumnName(x), outPut);
                            } else {
                                StringAlignUtils util = new StringAlignUtils(md.getColumnDisplaySize(x),
                                        Alignment.RIGHT);
                                outPut = util.format(rs.getString(x));
                                strQueryOutput = strQueryOutput + outPut;
                                mapper.put(md.getColumnName(x), outPut);
                            }
                        }
                        if (resultSetCounter % 100000 == 0) {
                            System.out.println(" The counter is " + resultSetCounter);
                        }
                        strQueryOutput = strQueryOutput + '\n';
                        queue1.put(strQueryOutput);
                        queue2.put(new BasicDBObject(mapper));
                    }
                } catch (Exception e) {
                    e.printStackTrace();
                    System.err.println("Error: " + e.getMessage());
                    System.err.println("Exiting!");
                    System.exit(1);
                }
                System.out.println("Query results successfully returned...");
            } catch (SQLException s) {
                System.err.println("SQL statement is not executed!");
                System.err.println("Error: " + s.getMessage());
            } finally {
                System.out.println("Trying to Close ResultSet and Statement...");
                if (rs != null) {
                    System.out.println("Closing ResultSet..");
                    rs.close();
                }
                if (st != null) {
                    System.out.println("Closing Statement..");
                    st.close();
                }
            }
        } catch (Exception exception) {
            exception.printStackTrace();
        } finally {
            try {
                System.out.println("Trying to Close database connection..");
                if (con != null) {
                    System.out.println("Closing database connection..");
                    con.close();
                }
            } catch (SQLException exception) {
                exception.printStackTrace();
            }
        }
    }
}
Answer 0 (score: 1)
I think I figured out why the generated files were not formatted correctly.
There were 2 problems:
1) The data was not written out completely. 2) The streams were not closed. (One common way to handle both is sketched below.)
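A minimal sketch of that approach, assuming a hypothetical POISON_PILL sentinel (not the exact fix that was applied): the producer puts the sentinel on the queue after the last row, and the consumer leaves its loop and closes its jar stream when it sees it.

// Sketch only: POISON_PILL is a hypothetical sentinel value, not part of the original code.
private static final String POISON_PILL = "<<EOF>>";

// Producer, after the last row has been queued (queue2 needs an analogous sentinel DBObject):
//     queue1.put(POISON_PILL);

// Consumer1.run(), reworked so that the loop can end and the jar stream is always closed:
JarOutputStream jarOutPutStreamText = null;
try {
    jarOutPutStreamText = getJar(outputDatFile, datFileName);
    while (true) {
        String objectRetrieved = queue.take();
        if (POISON_PILL.equals(objectRetrieved)) {
            break;                              // producer is finished, no more records
        }
        jarOutPutStreamText.write(objectRetrieved.getBytes());
    }
} catch (InterruptedException e) {
    Thread.currentThread().interrupt();         // restore the interrupt flag and stop
} catch (IOException e) {
    e.printStackTrace();
} finally {
    if (jarOutPutStreamText != null) {
        try {
            jarOutPutStreamText.close();        // closing flushes the remaining buffered/compressed data
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

With a sentinel like this in place, the per-record flush() calls can also be dropped, since close() flushes everything at the end.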
Both issues have been fixed, but even with 1 producer and 2 consumers, processing still takes more than 90 minutes for the 500,000 records. Could someone point out areas where this program could be made faster?
Answer 1 (score: 0)
There are quite a few things here that can slow it down.
One small bug:
String objectRetrieved = queue.take();
jarOutPutStreamText.write(objectRetrieved.getBytes());
(Also, should the final else branch perhaps be left-aligned?) The character encoding should be specified explicitly:
jarOutPutStreamText.write(objectRetrieved.getBytes(StandardCharsets.UTF_8));
The easiest one to spot:
String strQueryOutput = "";
should be
StringBuilder strQueryOutput = new StringBuilder(1000 /* output size */);
The DB2 SQL would probably benefit from WITH UR (uncommitted read) as its last line.
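For illustration only (the table and column names here are hypothetical, not taken from the question), the clause is simply appended to the query text:

// Hypothetical query text; WITH UR asks DB2 for uncommitted-read isolation on this statement,
// so the large scan does not wait on row locks.
String strQuery = "SELECT COL_A, COL_B, COL_C FROM MY_SCHEMA.MY_TABLE WITH UR";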
StringAlignUtils should be created once, outside the loop.
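A minimal sketch of that hoisting, as a fragment meant to replace the per-row loop in the question's Producer.run(); it reuses the question's own StringAlignUtils and, purely for brevity, assumes right alignment for every column:

// Build one formatter per column before iterating the ResultSet,
// instead of constructing a new StringAlignUtils for every column of every row.
ResultSetMetaData md = rs.getMetaData();
int col = md.getColumnCount();
StringAlignUtils[] formatters = new StringAlignUtils[col];
for (int x = 1; x <= col; x++) {
    formatters[x - 1] = new StringAlignUtils(md.getColumnDisplaySize(x), Alignment.RIGHT);
}

while (rs.next()) {
    StringBuilder row = new StringBuilder(1000);
    for (int x = 1; x <= col; x++) {
        row.append(formatters[x - 1].format(rs.getString(x)));
    }
    queue1.put(row.append('\n').toString());
    // ... build and enqueue the BSON object as before
}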
In fact, leaving the formatting to the database itself is probably the fastest solution.
If a map has to be handed over for every record, you could prepare the map once and keep an array of its Map.Entry objects indexed by column index (minus 1), so that each value can be changed directly by column index.
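A rough sketch of that idea, with an important caveat: it assumes new BasicDBObject(mapper) copies the current values into a new object; if it merely wrapped the shared map, every row would still need its own map.

// Populate the map once with the column names, then cache its entries in column order,
// so each row only calls setValue() -- no per-field hashing or map insertion.
Map<String, Object> mapper = new LinkedHashMap<>();
for (int x = 1; x <= col; x++) {
    mapper.put(md.getColumnName(x), null);
}
@SuppressWarnings("unchecked")
Map.Entry<String, Object>[] entries = mapper.entrySet().toArray(new Map.Entry[0]);

while (rs.next()) {
    for (int x = 1; x <= col; x++) {
        entries[x - 1].setValue(rs.getString(x)); // update in place by column index
    }
    queue2.put(new BasicDBObject(mapper)); // assumed to snapshot the map's current values
}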
A smaller fetch size (perhaps) and more Java heap may also help.
My experience in this area mostly involves GZIPOutputStream (which I would also expect to be involved here), where the fastest compression is not the strongest compression.
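To illustrate (getJarFastest is a hypothetical variant of the question's getJar, not an existing method): JarOutputStream inherits setLevel from ZipOutputStream, so the deflate level can be lowered when the stream is created, trading compression ratio for speed.

import java.util.zip.Deflater;

// Same as the question's getJar(), but favouring speed over compression ratio.
private static JarOutputStream getJarFastest(String outputDatFile, String fileName) throws IOException {
    JarOutputStream jarOutPutStream = new JarOutputStream(
            new BufferedOutputStream(new FileOutputStream(new File(outputDatFile + ".jar"))));
    jarOutPutStream.setMethod(JarOutputStream.DEFLATED);
    jarOutPutStream.setLevel(Deflater.BEST_SPEED); // lowest deflate level, fastest compression
    jarOutPutStream.putNextEntry(new JarEntry(fileName));
    return jarOutPutStream;
}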