下面是我的代码和线程转储。我不知道为什么CPU占用率会达到100%。有人可以帮忙吗?以下方法很可疑:我只是遍历结果集,并使用Apache Commons CSV(commons-csv-1.5.jar)将其写入CSV文件。当我注释掉调用此方法的那一行时,CPU占用率保持在3%。
/**
 * Writes every row of {@code resultSet} to a CSV file whose header row is made of the
 * result set's column names.
 *
 * <p>If {@code checkIfFileExist(fileName)} reports that the target already exists, an
 * underscore and a time stamp formatted with {@code Constants.FORMAT_yyyyMMddHHmm} are
 * appended to the file name so the existing file is not overwritten.
 *
 * <p>Side effects: assigns {@code columnCount} (number of columns) and {@code recordCount}
 * (number of rows written so far); both are declared outside this method. The result set
 * itself is not closed here.
 *
 * <p>{@link SQLException} and {@link IOException} are caught and their stack traces printed;
 * nothing is propagated to the caller.
 *
 * @param resultSet the result set to export; it is iterated forward from its current position
 * @param fileName  the path of the CSV file to create
 */
public static void writeResultSetToFile(ResultSet resultSet, String fileName) {
    // If a file with the same name already exists, a date stamp is appended to the file name.
    if (checkIfFileExist(fileName)) {
        LOGGER.info("FILE EXIST:"+fileName);
        String fileNamePostFix = new SimpleDateFormat(Constants.FORMAT_yyyyMMddHHmm).format(new Date());
        fileName = fileName.concat(Constants.UNDERSCORE).concat(fileNamePostFix);
        LOGGER.info("WRITING TO FILE: "+fileName);
    }
    try {
        // Read the header before opening the file, so a metadata failure does not leave an
        // empty file behind.
        ResultSetMetaData metadata = resultSet.getMetaData();
        final int columns = metadata.getColumnCount();
        columnCount = columns;
        String[] headerArray = new String[columns];
        for (int i = 1; i <= columns; i++) {
            // JDBC columns are 1-based, the array is 0-based.
            headerArray[i - 1] = metadata.getColumnName(i);
        }
        CSVFormat format = CSVFormat.DEFAULT.withHeader(headerArray).withDelimiter(Constants.C_DELIMITER);

        // try-with-resources closes the printer first and the writer second (reverse
        // declaration order), even when writing fails half-way through.
        try (BufferedWriter writer = new BufferedWriter(
                     new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8));
             CSVPrinter csvPrinter = new CSVPrinter(writer, format)) {
            recordCount = 0;
            // One presized list reused for every row: printRecord writes the values out
            // immediately, so the list can be cleared instead of reallocated.
            List<String> valueList = new ArrayList<String>(columns);
            while (resultSet.next()) {
                recordCount++;
                valueList.clear();
                for (int i = 1; i <= columns; i++) {
                    valueList.add(resultSet.getString(i));
                }
                csvPrinter.printRecord(valueList);
            }
        }
    } catch (SQLException e) {
        // NOTE(review): failures are only printed, not propagated; consider routing them
        // through LOGGER once its error-level API is confirmed.
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
THREAD DUMP
Full thread dump OpenJDK 64-Bit Server VM (24.171-b01 mixed mode):
"pool-2-thread-1" prio=10 tid=0x00007f2eb839d000 nid=0x264e runnable [0x00007f2ea6dc4000]
java.lang.Thread.State: RUNNABLE
at java.net.SocketInputStream.socketRead0(Native Method)
at java.net.SocketInputStream.read(SocketInputStream.java:153)
at java.net.SocketInputStream.read(SocketInputStream.java:122)
at java.io.BufferedInputStream.fill(BufferedInputStream.java:235)
at java.io.BufferedInputStream.read1(BufferedInputStream.java:275)
at java.io.BufferedInputStream.read(BufferedInputStream.java:334)
- locked <0x00000000e01bc220> (a java.io.BufferedInputStream)
at org.apache.thrift.transport.TIOStreamTransport.read(TIOStreamTransport.java:127)
at org.apache.thrift.transport.TTransport.readAll(TTransport.java:84)
at org.apache.thrift.transport.TSaslTransport.readLength(TSaslTransport.java:346)
at org.apache.thrift.transport.TSaslTransport.readFrame(TSaslTransport.java:423)
at org.apache.thrift.transport.TSaslTransport.read(TSaslTransport.java:405)
at org.apache.thrift.transport.TSaslClientTransport.read(TSaslClientTransport.java:37)
at org.apache.thrift.transport.TTransport.readAll(TTransport.java:84)
at org.apache.thrift.protocol.TBinaryProtocol.readAll(TBinaryProtocol.java:378)
at org.apache.thrift.protocol.TBinaryProtocol.readI32(TBinaryProtocol.java:297)
at org.apache.thrift.protocol.TBinaryProtocol.readMessageBegin(TBinaryProtocol.java:204)
at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:69)
at org.apache.hive.service.cli.thrift.TCLIService$Client.recv_FetchResults(TCLIService.java:515)
at org.apache.hive.service.cli.thrift.TCLIService$Client.FetchResults(TCLIService.java:502)
at com.cloudera.hive.hivecommon.api.HS2Client.fetchNRows(HS2Client.java:321)
at com.cloudera.hive.hive.api.ExtendedHS2Client.fetchNRows(ExtendedHS2Client.java:499)
at com.cloudera.hive.hivecommon.api.HS2Client.fetchRows(HS2Client.java:301)
at com.cloudera.hive.hivecommon.dataengine.BackgroundFetcher.run(BackgroundFetcher.java:138)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:473)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1152)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:622)
at java.lang.Thread.run(Thread.java:748)
"Service Thread" daemon prio=10 tid=0x00007f2eb80b4800 nid=0x2562 runnable [0x0000000000000000]
java.lang.Thread.State: RUNNABLE
"C2 CompilerThread1" daemon prio=10 tid=0x00007f2eb80b2000 nid=0x2561 waiting on condition [0x0000000000000000]
java.lang.Thread.State: RUNNABLE
"C2 CompilerThread0" daemon prio=10 tid=0x00007f2eb80af000 nid=0x2560 waiting on condition [0x0000000000000000]
java.lang.Thread.State: RUNNABLE
"Signal Dispatcher" daemon prio=10 tid=0x00007f2eb80ad000 nid=0x255f waiting on condition [0x0000000000000000]
java.lang.Thread.State: RUNNABLE
"Finalizer" daemon prio=10 tid=0x00007f2eb807f800 nid=0x255e in Object.wait() [0x00007f2eb45f4000]
java.lang.Thread.State: WAITING (on object monitor)
at java.lang.Object.wait(Native Method)
- waiting on <0x00000000e00247f0> (a java.lang.ref.ReferenceQueue$Lock)
at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:135)
- locked <0x00000000e00247f0> (a java.lang.ref.ReferenceQueue$Lock)
at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:151)
at java.lang.ref.Finalizer$FinalizerThread.run(Finalizer.java:209)
"Reference Handler" daemon prio=10 tid=0x00007f2eb807d800 nid=0x255d in Object.wait() [0x00007f2eb46f5000]
java.lang.Thread.State: WAITING (on object monitor)
at java.lang.Object.wait(Native Method)
- waiting on <0x00000000e000e1c8> (a java.lang.ref.Reference$Lock)
at java.lang.Object.wait(Object.java:503)
at java.lang.ref.Reference$ReferenceHandler.run(Reference.java:133)
- locked <0x00000000e000e1c8> (a java.lang.ref.Reference$Lock)
"main" prio=10 tid=0x00007f2eb800b800 nid=0x2557 runnable [0x00007f2ec05fb000]
java.lang.Thread.State: RUNNABLE
at java.lang.String.split(String.java:2289)
at java.lang.String.split(String.java:2355)
at com.cloudera.hive.hivecommon.dataengine.HiveJDBCQueryAnalyserUtils.queryAnalysis(HiveJDBCQueryAnalyserUtils.java:49)
at com.cloudera.hive.hivecommon.api.HS2Buffer.getData(HS2Buffer.java:181)
at com.cloudera.hive.hivecommon.api.HS2Client.getData(HS2Client.java:705)
at com.cloudera.hive.hivecommon.dataengine.HiveJDBCResultSet.getData(HiveJDBCResultSet.java:265)
at com.cloudera.hive.jdbc.common.SForwardResultSet.getData(SForwardResultSet.java:4590)
at com.cloudera.hive.jdbc.common.SForwardResultSet.getString(SForwardResultSet.java:2138)
at xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.utils.FileUtils.writeResultSetToFile(FileUtils.java:153)
at xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.utils.DatabaseUtils.executeQueryAndWriteToFile(DatabaseUtils.java:135)
at xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.processor.JDBCProcessor.processCustomQueries(JDBCProcessor.java:84)
at xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.processor.JDBCProcessor.process(JDBCProcessor.java:47)
at xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.main.App.main(App.java:49)
"VM Thread" prio=10 tid=0x00007f2eb8077000 nid=0x255c runnable
"GC task thread#0 (ParallelGC)" prio=10 tid=0x00007f2eb8021000 nid=0x2558 runnable
"GC task thread#1 (ParallelGC)" prio=10 tid=0x00007f2eb8023000 nid=0x2559 runnable
"GC task thread#2 (ParallelGC)" prio=10 tid=0x00007f2eb8025000 nid=0x255a runnable
"GC task thread#3 (ParallelGC)" prio=10 tid=0x00007f2eb8027000 nid=0x255b runnable
"VM Periodic Task Thread" prio=10 tid=0x00007f2eb80bf800 nid=0x2563 waiting on condition
JNI global references: 234
Heap
PSYoungGen total 160768K, used 126191K [0x00000000f5500000, 0x00000000ffd80000, 0x0000000100000000)
eden space 149504K, 84% used [0x00000000f5500000,0x00000000fd033ef8,0x00000000fe700000)
from space 11264K, 0% used [0x00000000fe700000,0x00000000fe708000,0x00000000ff200000)
to space 10240K, 0% used [0x00000000ff380000,0x00000000ff380000,0x00000000ffd80000)
ParOldGen total 349184K, used 192583K [0x00000000e0000000, 0x00000000f5500000, 0x00000000f5500000)
object space 349184K, 55% used [0x00000000e0000000,0x00000000ebc11df8,0x00000000f5500000)
PSPermGen total 21504K, used 12023K [0x00000000d5a00000, 0x00000000d6f00000, 0x00000000e0000000)
object space 21504K, 55% used [0x00000000d5a00000,0x00000000d65bde38,0x00000000d6f00000)
答案 0 :(得分:2)
嗯,这不算是答案,只是一些提示,因为评论区放不下:
1)您正在使用初始容量为零的ArrayList……它必须频繁地重新分配内部缓冲区,因为ArrayList的容量是这样增长的:0 -> 1 -> 2 -> 4 -> 7 -> 11 -> 17。
因此,不要使用 List<String> valueList = new ArrayList<String>(0);
而应使用 List<String> valueList = new ArrayList<String>(columnCount);(headerList 同理)
在循环中,不要使用 valueList = new ArrayList<String>(0);
而应使用 valueList.clear();
2)你应该使用resource-try-catch,更容易处理。
3)这段代码——或者更确切地说,它的幕后——有大量的解析工作:先解析SQL结果,然后再"处理"数据(必须以CSV兼容的方式进行转义)。不要低估这一点!如果此CSVPrinter还执行一些额外的格式设置(例如添加额外的空格,使文本文件看起来像表格等),开销就更大了。
4)此外,Apache的库并不以速度和资源效率著称!
5)手动刷新(flush,如评论中所述)不利于性能!
6)从表面上看,你的方法writeResultSetToFile()是在循环中被调用的。我不确定,但是你提到"约3%的使用率",这听起来像是一个持续运行的任务,也就是一个循环。因此,假设确实有一个循环,并且该循环本身就占用了3%的CPU,那么它似乎每秒要循环几千次。
如果您现在在每次调用时都检索并存储两百万行文本,这将拖慢整个程序的速度,而且会慢很多。
也许你不应该经常写这个文件?也许每分钟一次就足够了?但是,通常情况下,您可能会考虑使用与主循环并行运行的解耦线程。