我尝试使用rocksdb来缓存ProcessFunction所需的信息,并且以下似乎是让它到目前为止工作的唯一方法:
(1)从数据存储区加载数据(例如mysql)并将数据放入rocksdb然后在open()中关闭rocksdb句柄。
(2)open&每当调用processElement()时关闭rocksdb句柄。
像这样:public static class MatchFunction extends ProcessFunction<TaxiRide, TaxiRide> {
// keyed, managed state
// holds an END event if the ride has ended, otherwise a START event
private ValueState<TaxiRide> rideState;
private RocksDB rocksdb = null;
private String dbPath = null;
@Override
public void close() throws Exception {
super.close();
if(rocksdb != null) {
rocksdb.close();
}
}
@Override
public void open(Configuration config) {
ValueStateDescriptor<TaxiRide> startDescriptor =
new ValueStateDescriptor<>("saved ride", TaxiRide.class);
rideState = getRuntimeContext().getState(startDescriptor);
if(rocksdb == null) {
try {
Class.forName("com.mysql.jdbc.Driver");
} catch (ClassNotFoundException e) {
e.printStackTrace();
}
Connection connect = null;
PreparedStatement preparedStatement = null;
ResultSet resultSet = null;
try {
connect = DriverManager
.getConnection("jdbc:mysql://127.0.0.1:3306/test?useUnicode=true&characterEncoding=utf8&zeroDateTimeBehavior=convertToNull&"
+ "user=user&password=password");
preparedStatement = connect.prepareStatement("select * from test.feature");
resultSet = preparedStatement.executeQuery();
RocksDB.loadLibrary();
try (final Options options = new Options().setCreateIfMissing(true)) {
// a factory method that returns a RocksDB instance
dbPath = "/tmp/checkpoints/rocksdb/test01_" + UUID.randomUUID();
try (final RocksDB db = RocksDB.open(options, dbPath)) {
rocksdb = db;
System.out.println("db opened: " + dbPath);
String key01 = "key01";
String val01 = "val01";
while (resultSet.next()) {
key01 = resultSet.getString(1);
val01 = resultSet.getString(2);
System.out.println("before put " + key01 + ":" + val01);
rocksdb.put(key01.getBytes(), val01.getBytes());
System.out.println("after put " + key01 + ":" + val01);
}
}
} catch (RocksDBException e) {
// do some error handling
e.printStackTrace();
} finally {
if(rocksdb != null) {
rocksdb.close();
System.out.println("db closed: " + dbPath);
}
}
} catch (SQLException e) {
e.printStackTrace();
} finally {
if(resultSet != null) {
try {
resultSet.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
if(preparedStatement != null) {
try {
preparedStatement.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
if(connect != null) {
try {
connect.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
}
}
}
@Override
public void processElement(TaxiRide ride, Context context, Collector<TaxiRide> out) throws Exception {
TimerService timerService = context.timerService();
try (final Options options = new Options().setCreateIfMissing(true)) {
// a factory method that returns a RocksDB instance
try (final RocksDB db = RocksDB.open(options, dbPath)) {
rocksdb = db;
// System.out.println("db opened: " + dbPath);
String val01 = new String(rocksdb.get("f8416af7-b895-4f28-bcea-be1eef6bbdb2".getBytes()));
// System.out.println(">>> val01 = " + val01);
rocksdb.close();
// System.out.println("db closed: " + dbPath);
}
} catch (RocksDBException e) {
// do some error handling
e.printStackTrace();
}
if (ride.isStart) {
// the matching END might have arrived first (out of order); don't overwrite it
if (rideState.value() == null) {
rideState.update(ride);
}
} else {
rideState.update(ride);
}
timerService.registerEventTimeTimer(ride.getEventTime() + 120 * 60 * 1000);
}
@Override
public void onTimer(long timestamp, OnTimerContext context, Collector<TaxiRide> out) throws Exception {
TaxiRide savedRide = rideState.value();
if (savedRide != null && savedRide.isStart) {
out.collect(savedRide);
}
rideState.clear();
}
}
由于在processElement()中发生了大量IO,因此效率非常低。此ProcessFunction能够在10分钟内处理所有数据,在添加rocksdb相关行后处理部分数据需要40多分钟。所以我尝试使用以下实现重新使用在open()中创建的rocksdb处理。
public static class MatchFunction extends ProcessFunction<TaxiRide, TaxiRide> {
// keyed, managed state
// holds an END event if the ride has ended, otherwise a START event
private ValueState<TaxiRide> rideState;
private RocksDB rocksdb = null;
private String dbPath = null;
@Override
public void close() throws Exception {
super.close();
if(rocksdb != null) {
rocksdb.close();
}
}
@Override
public void open(Configuration config) {
ValueStateDescriptor<TaxiRide> startDescriptor =
new ValueStateDescriptor<>("saved ride", TaxiRide.class);
rideState = getRuntimeContext().getState(startDescriptor);
if(rocksdb == null) {
try {
Class.forName("com.mysql.jdbc.Driver");
} catch (ClassNotFoundException e) {
e.printStackTrace();
}
Connection connect = null;
PreparedStatement preparedStatement = null;
ResultSet resultSet = null;
try {
connect = DriverManager
.getConnection("jdbc:mysql://127.0.0.1:3306/test?useUnicode=true&characterEncoding=utf8&zeroDateTimeBehavior=convertToNull&"
+ "user=user&password=password");
preparedStatement = connect.prepareStatement("select * from test.feature");
resultSet = preparedStatement.executeQuery();
RocksDB.loadLibrary();
try (final Options options = new Options().setCreateIfMissing(true)) {
// a factory method that returns a RocksDB instance
dbPath = "/tmp/checkpoints/rocksdb/test01_" + UUID.randomUUID();
try (final RocksDB db = RocksDB.open(options, dbPath)) {
rocksdb = db;
System.out.println("db opened: " + dbPath);
String key01 = "key01";
String val01 = "val01";
while (resultSet.next()) {
key01 = resultSet.getString(1);
val01 = resultSet.getString(2);
System.out.println("before put " + key01 + ":" + val01);
rocksdb.put(key01.getBytes(), val01.getBytes());
System.out.println("after put " + key01 + ":" + val01);
}
}
} catch (RocksDBException e) {
// do some error handling
e.printStackTrace();
} finally {
// if(rocksdb != null) {
// rocksdb.close();
// System.out.println("db closed: " + dbPath);
// }
}
} catch (SQLException e) {
e.printStackTrace();
} finally {
if(resultSet != null) {
try {
resultSet.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
if(preparedStatement != null) {
try {
preparedStatement.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
if(connect != null) {
try {
connect.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
}
}
}
@Override
public void processElement(TaxiRide ride, Context context, Collector<TaxiRide> out) throws Exception {
TimerService timerService = context.timerService();
//try (final Options options = new Options().setCreateIfMissing(true)) {
// // a factory method that returns a RocksDB instance
// try (final RocksDB db = RocksDB.open(options, dbPath)) {
// rocksdb = db;
// System.out.println("db opened: " + dbPath);
String val01 = new String(rocksdb.get("f8416af7-b895-4f28-bcea-be1eef6bbdb2".getBytes()));
// System.out.println(">>> val01 = " + val01);
// rocksdb.close();
// System.out.println("db closed: " + dbPath);
// }
//} catch (RocksDBException e) {
// // do some error handling
// e.printStackTrace();
//}
if (ride.isStart) {
// the matching END might have arrived first (out of order); don't overwrite it
if (rideState.value() == null) {
rideState.update(ride);
}
} else {
rideState.update(ride);
}
timerService.registerEventTimeTimer(ride.getEventTime() + 120 * 60 * 1000);
}
@Override
public void onTimer(long timestamp, OnTimerContext context, Collector<TaxiRide> out) throws Exception {
TaxiRide savedRide = rideState.value();
if (savedRide != null && savedRide.isStart) {
out.collect(savedRide);
}
rideState.clear();
}
}
此实现的问题在于它不起作用,这是我收到的错误消息:
#
# A fatal error has been detected by the Java Runtime Environment:
#
# SIGSEGV (0xb) at pc=0x000000012c94cf55, pid=64626, tid=39683
#
# JRE version: Java(TM) SE Runtime Environment (8.0_60-b27) (build 1.8.0_60-b27)
# Java VM: Java HotSpot(TM) 64-Bit Server VM (25.60-b23 mixed mode bsd-amd64 compressed oops)
# Problematic frame:
# [thread 39171 also had an error]
06:52:56.163 [pool-11-thread-1] INFO o.a.flink.contrib.streaming.state.RocksDBKeyedStateBackend - Asynchronous RocksDB snapshot (File Stream Factory @ file:/tmp/checkpoints/53224270d2f2be67a9d20f9deac66d09, asynchronous part) in thread Thread[pool-11-thread-1,5,Flink Task Threads] took 10 ms.
06:52:56.163 [pool-16-thread-1] INFO o.a.flink.contrib.streaming.state.RocksDBKeyedStateBackend - Asynchronous RocksDB snapshot (File Stream Factory @ file:/tmp/checkpoints/53224270d2f2be67a9d20f9deac66d09, asynchronous part) in thread Thread[pool-16-thread-1,5,Flink Task Threads] took 12 ms.
C06:52:56.163 [pool-13-thread-1] INFO o.a.flink.contrib.streaming.state.RocksDBKeyedStateBackend - Asynchronous RocksDB snapshot (File Stream Factory @ file:/tmp/checkpoints/53224270d2f2be67a9d20f9deac66d09, asynchronous part) in thread Thread[pool-13-thread-1,5,Flink Task Threads] took 9 ms.
[librocksdbjni-osx.jnilib+0x3ff55] _Z18rocksdb_get_helperP7JNIEnv_PN7rocksdb2DBERKNS1_11ReadOptionsEPNS1_18ColumnFamilyHandleEP11_jbyteArrayii+0xe5
#
# Failed to write core dump. Core dumps have been disabled. To enable core dumping, try "ulimit -c unlimited" before starting Java again
#
06:52:56.163 [pool-12-thread-1] INFO o.a.flink.contrib.streaming.state.RocksDBKeyedStateBackend - Asynchronous RocksDB snapshot (File Stream Factory @ file:/tmp/checkpoints/53224270d2f2be67a9d20f9deac66d09, asynchronous part) in thread Thread[pool-12-thread-1,5,Flink Task Threads] took 13 ms.
[thread 46339 also had an error]
# An error report file with more information is saved as:
# /Users/abc/MyFiles/workspace/flink-java-project/hs_err_pid64626.log
[thread 22279 also had an error]
[thread 33027 also had an error]
#
# If you would like to submit a bug report, please visit:
# http://bugreport.java.com/bugreport/crash.jsp
# The crash happened outside the Java Virtual Machine in native code.
# See problematic frame for where to report the bug.
#
Process finished with exit code 134 (interrupted by signal 6: SIGABRT)
详细信息来自&#34; /Users/abc/MyFiles/workspace/flink-java-project/hs_err_pid64626.log"可以在此链接中找到(http://memyselfandtaco.blogspot.tw/2018/04/how-to-correctly-access-rocksdb-in.html)