I created a writer for the BucketingSink. The sink and writer run without errors, but when the Avro GenericRecord data is written to Parquet, the file is created and moves from the in-progress state to pending, yet it remains empty with 0 bytes. Can anyone tell me what is wrong with the code? I have already tried moving the initialization of the AvroParquetWriter into the open() method, but the result is the same.
While debugging, I confirmed that writer.write(element) is executed and that element contains the Avro GenericRecord data.
Streaming setup:
BucketingSink<DataEventRecord> sink =
    new BucketingSink<DataEventRecord>("hdfs://localhost:9000/tmp/");
sink.setBucketer(new DateTimeBucketer<DataEventRecord>("yyyy-MM-dd--HHmm"));
sink.setWriter(new ParquetSinkWriter<DataEventRecord>());
ParquetSinkWriter:
import java.io.File;
import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.flink.streaming.connectors.fs.StreamWriterBase;
import org.apache.flink.streaming.connectors.fs.Writer;
import org.apache.parquet.avro.AvroParquetWriter;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;

import com.any.DataEventRecord;

public class ParquetSinkWriter<T> extends StreamWriterBase<T> {

    private transient ParquetWriter<GenericRecord> writer;
    private Path path;
    private FileSystem fs;
    private final CompressionCodecName compressionCodecName = CompressionCodecName.SNAPPY;
    private final int blockSize = 256 * 1024 * 1024;
    private final int pageSize = 64 * 1024;

    @Override
    // workaround
    public void open(FileSystem fs, Path path) throws IOException {
        super.open(fs, path);
        this.path = path;
        this.fs = fs;
    }

    @Override
    public void write(T event) throws IOException {
        DataEventRecord element = (DataEventRecord) event;
        if (writer == null) {
            writer = new AvroParquetWriter<GenericRecord>(this.path, element.getSchema(),
                    compressionCodecName, blockSize, pageSize);
        }
        if (writer != null) {
            GenericRecord datum = element.getRecord();
            writer.write(datum);
        }
    }

    @Override
    public void close() throws IOException {
        if (writer != null) {
            writer.close();
        }
        super.close();
    }

    @Override
    public Writer<T> duplicate() {
        return new ParquetSinkWriter<T>();
    }
}
Answer 0 (score: 1)
Implementing the Writer interface directly should look like this:
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.flink.streaming.connectors.fs.Writer;
import org.apache.flink.util.Preconditions;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.avro.AvroParquetWriter;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;

/**
 * Parquet writer.
 *
 * @param <T>
 */
public class ParquetSinkWriter<T extends GenericRecord> implements Writer<T> {

    private static final long serialVersionUID = -975302556515811398L;

    private final CompressionCodecName compressionCodecName = CompressionCodecName.SNAPPY;
    private final int pageSize = 64 * 1024;

    private final String schemaRepresentation;

    private transient Schema schema;
    private transient ParquetWriter<GenericRecord> writer;
    private transient Path path;

    private int position;

    public ParquetSinkWriter(String schemaRepresentation) {
        this.schemaRepresentation = Preconditions.checkNotNull(schemaRepresentation);
    }

    @Override
    public void open(FileSystem fs, Path path) throws IOException {
        this.position = 0;
        this.path = path;

        if (writer != null) {
            writer.close();
        }

        writer = createWriter();
    }

    @Override
    public long flush() throws IOException {
        Preconditions.checkNotNull(writer);
        // Closing the ParquetWriter forces its buffered row groups and footer onto disk;
        // a fresh writer is then opened so subsequent records can still be written.
        position += writer.getDataSize();
        writer.close();
        writer = createWriter();

        return position;
    }

    @Override
    public long getPos() throws IOException {
        Preconditions.checkNotNull(writer);
        // Bytes already flushed plus the data currently buffered by the writer.
        return position + writer.getDataSize();
    }

    @Override
    public void close() throws IOException {
        if (writer != null) {
            writer.close();
            writer = null;
        }
    }

    @Override
    public void write(T element) throws IOException {
        Preconditions.checkNotNull(writer);
        writer.write(element);
    }

    @Override
    public Writer<T> duplicate() {
        return new ParquetSinkWriter<>(schemaRepresentation);
    }

    private ParquetWriter<GenericRecord> createWriter() throws IOException {
        if (schema == null) {
            schema = new Schema.Parser().parse(schemaRepresentation);
        }

        return AvroParquetWriter.<GenericRecord>builder(path)
                .withSchema(schema)
                .withDataModel(new GenericData())
                .withCompressionCodec(compressionCodecName)
                .withPageSize(pageSize)
                .build();
    }
}
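For context, here is a minimal wiring sketch of how this writer could be plugged into the BucketingSink from the question. It is not part of the original answer: because this ParquetSinkWriter is bounded to T extends GenericRecord, the stream is assumed to have been mapped to GenericRecord first (for example via the getRecord() call the question already uses), and schemaString is an assumed variable holding the Avro schema of those records as a JSON string.

// Wiring sketch (assumptions: events is a DataStream<DataEventRecord>, and
// schemaString contains the Avro schema of the emitted records as JSON).
DataStream<GenericRecord> records = events.map(DataEventRecord::getRecord);

BucketingSink<GenericRecord> sink =
        new BucketingSink<GenericRecord>("hdfs://localhost:9000/tmp/");
sink.setBucketer(new DateTimeBucketer<GenericRecord>("yyyy-MM-dd--HHmm"));
sink.setWriter(new ParquetSinkWriter<GenericRecord>(schemaString));
records.addSink(sink);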