这是我的代码:
package hadoopPlayground;
import java.io.BufferedReader;
import org.apache.avro.Schema;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.Encoder;
import org.apache.avro.io.EncoderFactory;
import org.apache.commons.io.IOUtils;
public class json2AVRO {
public static void main( String[] args ) throws Exception
{
String filename = "ds214_arrivi_mensili.json";
File JSONFile = new File(filename);
String filename2 = "ds214_arrivi_mensili.avsc";
File AVSCFile = new File(filename2);
BufferedReader read = new BufferedReader(new FileReader(JSONFile));
BufferedReader read2 = new BufferedReader(new FileReader(AVSCFile));
String outputName = JSONFile.toString().substring(0,
JSONFile.toString().lastIndexOf(".")) + ".avro";
String json = org.apache.commons.io.IOUtils.toString(read);
String schema = org.apache.commons.io.IOUtils.toString(read2);
InputStream input;
Encoder encoder;
ByteArrayOutputStream output;
Schema schema1 = new Schema.Parser().parse(AVSCFile);
DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(schema1);
input = new ByteArrayInputStream(json.getBytes());
output = new ByteArrayOutputStream();
DataInputStream din = new DataInputStream(input);
Decoder decoder = DecoderFactory.get().jsonDecoder(schema1, din);
System.out.println(decoder);
encoder = EncoderFactory.get().binaryEncoder(output, null);
GenericRecord datum;
GenericDatumWriter<GenericRecord> writer1 = new GenericDatumWriter<GenericRecord>(schema1);
File file= new File(outputName);
DataFileWriter<GenericRecord> dataWriter = new DataFileWriter<GenericRecord>(writer1);
dataWriter.create(schema1, file);
try
{
for (int i = 0; i < json.length(); i++) {
datum = reader.read(null, decoder);
dataWriter.append(datum);
System.out.println(datum);
output.close();
}
}
catch (IOException e)
{
}
finally
{
//Here is the flushing and closing
try
{
if (encoder != null)
{
encoder.flush();
}
if (output != null)
{
output.close();
}
} catch (IOException e)
{
throw new RuntimeException(e);
}
}
}
}
该文件正确(显然)已转换为AVRO,但是当我尝试从终端将AVRO转换为JSON时,它仅显示2016年预期的1692行...这是怎么回事?
我已经检查了JSON并将其从终端转换为AVRO,我没有任何问题。在第1692行处没有奇怪的符号。