将Json转换为Avro文件时,仅转换了3/4的行,我在做什么错?

时间:2018-10-29 16:05:18

标签: java json hadoop avro

这是我的代码:

package hadoopPlayground;

import java.io.BufferedReader;

import org.apache.avro.Schema;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.Encoder;
import org.apache.avro.io.EncoderFactory;
import org.apache.commons.io.IOUtils;

public class json2AVRO {
    public static void main( String[] args ) throws Exception
    {
    String filename = "ds214_arrivi_mensili.json";
    File JSONFile = new File(filename);
    String filename2 = "ds214_arrivi_mensili.avsc";
    File AVSCFile = new File(filename2);

    BufferedReader read = new BufferedReader(new FileReader(JSONFile));
    BufferedReader read2 = new BufferedReader(new FileReader(AVSCFile));

    String outputName = JSONFile.toString().substring(0, 
            JSONFile.toString().lastIndexOf(".")) + ".avro"; 

    String json = org.apache.commons.io.IOUtils.toString(read);
    String schema = org.apache.commons.io.IOUtils.toString(read2);

    InputStream input;
    Encoder encoder;
    ByteArrayOutputStream output;

            Schema schema1 = new Schema.Parser().parse(AVSCFile);

            DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(schema1);

            input = new ByteArrayInputStream(json.getBytes());

            output = new ByteArrayOutputStream();

            DataInputStream din = new DataInputStream(input);

            Decoder decoder = DecoderFactory.get().jsonDecoder(schema1, din);

            System.out.println(decoder);

            encoder = EncoderFactory.get().binaryEncoder(output, null);
            GenericRecord datum;

            GenericDatumWriter<GenericRecord> writer1 = new GenericDatumWriter<GenericRecord>(schema1);

            File file= new File(outputName);

            DataFileWriter<GenericRecord> dataWriter = new DataFileWriter<GenericRecord>(writer1);


            dataWriter.create(schema1, file);
            try
            {
            for (int i = 0; i < json.length(); i++) {
                            datum = reader.read(null, decoder);
                            dataWriter.append(datum);

                            System.out.println(datum);

                            output.close();
                        }                   
            }
                    catch (IOException e)
                    {
            }
            finally
            {
                //Here is the flushing and closing
                try
                {
                    if (encoder != null)
                    {
                        encoder.flush();
                    }
                    if (output != null)
                    {
                        output.close();
                    }
                } catch (IOException e)
                {
                    throw new RuntimeException(e);
                }
            }

            }
        }

该文件正确(显然)已转换为AVRO,但是当我尝试从终端将AVRO转换为JSON时,它仅显示2016年预期的1692行...这是怎么回事?

我已经检查了JSON并将其从终端转换为AVRO,我没有任何问题。在第1692行处没有奇怪的符号。

0 个答案:

没有答案