需要Java程序的帮助,它将avro.avsc模式文件和avrofile作为输入并将它们转换为java中的文本文件。
答案 0 :(得分:0)
这个java代码对我有用希望对别人有用。 import java.io. ; import java.util。;
import org.apache.avro.*;
import org.apache.avro.generic.*;
import org.apache.avro.file.*;
import org.apache.avro.io.*;
public class AvrotoTextFormatter
{
public static void main ( String args[]) throws Exception
{
InputStream in = null;
in = new FileInputStream(args[0]);
BufferedReader br;
BufferedInputStream inStream = new BufferedInputStream(in);
PrintWriter pr1 = new PrintWriter(args[1], "UTF-8");
PrintWriter pr = new PrintWriter(args[2], "UTF-8");
StringTokenizer st;
StringTokenizer st1;
int row_counter = 0;
String header_fields = "";
String content_records = "";
String sCurrentLine = "";
GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
DataFileStream<Object> fileReader = new DataFileStream<Object>(inStream, reader);
pr1.println(fileReader.getSchema().getFields());
pr1.close();
br = new BufferedReader(new java.io.FileReader(args[1]));
while ((sCurrentLine = br.readLine()) != null)
{
st = new StringTokenizer(sCurrentLine," ");
while (st.hasMoreTokens())
{
header_fields = header_fields + st.nextToken() + "|";
st.nextToken();
st.nextToken();
}
}
header_fields = header_fields.substring(1,header_fields.length()-1);
pr.println(header_fields);
File file = new File(args[0]);
DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(fileReader.getSchema());
DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(file, datumReader);
GenericRecord user = null;
while (dataFileReader.hasNext())
{
content_records = "";
user = dataFileReader.next(user);
st1 = new StringTokenizer(header_fields,"|");
while (st1.hasMoreTokens())
{
content_records = content_records + user.get(st1.nextToken()) + "|";
}
content_records = content_records.substring(0,content_records.length()-1);
pr.println(content_records);
}
fileReader.close();
br.close();
pr.close();
}
}
答案 1 :(得分:0)
找到带有火花的片段来做同样的事情更快更容易。 分享以便对他人有所帮助
import org.apache.avro.mapreduce.AvroKeyInputFormat
import org.apache.avro.mapred.AvroKey
import org.apache.hadoop.io.NullWritable
val avroRdd = sc.newAPIHadoopFile("/sit/data/presentation/bbsbi/alayer/test/000000_0", classOf[AvroKeyInputFormat[String]], classOf[AvroKey[String]], classOf[NullWritable]).keys.map(_.toString)
val n=avroRdd.map(_.split(",").map(_.split(":")(1).trim).map(l=>l.substring(l.indexOf("\"")+1,l.lastIndexOf("\""))).mkString("|"))
n.collect.foreach(println)
答案 2 :(得分:0)
下面代码为我工作
private static JSONArray readJsonFromAvro(String absFilePath)throws IOException,
InterruptedException
{
JSONArray jsonarray = new JSONArray();
InputStream in = new FileInputStream(absFilePath);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
BufferedInputStream inStream = new BufferedInputStream(in);
GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
DataFileStream<Object> fileReader = new DataFileStream<Object>(
inStream, reader);
try {
final Schema schema = fileReader.getSchema();
final JsonEncoder encoder = EncoderFactory.get().jsonEncoder(
schema, baos);
for (final Object datum : fileReader) {
//writer.write(datum, encoder);
JSONObject jsonObj = new JSONObject(datum.toString());
jsonarray.put(jsonObj);
}
encoder.flush();
System.out.println();
} finally {
fileReader.close();
}
return jsonarray;
}
答案 3 :(得分:-1)
如何在程序下运行?我的意思是我可以提供3个命令行输入?
import org.apache.avro.*;
import org.apache.avro.generic.*;
import org.apache.avro.file.*;
import org.apache.avro.io.*;
public class AvrotoTextFormatter
{
public static void main ( String args[]) throws Exception
{
InputStream in = null;
in = new FileInputStream(args[0]);
BufferedReader br;
BufferedInputStream inStream = new BufferedInputStream(in);
PrintWriter pr1 = new PrintWriter(args[1], "UTF-8");
PrintWriter pr = new PrintWriter(args[2], "UTF-8");
StringTokenizer st;
StringTokenizer st1;
int row_counter = 0;
String header_fields = "";
String content_records = "";
String sCurrentLine = "";
GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
DataFileStream<Object> fileReader = new DataFileStream<Object>(inStream, reader);
pr1.println(fileReader.getSchema().getFields());
pr1.close();
br = new BufferedReader(new java.io.FileReader(args[1]));
while ((sCurrentLine = br.readLine()) != null)
{
st = new StringTokenizer(sCurrentLine," ");
while (st.hasMoreTokens())
{
header_fields = header_fields + st.nextToken() + "|";
st.nextToken();
st.nextToken();
}
}
header_fields = header_fields.substring(1,header_fields.length()-1);
pr.println(header_fields);
File file = new File(args[0]);
DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(fileReader.getSchema());
DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(file, datumReader);
GenericRecord user = null;
while (dataFileReader.hasNext())
{
content_records = "";
user = dataFileReader.next(user);
st1 = new StringTokenizer(header_fields,"|");
while (st1.hasMoreTokens())
{
content_records = content_records + user.get(st1.nextToken()) + "|";
}
content_records = content_records.substring(0,content_records.length()-1);
pr.println(content_records);
}
fileReader.close();
br.close();
pr.close();
}
}