我创建了一些Avro文件。我可以使用以下命令将它们转换为json,只是检查文件是否正常
java -jar avro-tools-1.8.2.jar tojson FileName.avro>outputfilename.json
现在,我有一些大的avro文件,我想上传到REST API,但是它有大小限制,因此我试图使用流将其分批上传。
下面的示例只是从原始文件中分块读取并复制到另一个avro文件中,可以完美地创建文件
using System;
using System.IO;
class Test
{
public static void Main()
{
// Specify a file to read from and to create.
string pathSource = @"D:\BDS\AVRO\filename.avro";
string pathNew = @"D:\BDS\AVRO\test\filenamenew.avro";
try
{
using (FileStream fsSource = new FileStream(pathSource,
FileMode.Open, FileAccess.Read))
{
byte[] buffer = new byte[(20 * 1024 * 1024) + 100];
long numBytesToRead = (int)fsSource.Length;
int numBytesRead = 0;
using (FileStream fsNew = new FileStream(pathNew,
FileMode.Append, FileAccess.Write))
{
// Read the source file into a byte array.
//byte[] bytes = new byte[fsSource.Length];
//int numBytesToRead = (int)fsSource.Length;
//int numBytesRead = 0;
while (numBytesToRead > 0)
{
int bytesRead = fsSource.Read(buffer, 0, buffer.Length);
byte[] actualbytes = new byte[bytesRead];
Array.Copy(buffer, actualbytes, bytesRead);
// Read may return anything from 0 to numBytesToRead.
// Break when the end of the file is reached.
if (bytesRead == 0)
break;
numBytesRead += bytesRead;
numBytesToRead -= bytesRead;
fsNew.Write(actualbytes, 0, actualbytes.Length);
}
}
}
// Write the byte array to the other FileStream.
}
catch (FileNotFoundException ioEx)
{
Console.WriteLine(ioEx.Message);
}
}
}
我怎么知道这会创建一个好的Avro。因为先前的命令可以转换为json,所以再次可以正常工作
java -jar avro-tools-1.8.2.jar tojson filenamenew.avro>outputfilename.json
但是,当我使用相同的代码,而不是复制到另一个文件时,只需调用rest api,文件就会被上传,但是从服务器下载相同的文件并运行上面的命令以转换为json时- “不是数据文件”。
因此,显然有些东西已损坏,我正在努力找出原因。
这是代码段
string filenamefullyqualified = path + filename;
Stream stream = System.IO.File.Open(filenamefullyqualified, FileMode.Open, FileAccess.Read, FileShare.None);
long? position = 0;
byte[] buffer = new byte[(20 * 1024 * 1024) + 100];
long numBytesToRead = stream.Length;
int numBytesRead = 0;
do
{
var content = new MultipartFormDataContent();
int bytesRead = stream.Read(buffer, 0, buffer.Length);
byte[] actualbytes = new byte[bytesRead];
Array.Copy(buffer, actualbytes, bytesRead);
if (bytesRead == 0)
break;
//Append Data
url = String.Format("https://{0}.dfs.core.windows.net/raw/datawarehouse/{1}/{2}/{3}/{4}/{5}?action=append&position={6}", datalakeName, filename.Substring(0, filename.IndexOf("_")), year, month, day, filename, position.ToString());
numBytesRead += bytesRead;
numBytesToRead -= bytesRead;
ByteArrayContent byteContent = new ByteArrayContent(actualbytes);
content.Add(byteContent);
method = new HttpMethod("PATCH");
request = new HttpRequestMessage(method, url)
{
Content = content
};
request.Headers.Add("Authorization", "Bearer " + accesstoken);
var response = await client.SendAsync(request);
response.EnsureSuccessStatusCode();
position = position + request.Content.Headers.ContentLength;
Array.Clear(buffer, 0, buffer.Length);
} while (numBytesToRead > 0);
stream.Close();
我浏览了论坛的话题,但是没有发现任何与拆分avro文件有关的事情。
我有一种直觉,认为我对http请求的“内容”不正确。我想念的是什么?
如果您需要更多详细信息,我们将很乐意提供。
答案 0 :(得分:0)
我现在发现了问题。问题是由于MultipartFormDataContent。以此上传avro文件时,它会添加额外的文本,例如content Type等,并删除许多行(我不知道为什么)。
因此,解决方案是将内容本身作为“ ByteArrayContent”上传,而不是像我之前所做的那样将其添加到MultipartFormDataContent。
这是代码段,与问题中的代码段几乎类似,除了我不再使用MultipartFormDataContent
string filenamefullyqualified = path + filename;
Stream stream = System.IO.File.Open(filenamefullyqualified, FileMode.Open, FileAccess.Read, FileShare.None);
//content.Add(CreateFileContent(fs, path, filename, "text/plain"));
long? position = 0;
byte[] buffer = new byte[(20 * 1024 * 1024) + 100];
long numBytesToRead = stream.Length;
int numBytesRead = 0;
//while ((bytesRead = stream.Read(buffer, 0, buffer.Length)) > 0)
//{
do
{
//var content = new MultipartFormDataContent();
int bytesRead = stream.Read(buffer, 0, buffer.Length);
byte[] actualbytes = new byte[bytesRead];
Array.Copy(buffer, actualbytes, bytesRead);
if (bytesRead == 0)
break;
//Append Data
url = String.Format("https://{0}.dfs.core.windows.net/raw/datawarehouse/{1}/{2}/{3}/{4}/{5}?action=append&position={6}", datalakeName, filename.Substring(0, filename.IndexOf("_")), year, month, day, filename, position.ToString());
numBytesRead += bytesRead;
numBytesToRead -= bytesRead;
ByteArrayContent byteContent = new ByteArrayContent(actualbytes);
//byteContent.Headers.ContentType= new MediaTypeHeaderValue("text/plain");
//content.Add(byteContent);
method = new HttpMethod("PATCH");
//request = new HttpRequestMessage(method, url)
//{
// Content = content
//};
request = new HttpRequestMessage(method, url)
{
Content = byteContent
};
request.Headers.Add("Authorization", "Bearer " + accesstoken);
var response = await client.SendAsync(request);
response.EnsureSuccessStatusCode();
position = position + request.Content.Headers.ContentLength;
Array.Clear(buffer, 0, buffer.Length);
} while (numBytesToRead > 0);
stream.Close();