Question

我创建了一些Avro文件。我可以使用以下命令将它们转换为json，只是检查文件是否正常

java -jar avro-tools-1.8.2.jar tojson FileName.avro>outputfilename.json

现在，我有一些大的avro文件，我想上传到REST API，但是它有大小限制，因此我试图使用流将其分批上传。

下面的示例只是从原始文件中分块读取并复制到另一个avro文件中，可以完美地创建文件

using System;
using System.IO;

/// <summary>
/// Demonstrates copying a file in fixed-size chunks: the source Avro file is
/// read chunk by chunk and every chunk is written to a new file, so the result
/// is a byte-for-byte copy of the original.
/// </summary>
class Test
{
    // Size of one chunk (20 MiB plus 100 bytes), i.e. the most that is held
    // in memory at any time.
    private const int ChunkSize = (20 * 1024 * 1024) + 100;

    /// <summary>
    /// Copies the hard-coded source file to the hard-coded destination file in
    /// chunks of <see cref="ChunkSize"/> bytes. I/O failures are reported on
    /// the console instead of being propagated.
    /// </summary>
    public static void Main()
    {
        // Specify a file to read from and to create.
        string pathSource = @"D:\BDS\AVRO\filename.avro";
        string pathNew = @"D:\BDS\AVRO\test\filenamenew.avro";

        try
        {
            using (FileStream fsSource = new FileStream(pathSource,
                FileMode.Open, FileAccess.Read))
            // FileMode.Create (rather than Append) so that re-running the program
            // replaces the previous output instead of appending a second copy,
            // which would no longer be a valid Avro container file.
            using (FileStream fsNew = new FileStream(pathNew,
                FileMode.Create, FileAccess.Write))
            {
                byte[] buffer = new byte[ChunkSize];
                int bytesRead;

                // Read may return fewer bytes than requested; it returns 0 only at
                // end of file. Looping on its result avoids tracking the remaining
                // length, which previously went through an (int) cast that wraps
                // for files of 2 GiB or more.
                while ((bytesRead = fsSource.Read(buffer, 0, buffer.Length)) > 0)
                {
                    // Write only the bytes actually read in this iteration.
                    fsNew.Write(buffer, 0, bytesRead);
                }
            }
        }
        catch (IOException ioEx)
        {
            // Covers FileNotFoundException as well as other I/O failures
            // (missing directory, sharing violation, disk errors).
            Console.WriteLine(ioEx.Message);
        }
    }
}

我如何确认这样生成的是一个完好的Avro文件？因为用之前的命令对新文件进行json转换，同样可以正常工作：

java -jar avro-tools-1.8.2.jar tojson filenamenew.avro>outputfilename.json

但是，当我使用相同的代码，不是复制到另一个文件，而是改为调用REST API时，文件可以上传成功；然而从服务器下载该文件并运行上面的命令转换为json时，却报错“Not a data file”（不是数据文件）。

因此，显然有些东西已损坏，我正在努力找出原因。

这是代码段

 // Uploads the file at path + filename in chunks: each chunk read from the file
 // is sent as one PATCH request with action=append to the URL built below.
 string filenamefullyqualified = path + filename;
            // Open the file read-only; FileShare.None denies any other access while it is open.
            Stream stream = System.IO.File.Open(filenamefullyqualified, FileMode.Open, FileAccess.Read, FileShare.None);


            // Value sent as the "position" query parameter of the next append request.
            long? position = 0;

            // Chunk buffer of 20 MiB + 100 bytes; one request is sent per buffer fill.
            byte[] buffer = new byte[(20 * 1024 * 1024) + 100];
            // Bytes of the file not yet read; the loop ends when this reaches 0.
            long numBytesToRead = stream.Length;
            // Running total of bytes read (only accumulated, never read back in this snippet).
            int numBytesRead = 0;



            do
            {

                // NOTE(review): the chunk is wrapped in multipart/form-data, so the request
                // body carries multipart boundaries and part headers in addition to the raw
                // file bytes. The answer below identifies this as the cause of the corrupted
                // upload ("Not a data file").
                var content = new MultipartFormDataContent();
                // Read may return fewer bytes than requested; 0 means end of stream.
                int bytesRead = stream.Read(buffer, 0, buffer.Length);
                // Copy exactly the bytes read so the payload is not padded with the rest of the buffer.
                byte[] actualbytes = new byte[bytesRead];

                Array.Copy(buffer, actualbytes, bytesRead);

                if (bytesRead == 0)
                    break;

                //Append Data
                // The folder segment {1} is the part of the file name before the first "_".
                url = String.Format("https://{0}.dfs.core.windows.net/raw/datawarehouse/{1}/{2}/{3}/{4}/{5}?action=append&position={6}", datalakeName, filename.Substring(0, filename.IndexOf("_")), year, month, day, filename, position.ToString());
                numBytesRead += bytesRead;
                numBytesToRead -= bytesRead;

                ByteArrayContent byteContent = new ByteArrayContent(actualbytes);
                content.Add(byteContent);


                method = new HttpMethod("PATCH");

                request = new HttpRequestMessage(method, url)
                {
                    Content = content
                };


                request.Headers.Add("Authorization", "Bearer " + accesstoken);



                var response = await client.SendAsync(request);
                // Throws on a non-success status code; nothing in this snippet closes the stream in that case.
                response.EnsureSuccessStatusCode();

                // Advance by the length of the request body that was sent.
                // NOTE(review): with multipart content this length presumably includes the
                // multipart framing, so it would be larger than bytesRead - confirm.
                position = position + request.Content.Headers.ContentLength;

                // Zero the buffer before the next read; only the first bytesRead bytes are
                // ever copied out, so this does not change what is sent.
                Array.Clear(buffer, 0, buffer.Length);




            } while (numBytesToRead > 0);
            stream.Close();

我浏览了论坛的话题，但是没有发现任何与拆分avro文件有关的事情。

我有一种直觉，认为我构造的http请求“内容”（Content）不正确。我遗漏了什么？

如果您需要更多详细信息，我们将很乐意提供。

Answer 1

我现在发现了问题。问题是由于MultipartFormDataContent。以此上传avro文件时，它会添加额外的文本，例如content Type等，并删除许多行（我不知道为什么）。

因此，解决方案是将内容本身作为“ ByteArrayContent”上传，而不是像我之前所做的那样将其添加到MultipartFormDataContent。

这是代码段，与问题中的代码段几乎类似，除了我不再使用MultipartFormDataContent

            // Uploads the file at path + filename in chunks. Every chunk is sent as the
            // raw body of one PATCH request (action=append); "position" tells the service
            // the offset at which the chunk belongs. The bytes are sent as plain
            // ByteArrayContent - wrapping them in MultipartFormDataContent would add
            // multipart boundaries/headers to the body and corrupt the stored file.
            // NOTE(review): the Data Lake Gen2 append operation presumably still needs a
            // final action=flush request to commit the data; that call is not part of
            // this snippet - confirm it happens afterwards.
            string filenamefullyqualified = path + filename;

            // Offset of the next chunk within the remote file; equals the number of
            // bytes uploaded so far.
            long position = 0;

            // Chunk buffer of 20 MiB + 100 bytes; reused for every request.
            byte[] buffer = new byte[(20 * 1024 * 1024) + 100];

            // The using block closes the file even when a request fails and throws.
            using (Stream stream = System.IO.File.Open(filenamefullyqualified, FileMode.Open, FileAccess.Read, FileShare.None))
            {
                int bytesRead;

                // ReadAsync may return fewer bytes than requested; 0 means end of file.
                while ((bytesRead = await stream.ReadAsync(buffer, 0, buffer.Length)) > 0)
                {
                    //Append Data
                    // The folder segment {1} is the part of the file name before the first '_'.
                    url = String.Format("https://{0}.dfs.core.windows.net/raw/datawarehouse/{1}/{2}/{3}/{4}/{5}?action=append&position={6}", datalakeName, filename.Substring(0, filename.IndexOf('_')), year, month, day, filename, position);

                    method = new HttpMethod("PATCH");

                    request = new HttpRequestMessage(method, url)
                    {
                        // Send only the bytes read in this iteration, straight from the
                        // shared buffer; the request is fully sent before the next read
                        // overwrites it because SendAsync is awaited below.
                        Content = new ByteArrayContent(buffer, 0, bytesRead)
                    };

                    request.Headers.Add("Authorization", "Bearer " + accesstoken);

                    using (var response = await client.SendAsync(request))
                    {
                        // Throws HttpRequestException on a non-success status code.
                        response.EnsureSuccessStatusCode();
                    }

                    // Advance only after the chunk was accepted by the service.
                    position += bytesRead;
                }
            }

分割avro文件并上传到REST

1 个答案: