Protobuf-net与BinarySerializer序列化List<T>的大小

时间:2013-01-28 12:53:50

标签: .net protobuf-net

我已开始使用protobuf-net库。它提高了30%的序列化速度,但是我遇到了文件大小的问题。

我的数据模型是:

    /// <summary>
    /// One row of report data used for the size comparison: ten 32-bit integer
    /// columns, ten 64-bit integer columns and ten string columns.
    /// The type is annotated for both BinaryFormatter (<c>[Serializable]</c>)
    /// and protobuf-net (<c>[ProtoContract]</c>), so the same instance can be
    /// written by either serializer.
    /// </summary>
    [Serializable, ProtoContract(SkipConstructor = true)]
    private class ReportDataItem
    {
        // 32-bit integer columns: protobuf field numbers 1-10.
        [ProtoMember(1)] public int C11 { get; set; }
        [ProtoMember(2)] public int C12 { get; set; }
        [ProtoMember(3)] public int C13 { get; set; }
        [ProtoMember(4)] public int C14 { get; set; }
        [ProtoMember(5)] public int C15 { get; set; }
        [ProtoMember(6)] public int C16 { get; set; }
        [ProtoMember(7)] public int C17 { get; set; }
        [ProtoMember(8)] public int C18 { get; set; }
        [ProtoMember(9)] public int C19 { get; set; }
        [ProtoMember(10)] public int C110 { get; set; }

        // 64-bit integer columns: protobuf field numbers 11-20.
        [ProtoMember(11)] public long C21 { get; set; }
        [ProtoMember(12)] public long C22 { get; set; }
        [ProtoMember(13)] public long C23 { get; set; }
        [ProtoMember(14)] public long C24 { get; set; }
        [ProtoMember(15)] public long C25 { get; set; }
        [ProtoMember(16)] public long C26 { get; set; }
        [ProtoMember(17)] public long C27 { get; set; }
        [ProtoMember(18)] public long C28 { get; set; }
        [ProtoMember(19)] public long C29 { get; set; }
        [ProtoMember(20)] public long C210 { get; set; }

        // String columns: protobuf field numbers 21-30.
        [ProtoMember(21)] public string C31 { get; set; }
        [ProtoMember(22)] public string C32 { get; set; }
        [ProtoMember(23)] public string C33 { get; set; }
        [ProtoMember(24)] public string C34 { get; set; }
        [ProtoMember(25)] public string C35 { get; set; }
        [ProtoMember(26)] public string C36 { get; set; }
        [ProtoMember(27)] public string C37 { get; set; }
        [ProtoMember(28)] public string C38 { get; set; }
        [ProtoMember(29)] public string C39 { get; set; }
        [ProtoMember(30)] public string C310 { get; set; }
    }

    /// <summary>
    /// Container for the list of report rows. The list is protobuf field 1 and
    /// is declared with <c>DataFormat.Group</c>.
    /// </summary>
    [Serializable, ProtoContract]
    private class ReportData
    {
        [ProtoMember(1, DataFormat = DataFormat.Group)]
        public List<ReportDataItem> ReportDataItems { get; set; }
    }

    /// <summary>
    /// Root object handed to both serializers; it wraps a single
    /// <c>ReportData</c> instance as protobuf field 1.
    /// </summary>
    [Serializable, ProtoContract]
    private class Report
    {
        [ProtoMember(1)] public ReportData ReportData { get; set; }
    }

所以当我尝试序列化时:

    /// <summary>
    /// Builds a report of ten rows (every numeric column set to the row index,
    /// every string column set to the same sample text) and writes it twice:
    /// once with protobuf-net to <c>c:\Test\Object\0.bin</c> and once with
    /// BinaryFormatter to <c>c:\Test\Object\bf_0.bin</c>, so that the two file
    /// sizes can be compared.
    /// </summary>
    private static void ObjectSerialization()
    {
        // The literal must use plain ASCII double quotes; typographic quotes
        // (“ ”) are not valid string delimiters in C# and do not compile.
        const string someData = @"qtwretyfsjdabvfsjdlfudspogds; kfg; lkfdsl; gkl; dsfkgl; kdfsgr; iweprpo \ z \ xlvcfmxzcbvjiorsdifdlf \ jl; dsa";

        Report report = new Report();
        report.ReportData = new ReportData { ReportDataItems = new List<ReportDataItem>() };

        for (int j = 0; j < 10; j++)
        {
            // Every string column references the same string instance; a
            // serializer that tracks references can exploit that.
            ReportDataItem reportDataItem = new ReportDataItem
            {
                C11 = j, C12 = j, C13 = j, C14 = j, C15 = j,
                C16 = j, C17 = j, C18 = j, C19 = j, C110 = j,

                C21 = j, C22 = j, C23 = j, C24 = j, C25 = j,
                C26 = j, C27 = j, C28 = j, C29 = j, C210 = j,

                C31 = someData, C32 = someData, C33 = someData, C34 = someData, C35 = someData,
                C36 = someData, C37 = someData, C38 = someData, C39 = someData, C310 = someData
            };

            report.ReportData.ReportDataItems.Add(reportDataItem);
        }

        // protobuf-net output.
        using (Stream stream = new FileStream(@"c:\Test\Object\0.bin", FileMode.Create, FileAccess.Write, FileShare.Write))
        {
            Serializer.Serialize(stream, report);
        }

        // BinaryFormatter output, kept only as the size baseline being compared.
        using (Stream stream = new FileStream(@"c:\Test\Object\bf_0.bin", FileMode.Create, FileAccess.Write, FileShare.Write))
        {
            BinaryFormatter formatter = new BinaryFormatter();
            formatter.Serialize(stream, report);
        }
    }

我的结果如下:

  • protobuf-net 文件大小 10428字节
  • BinaryFormatter 文件大小 3458字节

你能帮我找到一个合适的方案来减小protobuf-net生成文件的大小吗?Protobuf-net是我通过VS Package Manager安装的软件包。

1 个答案:

答案 0 :(得分:2)

我将最后几行更改为:

// protobuf-net: print the stream length right after serializing,
// then the size of the file once the stream has been closed.
using (Stream pbOutput = new FileStream(
    @"pb.bin", FileMode.Create, FileAccess.Write, FileShare.Write))
{
    Serializer.Serialize(pbOutput, report);
    Console.WriteLine(pbOutput.Length);
}
FileInfo pbFile = new FileInfo("pb.bin");
Console.WriteLine(pbFile.Length);

// BinaryFormatter: same two measurements for comparison.
using (Stream bfOutput = new FileStream(
    @"bf.bin", FileMode.Create, FileAccess.Write, FileShare.Write))
{
    new BinaryFormatter().Serialize(bfOutput, report);
    Console.WriteLine(bfOutput.Length);
}
FileInfo bfFile = new FileInfo("bf.bin");
Console.WriteLine(bfFile.Length);

获取写入流的数据量以及文件的最终大小。我的结果:

1628
1628
3144
3144

在我看来这个结果没有问题。请核实您的数据。

您使用的字符串是否比 "some data" 更长?如果是,那么有一个重要问题:在您的实际代码中,是否很可能出现重复的字符串?如果不会,那么这个BF测试就是无效的,因为BinaryFormatter默认使用引用跟踪,相同的字符串只会存储一次——而您的实际数据的表现会大不相同。如果您确实会多次使用相同的字符串,那么可以在protobuf-net中模拟这种重用:

// String columns (protobuf fields 21-30), each declared with AsReference = true.
[ProtoMember(21, AsReference = true)] public string C31 { get; set; }
[ProtoMember(22, AsReference = true)] public string C32 { get; set; }
[ProtoMember(23, AsReference = true)] public string C33 { get; set; }
[ProtoMember(24, AsReference = true)] public string C34 { get; set; }
[ProtoMember(25, AsReference = true)] public string C35 { get; set; }
[ProtoMember(26, AsReference = true)] public string C36 { get; set; }
[ProtoMember(27, AsReference = true)] public string C37 { get; set; }
[ProtoMember(28, AsReference = true)] public string C38 { get; set; }
[ProtoMember(29, AsReference = true)] public string C39 { get; set; }
[ProtoMember(30, AsReference = true)] public string C310 { get; set; }

现在输出:

939
939
3144
3144

然而!如果字符串通常并不重复,这会略微增大输出,并且会让其他protobuf实现难以使用这些数据(它仍是有效的protobuf数据,只是用了一些取巧的手法)。

例如,如果你有自定义名称/国家/地区名称/状态等以字符串表示但大量重复的内容,则上述做法非常有用。