Question

作为一个研究项目，我们在标准C（BINARY）文件处理库（stdio）之上编写了一个抽象层，为事务处理文件提供了一些额外的功能。

工作流程如下：

用户使用我们的API（或使用标准fopen）打开文件。两者都返回FILE*。文件以BINARY模式打开！
用户使用标准库命令（例如fwrite）
用户使用我们的API在打开的文件上打开一个事务：TRANSACTION a = trans_start(FILE* )
用户为TRANSACTION对象设置验证器（set_validator(TRANSACTION, int(*)(char*))）
用户使用我们自己的API“将”数据写入文件（int trans_write_string(TRANSACTION*, char*, length)）
- 实际上这个“写入”将其数据放入上面定义的验证器的内存中，这可能对数据进行操作并在某处设置一些标记......与问题无关。
用户使用trans_commit(TRANSACTION)来实际将数据写入文件。现在，根据验证器设置的标志，这可能不会将数据写入文件，而是向用户报告错误（以编程方式解析......与问题不太相关）。
用户使用标准API fclose关闭文件。

到目前为止，我们的API只有字符串处理方法（trans_write_string），它运行良好。它在内存中构造自己的数据缓冲区，在需要时修改内容，调用验证器等等......在连续调用时，它将新数据追加到其内部内存缓冲区，处理内存分配等......并且在成功提交时，它会使用fwrite将数据写入文件（是的，这主要是一个C项目，但也不排除C++答案）。

但是现在我们想（......必须）扩展API，以便也能写入数字（16位，32位，64位）以及浮点数......方式与标准C stdio API非常相似。沿用现有的字符串实现，假设我们在内存中有一个数据缓冲区，它先保存N个字节的字符（字符串本身），之后可能需要2个字节存放一个16位值，然后是另一个字符串的M个字节，64位值的8个字节，16位值的2个字节等等......

我们卡在了这样一个问题上：“如何在文件中表示一个数字，使其对使用不同计算机/体系结构/操作系统/字节序（endianness）的其他人同样可读”。

理论上可以通过转换为char指针（char* addr = &my_16bit_int）把数字插入内存流，即将*(addr)和*(addr + 1)放置到所需的地址（即：字符串的N个字符之后），再将其写入文件也是可行的；但是，如果我想在字节序不同的另一种架构上读取生成的文件，该怎么办？如果“另一台”计算机只是一堆古老的16位铁疙瘩怎么办？在这种情况下，文件中写入的64位值会发生什么？

有什么好的做法可以解决这类问题？

编辑：目标文件必须是二进制文件，并附有描述其格式的文本文件（XML）（例如：N 8字节字符，N 16位值等。）（此文本文件是根据我们心爱的验证器的输出生成的）。验证器“说”类似于YES，我接受这个16位值，不，我拒绝这个长字符串等等......而其他人正在根据这个“输出”创建数据格式XML。

EDIT2 ：是的，我们需要在各种平台之间共享文件，甚至包括20年前冰箱大小的机器：）

EDIT3 ：是的，我们也需要浮点数！

Answer 1

仅靠类型转换（cast）是不够的；我认为套接字函数htons和htonl对于int16和int32来说是足够的解决方案。对于int64，你需要自己实现，因为没有官方函数：

请注意，所有函数仅在需要时才反转字节顺序，因此您也可以用同一个函数把数字“还原”回主机字节序。

#include <cstdint>
#include <cstring>
#include <iostream>

// Host-to-network (big-endian) conversions for 16-, 32- and 64-bit values.
//
// Every function returns an integer whose in-memory bytes are the argument's
// value in big-endian order. On a big-endian host the value is returned
// unchanged; on a little-endian host the bytes come back reversed. Applying
// the same function a second time therefore restores the host value.
//
// The bytes are produced with shifts and copied into a fixed-width integer,
// so the result does not depend on sizeof(unsigned long) and never reads
// uninitialized or out-of-bounds storage.
//
// If a platform header already provides htons/htonl (possibly as macros),
// use those instead and drop the two definitions below.

// you can use the standard htons or this
unsigned short htons(unsigned short s)
{
    const unsigned char be[2] = {
        static_cast<unsigned char>((s >> 8) & 0xFF),
        static_cast<unsigned char>(s & 0xFF)
    };
    std::uint16_t net;
    std::memcpy(&net, be, sizeof net);
    return net;
}

// the same for 4 bytes
// you can use the standard htonl or this
// Only the low 32 bits of l are converted, even where unsigned long is wider.
unsigned long htonl(unsigned long l)
{
    const unsigned char be[4] = {
        static_cast<unsigned char>((l >> 24) & 0xFF),
        static_cast<unsigned char>((l >> 16) & 0xFF),
        static_cast<unsigned char>((l >> 8) & 0xFF),
        static_cast<unsigned char>(l & 0xFF)
    };
    std::uint32_t net;
    std::memcpy(&net, be, sizeof net);
    return net;
}

// 64-bit conversion done directly, one byte at a time, most significant first.
unsigned long long htonll(unsigned long long ll)
{
    unsigned char be[8];
    for (int i = 0; i < 8; ++i) {
        be[i] = static_cast<unsigned char>((ll >> (56 - 8 * i)) & 0xFF);
    }
    std::uint64_t net;
    std::memcpy(&net, be, sizeof net);
    return net;
}

// or if you want to use the htonl:
// The value is split into its high and low 32-bit halves with shifts (not by
// reinterpreting the object as an array of unsigned long), each half is
// converted with htonl, and the high half is placed first in memory.
unsigned long long htonll2(unsigned long long ll)
{
    const std::uint32_t halves[2] = {
        static_cast<std::uint32_t>(
            htonl(static_cast<unsigned long>((ll >> 32) & 0xFFFFFFFFu))),
        static_cast<std::uint32_t>(
            htonl(static_cast<unsigned long>(ll & 0xFFFFFFFFu)))
    };
    std::uint64_t net;
    std::memcpy(&net, halves, sizeof net);
    return net;
}

// Prints each converted sample value in hexadecimal, one per line.
int main()
{
    unsigned short s = 0x1122;
    std::cout << std::hex << htons(s) << std::endl;
    unsigned long l = 0x11223344;
    std::cout << std::hex << htonl(l) << std::endl;
    unsigned long long ll = 0x1122334455667788;
    std::cout << std::hex << htonll(ll) << std::endl;
    std::cout << std::hex << htonll2(ll) << std::endl;
    return 0;
}

Answer 2

您必须自己定义一种格式，或者选择一种现有的二进制文件格式（例如XDR），并按该格式进行读写。例如，以XDR格式写入一个32位整数：

/*
 * Write the low 32 bits of `value` to `dest` as four bytes, most
 * significant byte first.
 *
 * dest  - output stream the bytes are written to with putc.
 * value - the integer to emit; bits above the low 32 are ignored.
 *
 * The result of putc is not checked, so a failed write is not reported
 * to the caller.
 */
void
write32Bits( FILE* dest, uint_least32_t value )
{
    int shift;

    /* Walk from the top byte (shift 24) down to the bottom byte (shift 0). */
    for ( shift = 24; shift >= 0; shift -= 8 ) {
        putc( (value >> shift) & 0xFF, dest );
    }
}

浮点数更复杂，但如果您愿意把平台限制为支持IEEE浮点的平台，就可以通过类型双关（type punning）把float当作uint32_t、把double当作uint64_t，然后按无符号整数输出。同样，如果您只面向采用二进制补码并具有32位整数类型的机器，也可以对有符号值使用上面的移位加掩码方法（此时整数类型为uint32_t和int32_t）。

关于可移植性：我认为除大型机外，IEEE是通用的；除了非常特殊的大型机外，二进制补码也是通用的。（IBM大型机使用二进制补码，但不是IEEE。Unisys大型机不使用二进制补码，而且没有32位整数类型。我不确定现在还有哪些大型机仍然存在，但在过去，有各种各样奇特的机器。）

Answer 3

如果您正在使用glibc，那么可以使用其“endian.h”中提供的le <-> be（小端 <-> 大端）转换函数：

SYNOPSIS
   /* NOTE(review): newer glibc releases reportedly deprecate _BSD_SOURCE in
      favour of _DEFAULT_SOURCE -- confirm against feature_test_macros(7). */
   #define _BSD_SOURCE             /* See feature_test_macros(7) */
   #include <endian.h>

   /* 16-bit values: host order to/from big-endian (be) and little-endian (le). */
   uint16_t htobe16(uint16_t host_16bits);
   uint16_t htole16(uint16_t host_16bits);
   uint16_t be16toh(uint16_t big_endian_16bits);
   uint16_t le16toh(uint16_t little_endian_16bits);

   /* 32-bit values: same four conversions. */
   uint32_t htobe32(uint32_t host_32bits);
   uint32_t htole32(uint32_t host_32bits);
   uint32_t be32toh(uint32_t big_endian_32bits);
   uint32_t le32toh(uint32_t little_endian_32bits);

   /* 64-bit values: same four conversions. */
   uint64_t htobe64(uint64_t host_64bits);
   uint64_t htole64(uint64_t host_64bits);
   uint64_t be64toh(uint64_t big_endian_64bits);
   uint64_t le64toh(uint64_t little_endian_64bits);

如果您没有使用glibc，那么可以参考glibc-2.18/bits/byteswap.h

C语言在不同体系结构上的文件操作

3 个答案: