Question

我刚刚被分配了一个项目来为unix创建一个归档工具。所以在创建程序之后我会做类似

的事情

"./bar -c test_archive.bar file.1"

它会创建一个包含file.1的test_archive.bar。然后我可以执行一些命令，比如列出其中的文件等等。但是我无法理解制作test_archive.bar的概念：我只知道它实际上只是一个文件，但如果你打开一个.tgz文件（例如"vi file.tgz"），它会给出一个目录/文件列表。

那么，有没有什么好方法可以创建一个存档/目录，我可以在其中推断一些文件并列出其名称等。

注意：我查看了tar.c以及其中包含的所有文件，但每个文件都是如此抽象，很难理解。

注意：我知道如何阅读命令行标志等。

Answer 1

使用旧的（但仍然有效的）tar格式实际上很容易。维基百科对该格式有很好的说明。您需要做的就是：

对于每个文件：

填写并向tar文件发出标题
发送文件内容
将文件大小填充为512字节的倍数

tar文件的最基本有效标头是:(基本上来自维基百科）

100字节：文件名
8字节：文件模式
8个字节：所有者的数字ID
8个字节：组的数字ID
12个字节：文件大小
12个字节：上次修改时间的时间戳
8字节：校验和
1个字节：文件类型
100字节：链接文件的名称

文件类型可以是0（普通文件），1（硬链接）或2（符号链接）。链接文件的名称是链接指向的文件的名称。如果我没记错的话，如果您有硬链接或符号链接，则文件内容应为空。

引用维基百科：

“数字值以八进制编码，使用ASCII数字并带前导零。出于历史原因，末尾应使用一个NUL或空格字符。”

“校验和的计算方法是：将头记录中所有字节按无符号值求和，其中8个校验和字节按ASCII空格（十进制值32）计算。它存储为带前导零的六位八进制数，后面跟一个NUL，然后是一个空格。”

这是一个简单的tarball生成器。创建一个提取器，处理自动文件传送等，留给读者练习。

#include<stdio.h>
#include<string.h>
#include<time.h>


//Layout of one tar header block. The fields add up to 257 bytes and the
//trailing padding brings the block to 512 bytes.
//Numeric fields hold ASCII octal text (written with %o below), not binary integers.
struct tar_header{
    char name[100];     //File name as stored in the archive
    char mode[8];       //File mode, octal text
    char owner[8];      //Owner's numeric ID, octal text
    char group[8];      //Group's numeric ID, octal text
    char size[12];      //File size in bytes, octal text
    char modified[12];  //Last-modification timestamp, octal text
    char checksum[8];   //Sum of all header bytes, with this field counted as spaces
    char type[1];       //Entry type: '0' normal file, '1' hard link, '2' symbolic link
    char link[100];     //Name of the linked file (for link entries)
    char padding[255];  //Zero fill up to the 512-byte block size
};

//Write `amount` copies of the byte `value` to the stream `f`.
//Used to emit zero padding and placeholder blocks; write errors are not reported.
void fexpand(FILE* f, size_t amount, int value){
    size_t written;
    for( written = 0; written < amount; ++written ){
        fputc( value, f );
    }
}

/*
 * Append one file to an open tar archive as a regular-file entry.
 *
 * tar_file      - archive stream; must be writable and seekable, because the
 *                 header is filled in by seeking back after the data is copied.
 * file          - path of the input file to read.
 * internal_name - name stored in the header; only the first 99 bytes fit.
 *
 * Mode (0777), owner (0), group (0) and the timestamp (current time) are
 * placeholders rather than values queried from the filesystem.
 * Failures are reported on stderr; the function returns nothing.
 */
void tar_add(FILE* tar_file, const char* file, const char* internal_name){
    //Open the input before touching the archive, so an unreadable file
    //does not leave an all-zero header block behind
    FILE* input = fopen( file, "rb" );
    if( input == NULL ){
        fprintf( stderr, "Failed to open %s for reading\n", file);
        return;
    }

    //Get current position; round to a multiple of 512 if we aren't there already
    long index = ftell( tar_file );
    if( index < 0 ){
        fprintf( stderr, "Failed to get the archive position for %s\n", file );
        fclose( input );
        return;
    }
    long offset = index % 512;
    if( offset != 0 ){
        fexpand( tar_file, (size_t)( 512 - offset ), 0 );
    }
    //Store the index for the header to return to later
    index = ftell( tar_file );
    //Write some space for our header
    fexpand( tar_file, sizeof(struct tar_header), 0 );

    //Copy the file content to the tar file. Loop on fread's result rather than
    //feof(): after a read error feof() stays false and the loop would never end.
    char buffer[2000];
    size_t count;
    while( (count = fread( buffer, 1, sizeof buffer, input )) > 0 ){
        if( fwrite( buffer, 1, count, tar_file ) != count ){
            fprintf( stderr, "Failed to write %s to the archive\n", file );
            break;
        }
    }
    if( ferror( input ) ){
        fprintf( stderr, "Failed to read %s\n", file );
    }
    fclose( input );

    //Get the end to calculate the size of the file
    long end = ftell( tar_file );
    if( index < 0 || end < 0 ){
        fprintf( stderr, "Failed to get the archive position for %s\n", file );
        return;
    }
    unsigned long size = (unsigned long)( end - index - (long)sizeof(struct tar_header) );

    //Round the file data up to a multiple of 512 bytes. Only pad when there is
    //a remainder: an already aligned file must not get an extra zero block.
    offset = end % 512;
    if( offset != 0 ){
        fexpand( tar_file, (size_t)( 512 - offset ), 0 );
    }

    //Fill out a new tar header
    struct tar_header header;
    memset( &header, 0, sizeof header );
    if( snprintf( header.name, sizeof header.name, "%s", internal_name ) >= (int)sizeof header.name ){
        fprintf( stderr, "Name %s is too long for the header and was truncated\n", internal_name );
    }
    snprintf( header.mode, sizeof header.mode, "%06o ", 0777u ); //You should probably query the input file for this info
    snprintf( header.owner, sizeof header.owner, "%06o ", 0u ); //^
    snprintf( header.group, sizeof header.group, "%06o ", 0u ); //^
    //11 octal digits plus the terminating NUL fill the 12-byte fields exactly
    snprintf( header.size, sizeof header.size, "%011lo", size );
    snprintf( header.modified, sizeof header.modified, "%011lo", (unsigned long)time( NULL ) ); //Again, get this from the filesystem
    //The checksum is computed with its own field counted as eight spaces
    memset( header.checksum, ' ', sizeof header.checksum );
    header.type[0] = '0';

    //Calculate the checksum: sum of every header byte as an unsigned value
    unsigned int checksum = 0;
    const unsigned char* bytes = (const unsigned char*)&header;
    size_t i;
    for( i = 0; i < sizeof header; ++i ){
        checksum += bytes[i];
    }

    //Six octal digits followed by NUL; the eighth byte keeps the space set above
    snprintf( header.checksum, sizeof header.checksum - 1, "%06o", checksum );

    //Save the new end to return to after writing the header
    end = ftell( tar_file );
    if( end < 0 || fseek( tar_file, index, SEEK_SET ) != 0 ){
        fprintf( stderr, "Failed to seek back to the header of %s\n", file );
        return;
    }

    //Write the header
    if( fwrite( bytes, 1, sizeof header, tar_file ) != sizeof header ){
        fprintf( stderr, "Failed to write the header of %s\n", file );
    }

    //Return to the end
    fseek( tar_file, end, SEEK_SET );
}

/*
 * Entry point. argv[1] is the archive to create; argv[2..] are the files to add,
 * each stored under the same name it was given on the command line.
 * Returns 0 on success and 1 when no archive name was given, the archive could
 * not be opened, or closing (flushing) it failed.
 */
int main( int argc, char* argv[] ){
    if( argc < 2 ){
        fprintf( stderr, "Please specify some file names!\n" );
        //A usage error must not report success to the calling shell
        return 1;
    }
    FILE* tar = fopen( argv[1], "wb" );
    if( !tar ){
        fprintf( stderr, "Failed to open %s for writing\n", argv[1] );
        return 1;
    }
    int i;
    for( i = 2; i < argc; ++i ){
        tar_add( tar, argv[i], argv[i] );
    }
    //Pad out the end of the tar file with two zero-filled 512-byte blocks
    fexpand( tar, 1024, 0);
    //fclose flushes buffered data; a failure here means the archive is incomplete
    if( fclose( tar ) != 0 ){
        fprintf( stderr, "Failed to finish writing %s\n", argv[1] );
        return 1;
    }
    return 0;
}

Answer 2

那么，有没有什么好方法可以创建存档/目录我可以在其中推断一些文件并列出他们的名字等。

基本上有两种方法：

一个接一个地复制文件内容，每个文件内容前面都带有一个"标题"块，其中包含文件名、大小和（可选的）其他属性信息。Tar就是一个例子。
一个接一个地复制文件内容，并在某处（开头或结尾）放置一个"索引"，其中包含文件名列表及其大小和（可选的）其他属性。根据文件大小，可以计算出每个文件的开始/结束位置。

大多数真实世界的归档工具会组合使用这两种方法，并添加其他功能，如校验和、压缩和加密。

实施例

假设我们有两个文件：hello.txt，内容为Hello, World!（12字节）；以及bar.txt，内容为foobar（6字节）。

在第一种方法中，存档看起来像那样

[hello.txt,12][Hello, World!][bar.txt,6][foobar]
 ^- fixed size ^- 12 bytes    ^- fixed size ^- 6 bytes

标题块的长度要么必须是常量，要么你必须把它们的长度编码在某处。

第二名：

[Hello, World!foobar][hello.txt,12,bar.txt,6]
 ^- 12+6 bytes

在C中创建自己的归档工具

2 个答案: