我试图不使用lseek()
,fseek
之类的功能将未知长度的二进制文件读取到缓冲区块中。
eof
,但是我对如何实现这一点感到困惑。 谢谢。
#include <stdio.h>
#include <stdlib.h>
typedef struct Buffer{
unsigned char data[1012];
struct Buffer *next; //12 bytes
}Buffer;
void mymemcpy(void *dest, void *src, size_t length){
Buffer *buffer_toFill = (Buffer *)dest;
Buffer *buffer_toAdd = (Buffer *)src;
int a = 0;
for(int i = 0; i < length; i++){
buffer_toFill->data[i] = buffer_toAdd->data[i];
}
}
Buffer* add_buffer_front(Buffer *head, Buffer *read_buffer, int size){
Buffer *new_buffer = malloc(sizeof(Buffer));
mymemcpy(new_buffer, read_buffer, size);
if(head != NULL){
new_buffer->next = head;
}
return new_buffer;
}
void display_List(Buffer *head, size_t length){
Buffer *current = head;
while(current != NULL){
for(int i = 0; i < length; i++){
printf("%02X",(unsigned)current->data[i]); //this shows different value compare with xxd <filename>
//printf("%c", current->data[i]);
}
Buffer *prev = current;
free(prev);
current = current->next;
}
}
int main(int argc, char **argv){
FILE *fd;
Buffer *head_buffer = NULL;
int file_length = 0;
int eof_int = 1;
if(argc != 2){
printf("Usage: readFile <filename>\n");
return 1;
}
fd = fopen(argv[1], "rb");
while(eof_int != 0){
Buffer *new_buffer = malloc(sizeof(Buffer));
eof_int = fread(new_buffer, sizeof(Buffer)-12, 1, fd);
if(eof_int == 0){
//size_t length
//
//
head_buffer = add_buffer_front(head_buffer, new_buffer, length);
file_length += length;
}else{
head_buffer = add_buffer_front(head_buffer, new_buffer, (sizeof(new_buffer->data)));
file_length += (sizeof(new_buffer->data));
}
}
display_List(head_buffer, file_length);
fclose(fd);
return 0;
}
答案 0 :(得分:2)
您有几个问题。
(1)fread
返回读取的项目数,但不会返回eof
指示。您需要致电feof(stream*)
来确定是否已到达文件末尾。
(2)您说的是下一个指针是12个字节。这是一个非常危险的假设。最好读取已分配给数据结构的1012个字节。您很有可能正在打印未读入的东西,而只是未初始化的内存。
(3)使用fread
的返回值来确定要复制多少内存。
答案 1 :(得分:1)
请参见下面的代码中的注释-还可以考虑将1012更改为使用#define。
#include <stdio.h>
#include <stdlib.h>
typedef struct Buffer{
unsigned char data[1012];
struct Buffer *next; //12 bytes
}Buffer;
// Create a structure to store stuff about a file
typedef struct {
Buffer *head;
Buffer *tail;
size_t length;
} MyFile;
/*
void mymemcpy(void *dest, void *src, size_t length){
Buffer *buffer_toFill = (Buffer *)dest;
Buffer *buffer_toAdd = (Buffer *)src;
int a = 0;
for(int i = 0; i < length; i++){
buffer_toFill->data[i] = buffer_toAdd->data[i];
}
}
Buffer* add_buffer_front(Buffer *head, Buffer *read_buffer, int size){
Buffer *new_buffer = malloc(sizeof(Buffer));
mymemcpy(new_buffer, read_buffer, size);
if(head != NULL){
new_buffer->next = head;
}
return new_buffer;
}
*/
// Lets make this easier - The buffer has already been "malloced" once - why do it again
// And why are you reversing the file
// Perhaps
void add_buffer(Buffer *to_be_added, MyFile *file, size_t extra_length) {
if (file->tail) { // We have one item in the list
file->tail->next = to_be_added;
} else { // First buffer!
file-> head = to_be_added;
file-> tail = to_be_added;
}
to_be_added->next = NULL; // This is always the case as it is the last one
file->length += extra_length;
}
/*
void display_List(Buffer *head, size_t length){
Buffer *current = head;
while(current != NULL){
for(int i = 0; i < length; i++){
printf("%02X",(unsigned)current->data[i]); //this shows different value compare with xxd <filename>
//printf("%c", current->data[i]);
}
Buffer *prev = current;
free(prev);
current = current->next;
}
}
*/
// Instead pass in the new structure
void display_list(MyFile *file) {
size_t contents_left = file -> length;
Buffer * current = file -> head;
while (current) {
// At most each chunk has 1012 bytes - Check for that
size_t chunk_length = contents_left > 1012 ? 1012 : contents_left;
for(int i = 0; i <chunk_length ; i++){
printf("%02X",(unsigned)current->data[i]);
}
current = current -> next;
}
}
}
int main(int argc, char **argv){
FILE *fd;
MyFile read_file;
read_file.head = NULL;
read_file.tail = NULL;
read_file.length = 0;
Buffer *head_buffer = NULL;
int file_length = 0;
int eof_int = 1;
if(argc != 2){
printf("Usage: readFile <filename>\n");
return 1;
}
fd = fopen(argv[1], "rb");
// Check fd
if (fd == NULL) {
// error stuff
return EXIT_FAILURE; // Look up the include for this
}
while(eof_int != 0){
Buffer *new_buffer = malloc(sizeof(Buffer));
eof_int = fread(new_buffer->data, 1012, 1, fd); // Do not make assumptions on the size of a pointer and store it in the correct location
if(eof_int == 0) { // Read nothing
free(new_buffer); // We was too optimistic! Did Not need this in the end
break;
} else {
add_buffer(&read_file, new_buffer, eof_int);
}
}
display_List(&read_file);
fclose(fd);
return 0;
}
答案 2 :(得分:0)
您要找的技巧是fread
返回读取的 项目 的数量。您正在读取1个缓冲区已满,因此它只会告诉您读取0或1个缓冲区。相反,请翻转并反转它: 读取缓冲区的字节数 。
size_t bytes_read = fread(buffer, 1, sizeof(Buffer)-12, fd);
现在您可以知道多少字节已读入缓冲区。我们可以在size
上添加一个Buffer
字段,以便每个缓冲区可以记住它读取了多少字节,而只打印了那么多字节。
const size_t BUFFER_SIZE = 1024;
typedef struct Buffer {
// I'll explain why I switched to a pointer in a moment
unsigned char *data;
size_t size;
struct Buffer *next;
} Buffer;
void Buffer_print( Buffer *buffer ) {
for( size_t i = 0; i < buffer->size; i++ ) {
printf("%02hhX ", buffer->data[i]);
}
}
Buffer *Buffer_new() {
Buffer *buffer = malloc(sizeof(Buffer));
buffer->size = 0;
buffer->data = NULL;
buffer->next = NULL;
return buffer;
}
请注意,我会谨慎初始化缓冲区的所有字段,否则我们有可能会被垃圾吞没。
现在,我们已经更改了缓冲区,因此关于其大小和位置的假设被打破了。没关系,无论如何,我们应该直接阅读buffer->data
。
size_t Buffer_read( Buffer *buffer, size_t buffer_size, FILE* fp ) {
buffer->data = malloc(buffer_size);
size_t bytes_read = fread(buffer->data, 1, buffer_size, fp);
buffer->size = bytes_read;
return bytes_read;
}
现在,缓冲区知道其读取了多少数据,因此我们可以根据需要分配任意大小的数据。无需将其硬编码到结构中。这使代码更加灵活和高效。它使我们能够廉价地分配空缓冲区,这将使事情变得简单得多。
我们还可以避免使用malloc
并使用垃圾进行初始化buffer->data
。如果fread
仅部分填充buffer->data
,则其余部分将保持为垃圾。没关系,知道我们已读取的数据大小意味着我们在遇到垃圾之前就停止打印。
现在我们可以构建循环了。读取0字节后,我们知道读取已完成。
while( Buffer_read( buffer, BUFFER_SIZE, fp ) > 0 ) {
... now what ...
}
fclose(fp);
链表的工作方式,当您添加到链表时,将其添加到tail->next
并使其成为新尾巴。这通常称为“推”。
Buffer *Buffer_push( Buffer *tail, Buffer *new_tail ) {
tail->next = new_tail;
return new_tail;
}
Buffer *head = Buffer_new();
Buffer *tail = head;
while( Buffer_read( tail, BUFFER_SIZE, fp ) > 0 ) {
tail = Buffer_push( tail, Buffer_new() );
}
fclose(fp);
请注意,我们从一个空的head
开始,这也是tail
。从这两个分配开始,使循环更加简单。无需检查if( head )
或if( tail )
。这确实意味着我们列表的末尾总是有一个空缓冲区。没关系。由于我们不再使用固定的buffer->data
空缓冲区,所以现在变得很小且便宜。
最后一步是打印所有内容。我们已经可以打印一个缓冲区,因此只需要遍历链表并打印每个缓冲区即可。
void Buffer_print_all( Buffer *head ) {
for( Buffer *buffer = head; buffer != NULL; buffer = buffer->next ) {
Buffer_print(buffer);
}
}
Buffer_print_all(head);
最后一个空的缓冲区挂在末端就可以了。它知道其大小为0,因此Buffer_print
实际上不会使用空buffer->data
。