Question

在我的代码中，我遇到需要将数据从一个文件复制到另一个文件的情况。我提出的解决方案看起来像这样：

/* Copy toCopy bytes from fin to fout through a fixed-size buffer. */
const int BUF_SIZE = 1024;
char buf[BUF_SIZE];

int left_to_copy = toCopy;
while (left_to_copy > 0)
{
    /* Never request more than the buffer holds or more than is still owed. */
    size_t want = left_to_copy < BUF_SIZE ? (size_t)left_to_copy
                                          : (size_t)BUF_SIZE;

    /* With an element size of 1, fread/fwrite return byte counts, so a
       short transfer is visible instead of being rounded down to 0 items. */
    size_t got = fread(buf, 1, want, fin);
    if (got == 0)
        break;  /* EOF or read error before toCopy bytes were available */

    /* Write only what was actually read, never stale buffer contents. */
    if (fwrite(buf, 1, got, fout) != got)
        break;  /* write error: stop instead of silently dropping data */

    left_to_copy -= (int)got;
}

/* left_to_copy != 0 here means the copy was cut short; inspect
   ferror(fin) / ferror(fout) to tell a read failure from a write failure. */

我的主要想法是可能有类似memcpy的东西，但是对于文件中的数据。我只给它两个文件流和总字节数。我搜索了一下，但我找不到任何这样的东西。

但是如果没有这样的东西，我应该使用什么缓冲区大小来实现最快的传输？更大意味着更少的系统调用，但我认为它可能会破坏系统上的其他缓冲或缓存。我应该动态分配缓冲区，以便只进行一对读/写调用吗？在这种特定情况下，典型的传输大小是从几KB到十几MB。

编辑：对于操作系统特定信息，我们使用的是Linux。

EDIT2：

我尝试使用 sendfile，但没有成功。它写入的数据量似乎是正确的，但写出的内容是垃圾数据。

我用上面这样的东西替换了我的例子：

/* Flush both stdio streams before working on their underlying descriptors. */
fflush(fin);
fflush(fout);
/* Take fin's current stream position as the starting offset for the copy.
   NOTE(review): ftello64() is stored in an off_t here; confirm off_t is
   64 bits wide in this build. */
off_t offset = ftello64(fin);
/* Ask the kernel to copy toCopy bytes from fin's descriptor to fout's,
   starting at offset. NOTE(review): the return value is not checked, so a
   failed or short transfer goes unnoticed. */
sendfile(fileno(fout), fileno(fin), &offset, toCopy);
/* Reposition the fin stream at the value sendfile() left in offset. */
fseeko64(fin, offset, SEEK_SET);

由于它似乎不起作用，我逐个添加了这些 fflush 和 seek 调用。

Answer 1

您需要告诉我们您的（所需）操作系统。适当的调用（或者更确切地说，最合适的调用）将是非常系统特定的。

在 Linux / *BSD / Mac 中，您可以使用 sendfile(2)，它在内核空间中完成复制。

概要

 #include <sys/sendfile.h>
 /* Copies data from in_fd (opened for reading) to out_fd (opened for
    writing) entirely inside the kernel. */
 ssize_t sendfile(int out_fd, int in_fd, off_t *offset, size_t count);

说明

sendfile() copies data between one file descriptor and another.  Because this
copying is done within the kernel, sendfile() is more efficient than the
combination of read(2) and write(2), which would require transferring data to
and from user space.

in_fd should be a file descriptor opened for reading and out_fd should be a
descriptor opened for writing.

进一步阅读：

Linux sendfile source
Understanding sendfile() and splice()
“Server portion of sendfile example.”←示例C代码（存档链接，粘贴在下面）

sendfile示例的服务器部分：

/*

Server portion of sendfile example.

usage: server [port]

Copyright (C) 2003 Jeff Tranter.


This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

*/


#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <sys/sendfile.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <netinet/in.h>


/*
 * Serve files over TCP using sendfile(2).
 *
 * Listens on the port given as the optional first argument (default 1234).
 * For every connection it reads a filename from the client and streams that
 * file back with sendfile(). The filename "quit" shuts the server down.
 *
 * Returns 0 after a "quit" request; exits with status 1 on any error.
 *
 * NOTE(security): the client-supplied path is opened verbatim, so any file
 * readable by the server process can be requested. Acceptable for a demo
 * only; restrict or validate paths before exposing this to a network.
 */
int main(int argc, char **argv)
{
  int port = 1234;           /* port number to use */
  int sock;                  /* listening socket descriptor */
  int desc;                  /* descriptor for the accepted connection */
  int fd;                    /* file descriptor for file to send */
  struct sockaddr_in addr;   /* socket parameters for bind */
  struct sockaddr_in addr1;  /* socket parameters for accept */
  socklen_t addrlen;         /* in/out argument to accept */
  struct stat stat_buf;      /* argument to fstat */
  off_t offset = 0;          /* file offset */
  off_t remaining;           /* bytes of the file still to send */
  char filename[PATH_MAX];   /* filename to send */
  size_t len;                /* length of the received filename */
  ssize_t nbytes;            /* byte count from recv/sendfile */
  int rc;                    /* holds return code of system calls */

  /* check command line arguments, handling an optional port number */
  if (argc == 2) {
    char *end;
    long value;

    /* strtol (unlike atoi) lets us reject trailing garbage and overflow */
    errno = 0;
    value = strtol(argv[1], &end, 10);
    if (errno != 0 || end == argv[1] || *end != '\0' ||
        value <= 0 || value > 65535) {
      fprintf(stderr, "invalid port: %s\n", argv[1]);
      exit(1);
    }
    port = (int)value;
  } else if (argc != 1) {
    fprintf(stderr, "usage: %s [port]\n", argv[0]);
    exit(1);
  }

  /* create Internet domain socket */
  sock = socket(AF_INET, SOCK_STREAM, 0);
  if (sock == -1) {
    fprintf(stderr, "unable to create socket: %s\n", strerror(errno));
    exit(1);
  }

  /* fill in socket structure */
  memset(&addr, 0, sizeof(addr));
  addr.sin_family = AF_INET;
  addr.sin_addr.s_addr = INADDR_ANY;
  addr.sin_port = htons((unsigned short)port);

  /* bind socket to the port */
  rc = bind(sock, (struct sockaddr *)&addr, sizeof(addr));
  if (rc == -1) {
    fprintf(stderr, "unable to bind to socket: %s\n", strerror(errno));
    exit(1);
  }

  /* listen for clients on the socket */
  rc = listen(sock, 1);
  if (rc == -1) {
    fprintf(stderr, "listen failed: %s\n", strerror(errno));
    exit(1);
  }

  while (1) {

    /* wait for a client to connect; accept() reads addrlen as the size of
       the address buffer, so it must be set before every call */
    addrlen = sizeof(addr1);
    desc = accept(sock, (struct sockaddr *)&addr1, &addrlen);
    if (desc == -1) {
      fprintf(stderr, "accept failed: %s\n", strerror(errno));
      exit(1);
    }

    /* get the file name from the client, leaving room for the terminator */
    nbytes = recv(desc, filename, sizeof(filename) - 1, 0);
    if (nbytes == -1) {
      fprintf(stderr, "recv failed: %s\n", strerror(errno));
      exit(1);
    }

    /* null terminate and strip any \r and \n from filename; the length
       checks keep an empty message from indexing before the buffer */
    filename[nbytes] = '\0';
    len = strlen(filename);
    if (len > 0 && filename[len - 1] == '\n')
      filename[--len] = '\0';
    if (len > 0 && filename[len - 1] == '\r')
      filename[--len] = '\0';

    /* exit server if filename is "quit" */
    if (strcmp(filename, "quit") == 0) {
      fprintf(stderr, "quit command received, shutting down server\n");
      close(desc);
      break;
    }

    fprintf(stderr, "received request to send file %s\n", filename);

    /* open the file to be sent */
    fd = open(filename, O_RDONLY);
    if (fd == -1) {
      fprintf(stderr, "unable to open '%s': %s\n", filename, strerror(errno));
      exit(1);
    }

    /* get the size of the file to be sent */
    if (fstat(fd, &stat_buf) == -1) {
      fprintf(stderr, "unable to stat '%s': %s\n", filename, strerror(errno));
      exit(1);
    }

    /* copy file using sendfile; it may transfer fewer bytes than requested,
       so keep calling until the whole file has gone out */
    offset = 0;
    remaining = stat_buf.st_size;
    while (remaining > 0) {
      nbytes = sendfile(desc, fd, &offset, (size_t)remaining);
      if (nbytes == -1) {
        if (errno == EINTR)
          continue;
        fprintf(stderr, "error from sendfile: %s\n", strerror(errno));
        exit(1);
      }
      if (nbytes == 0)
        break;  /* no more data: the file shrank after fstat */
      remaining -= nbytes;
    }
    if (remaining != 0) {
      fprintf(stderr, "incomplete transfer from sendfile: %lld of %lld bytes\n",
              (long long)(stat_buf.st_size - remaining),
              (long long)stat_buf.st_size);
      exit(1);
    }

    /* close descriptor for file that was sent */
    close(fd);

    /* close socket descriptor */
    close(desc);
  }

  /* close socket */
  close(sock);
  return 0;
}

Answer 2

您可以做的一件事是增加缓冲区的大小。如果你有大文件，那可能会有所帮助。

另一件事是直接调用操作系统提供的接口（无论你使用的是哪种操作系统）。fread() 和 fwrite() 中存在一些开销。

如果你可以使用无缓冲的例程并提供你自己的更大的缓冲区，你可能会看到一些明显的性能改进。

我建议根据 fread() 的返回值来确定要写入的字节数，并以此判断复制何时完成。

Answer 3

考虑使用目标操作系统的内存映射文件 I/O 可能是值得的。对于您所讨论的文件大小，这是一种可行的方法，而且操作系统能比您自己优化得更好。不过，如果您想编写可跨操作系统移植的代码，这可能不是最好的方法。

这需要一些设置，但是一旦设置好，你就可以不用再写循环代码，而且它基本上看起来就像一个 memcpy。

Answer 4

就快速读取而言，我认为您还可以选择映射文件，即通过 mmap 使用内存映射 I/O（参见 mmap 的手册页）。与传统的 I/O 相比，它被认为更高效，特别是在处理大文件时。

mmap 实际上并不读取该文件，它只是将文件映射到地址空间。这就是它如此之快的原因：在您实际访问该地址空间区域之前，不会发生磁盘 I/O。

或者您可以先查看块大小，并据此进行读取；这也被认为是高效的，因为在这种情况下编译器可以更好地进行优化。

在C / C ++中将数据从一个文件复制到另一个文件的最快方法？

4 个答案: