Question

我很难开始使用libcurl。下面的代码似乎没有从指定的URL检索整个页面。我哪里错了？

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <curl/curl.h>
#include <curl/types.h>
#include <curl/easy.h>

using namespace std;

/* Global destination for the downloaded data; its size is fixed at 1024 bytes. */
char buffer[1024];

/*
 * Write callback registered with CURLOPT_WRITEFUNCTION.
 *
 * Copies the size * nmemb bytes at ptr into the global buffer with strncpy(),
 * always starting at offset 0, so every call writes over what the previous
 * call stored. The length is not compared against sizeof buffer, and the
 * stream argument is not used.
 *
 * Returns size * nmemb, i.e. reports the whole chunk as consumed.
 */
size_t tobuffer(char *ptr, size_t size, size_t nmemb, void *stream)
{
    const size_t chunk_len = size * nmemb;

    strncpy(buffer, ptr, chunk_len);
    return chunk_len;
}

/*
 * Fetch http://google.co.in with libcurl, routing the received data through
 * tobuffer(), then print the global buffer as a C string.
 *
 * Always returns 0, including when no easy handle could be created.
 */
int main() {
    CURLcode res;
    CURL *curl = curl_easy_init();

    /* Without a handle there is nothing to transfer. */
    if(!curl)
        return 0;

    curl_easy_setopt(curl, CURLOPT_URL, "http://google.co.in");
    curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION,1);
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &tobuffer);

    /* The transfer status is stored in res but never inspected. */
    res = curl_easy_perform(curl);

    printf("%s",buffer);

    curl_easy_cleanup(curl);
    return 0;
}

Answer 1

如libcurl文档中关于curl_easy_setopt()的说明所示，回调函数会被多次调用，以传递所获取页面的全部字节。

您的函数在每次调用时都会覆盖同一个缓冲区，结果是在curl_easy_perform()完成获取之后，缓冲区中只剩下最后一次调用tobuffer()时留下的内容。

简而言之，您的函数tobuffer()不能在每次调用时都覆盖同一个缓冲区，而必须采取其他做法。

更新：

例如，您可以执行以下完全未经测试的代码：

/*
 * Accumulated download. `buffer` is the heap block (NULL until the first
 * chunk arrives), `bufferlen` is its capacity in bytes, and `writepos` is the
 * number of bytes stored so far, i.e. the offset where the next chunk goes.
 */
struct buf {
    char *buffer;
    size_t bufferlen;
    size_t writepos;
} buffer = {0};

/*
 * libcurl write callback: append the size * nmemb bytes at ptr to the global
 * `buffer`, growing the allocation as needed.
 *
 * The data is copied as raw bytes with memcpy(); no NUL terminator is added,
 * so use buffer.writepos as the length of the stored data. The stream
 * argument is not used.
 *
 * Returns the number of bytes stored. If the buffer cannot be grown, nothing
 * is stored, the existing contents stay valid, and 0 is returned; for a
 * non-empty chunk that differs from size * nmemb, which tells libcurl to
 * abort the transfer.
 */
size_t tobuffer(char *ptr, size_t size, size_t nmemb, void *stream)
{
    const size_t initial_capacity = 1024;
    size_t nbytes = size * nmemb;
    size_t needed;
    size_t newlen;

    (void)stream;

    /* Refuse a chunk whose end offset would not fit in size_t. */
    if (nbytes > (size_t)-1 - buffer.writepos)
        return 0;
    needed = buffer.writepos + nbytes;

    /* Keep doubling: one step may not be enough for a large chunk. */
    newlen = buffer.bufferlen ? buffer.bufferlen : initial_capacity;
    while (newlen < needed) {
        if (newlen > (size_t)-1 / 2)
            return 0;
        newlen *= 2;
    }

    if (buffer.buffer == NULL || newlen != buffer.bufferlen) {
        /* Go through a temporary so the old block is not lost on failure. */
        char *grown = realloc(buffer.buffer, newlen);
        if (grown == NULL)
            return 0;
        buffer.buffer = grown;
        buffer.bufferlen = newlen;
    }

    if (nbytes != 0)
        memcpy(buffer.buffer + buffer.writepos, ptr, nbytes);
    /* Advance the write position so the next chunk is appended. */
    buffer.writepos += nbytes;
    return nbytes;
}

在程序的某个稍后阶段，您需要释放分配的内存，如下所示：

/*
 * Release the memory held by *b and reset it to the empty state:
 * buffer == NULL, bufferlen == 0, writepos == 0.
 *
 * b itself must be a valid pointer; it is dereferenced unconditionally.
 */
void freebuffer(struct buf *b) {
    char *owned = b->buffer;

    b->writepos = 0;
    b->bufferlen = 0;
    b->buffer = NULL;
    free(owned);
}

另请注意，我使用memcpy()代替strncpy()将数据移动到缓冲区。这很重要，因为libcurl没有声称传递给回调函数的数据实际上是NUL终止的ASCII字符串。特别是，如果您检索.gif图像文件，它当然可以（并且将）在文件中包含您希望保留在缓冲区中的零字节。 strncpy()将在源数据中看到的第一个NUL后停止复制。

作为留给读者的练习，此示例代码只做了最基本的错误处理，实际使用时您必须自行补充。请特别注意realloc()调用失败的情况：如果直接用它的返回值覆盖原来的指针，原有的内存块就会泄漏。

另一个改进是使用libcurl提供的选项（CURLOPT_WRITEDATA），由调用者指定传给回调函数的stream参数的值。这样就可以在不使用全局变量的情况下分配和管理缓冲区。我强烈建议您这样做。

Answer 2

char buffer[1024];

当缓冲区大小限制为1024时，如何获得整个网页？

Answer 3

您正在使用libcurl执行Simple get操作。您可以使用此示例程序作为参考。为什么不在回调中打印缓冲区或写入文件，如本例所示？

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include <curl/curl.h>
#include <curl/types.h>
#include <curl/easy.h>

/*
 * libcurl write callback: write the received chunk (nmemb items of `size`
 * bytes each, starting at ptr) to the FILE * passed in `stream`.
 *
 * Returns the number of BYTES written. fwrite() reports whole items, so its
 * result is scaled by `size`; libcurl expects a byte count and treats a value
 * other than size * nmemb as a write error.
 */
static size_t write_data(void *ptr, size_t size, size_t nmemb, void *stream)
{
  size_t items = fwrite(ptr, size, nmemb, (FILE *)stream);
  return items * size;
}

int main(int argc, char **argv)
{
  CURL *curl_handle;
  static const char *headerfilename = "head.out";
  FILE *headerfile;
  static const char *bodyfilename = "body.out";
  FILE *bodyfile;

  curl_global_init(CURL_GLOBAL_ALL);

  /* init the curl session */ 
  curl_handle = curl_easy_init();

  /* set URL to get */ 
  curl_easy_setopt(curl_handle, CURLOPT_URL, "http://curl.haxx.se");

  /* no progress meter please */ 
  curl_easy_setopt(curl_handle, CURLOPT_NOPROGRESS, 1L);

  /* send all data to this function  */ 
  curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, write_data);

  /* open the files */ 
  headerfile = fopen(headerfilename,"w");
  if (headerfile == NULL) {
    curl_easy_cleanup(curl_handle);
    return -1;
  }
  bodyfile = fopen(bodyfilename,"w");
  if (bodyfile == NULL) {
    curl_easy_cleanup(curl_handle);
    return -1;
  }

  /* we want the headers to this file handle */ 
  curl_easy_setopt(curl_handle,   CURLOPT_WRITEHEADER, headerfile);

  /*
   * Notice here that if you want the actual data sent anywhere else but
   * stdout, you should consider using the CURLOPT_WRITEDATA option.  */ 

  /* get it! */ 
  curl_easy_perform(curl_handle);

  /* close the header file */ 
  fclose(headerfile);

  /* cleanup curl stuff */ 
  curl_easy_cleanup(curl_handle);

  return 0;
}

Answer 4

提示：使用stringstream！只需用stringstream替换缓冲区，然后输出内容： (string)<streamname>.str() 适合我!!!

Answer 5

我不了解这个库，但在我看来你是在重复使用同一个缓冲区……如果下载的页面放不进缓冲区，你就会反复覆盖它，最后可能只看到最后一个片段。例如，如果把字母表复制到一个10个字符的缓冲区中，我们会得到：

ABCDEFGHIJ - first copy stores this
KLMNOPQRST - second copy stores this
UVWXYZ     - third copy stores this

根据报告的数据大小是否包含终止的0/NUL字符，缓冲区最终可能被当作“UVWXYZ”（printf("%s")会把它解释为“UVWXYZ”），也可能被当作“UVWXYZQRST”（printf("%s")会越过缓冲区末尾继续打印，直到碰巧遇到一个0/NUL为止）。

res = curl_easy_perform（curl）强烈暗示它会给你一个结果/错误代码，你是否打算检查这个值是什么以及文档说的意思是什么？

你真的应该学会自己诊断这类问题……如果你不是把数据复制到缓冲区，而是在回调中放一条std::cout语句，显示收到的数据以及回调被调用了多少次，你就能发现可疑之处。把问题逐步拆解，直到找到原因为止。

Answer 6

您似乎漏掉了CURLOPT_WRITEDATA选项。通过它设置的指针会作为最后一个参数（void *stream）传递给WRITEFUNCTION回调tobuffer(char *ptr, ...)：

curl_easy_setopt(curl, CURLOPT_WRITEDATA, &buffer);

使用libcurl的问题：它似乎没有获得整个页面

6 个答案: