Question

我正在尝试把下面这个 curl 函数放进我的类（class）里，但在使用 CURLOPT_WRITEFUNCTION 时遇到了问题：编译之后一直找不到可行的解决办法。我也试过 Stack Overflow 上的一些做法，同样没有成功。

以下是我尝试过的写法（用来替换下面代码中的 'writer'）：

node::writer、&node::writer、std::bind1st(std::mem_fun(&node::writer), this);

这是我的代码：

#ifndef NODE_H_
#define NODE_H_

// Callback registered with libcurl through CURLOPT_WRITEFUNCTION in
// node::curlHttpget; appends the received bytes to *buffer.
// Defined at the bottom of this header.
int writer(char *data, std::size_t size, std::size_t nmemb, std::string *buffer);

/*
 * function prototypes
 */

// A single crawled page: keeps the page url, its downloaded source and the
// heading / anchor tags scraped out of that source.
class node {
 // record for one html element (text + properties); defined after the class
 struct tag;

 // address of the page and the source downloaded from it
 std::string url;
 std::string source;

 // tag records filled by get_headings() / get_anchors()
 std::vector<tag> heading;
 std::vector<tag> anchor;

 // download page_url with curl; returns the source, or "" on failure
 std::string curlHttpget(const std::string &page_url);

 // append one tag record to the matching vector
 // @see std::vector<tag> heading
 // @see std::vector<tag> anchor
 void add_heading(std::string text, std::string properties);
 void add_anchor(std::string text, std::string properties);

public:
 // constructors: the seeded overload loads and scrapes the page right away
 node() {}
 node(std::string seed);

 // destructor
 ~node() {}

 // crawl the page at seed (stores it in url / source)
 void load(std::string seed);

 // anchor tags: scrape them from source / print the stored ones
 void get_anchors();
 void display_anchors();

 // heading tags: scrape them from source / print the stored ones
 void get_headings();
 void display_headings();
};
/*
 * record used for every stored html element: the element text plus the
 * properties string kept alongside it
 */
struct node::tag {
 std::string text;
 std::string properties;

 // store both strings exactly as given
 tag(std::string label, std::string attrs)
  : text(label),
    properties(attrs)
 {}
};

/*
 * seeded constructor: downloads the page at seed, then scrapes its anchor
 * tags followed by its heading tags
 */
node::node(std::string seed) {
 this->load(seed);
 this->get_anchors();
 this->get_headings();
}
/*
 * araneus::subroutines
 */

// crawl the page
void node::load(std::string seed) {
 url = seed;
 source = curlHttpget(url);
}


//scrape html source
std::string node::curlHttpget(const std::string &url) {
 std::string buffer;

 CURL *curl;
 CURLcode result;

 curl = curl_easy_init();

 if (curl) {
  curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
  curl_easy_setopt(curl, CURLOPT_HEADER, 0);
  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writer);
  curl_easy_setopt(curl, CURLOPT_WRITEDATA, &buffer);

  result = curl_easy_perform(curl);//http get performed

  curl_easy_cleanup(curl);//must cleanup

  //error codes: http://curl.haxx.se/libcurl/c/libcurl-errors.html
  if (result == CURLE_OK) {
   return buffer;
  }
  //curl_easy_strerror was added in libcurl 7.12.0
  std::cerr << "error: " << result << " " << curl_easy_strerror(result) << std::endl;
  return "";
 }

 std::cerr << "error: could not initalize curl" << std::endl;
 return "";
}

void node::get_headings() {
 static const regex expression("<[hH][1-6]\\s*(?<properties>.*?)\\s*>(?<name>.*?)</\\s*[hH][1-6]\\s*>");

 int const subMatches[] = { 1, 2 };

 sregex_token_iterator p(source.begin(), source.end(), expression, subMatches);
 sregex_token_iterator end;

 string text;
 string properties;

 int count = 0;
 for (;p != end; count++, ++p)
 {
  string m(p->first, p->second);

  if(count % 2) {
   text = m;
   add_heading(text, properties);
  }
  else {
   properties = m;
  }
 }
}

//use regex to find anchors in source
//
// Every <a ... href="...">text</a> match is delivered as two tokens:
// capture 1 (the href value) followed by capture 2 (the link text). The href
// is normalised against the page url and the pair is stored with add_anchor().
void node::get_anchors() {
 // [aA] and (?:href|HREF) replace the former [a|A] and [href|HREF]: inside a
 // character class '|' is a literal and only ONE character is matched, so the
 // old pattern accepted e.g. title="..." as if it were href="...".
 // The new group is non-capturing, so url stays capture 1 and name capture 2.
 static const regex expression("<[aA].*?(?:href|HREF)\\s*=[\"'](?<url>.*?)[\"'].*?>(?<name>.*?)</[aA]>");
 static const regex relative("^\\/");
 static const regex firstChar("^[A-Za-z0-9\\-_\\$\\.\\+!\\*'\\(\\)#]"); // valid url characters
 // a real alternation of schemes; the former [...] character class matched any
 // link that merely STARTED with one of the letters h, t, p, s, f, ...
 static const regex protocol("^(?:http|HTTP|https|HTTPS|ftp|FTP|sftp|SFTP):\\/\\/");

 int const subMatches[] = { 1, 2 };

 sregex_token_iterator p(source.begin(), source.end(), expression, subMatches);
 sregex_token_iterator end;

 string properties;

 bool isText = false; // false: token is the href (capture 1), true: the link text (capture 2)
 for (; p != end; ++p, isText = !isText) {
  std::string m(p->first, p->second);

  if(isText) {
   add_anchor(m, properties);
  }
  else {
   if(regex_search(m, relative)) { //if link is in "/somewhere" format
    // NOTE(review): this appends to the full page url, not to the site root - confirm intended
    properties = url + m;
   }
   else if(regex_search(m, protocol)) { //if link is absolute "http://www.somewhere.com"
    properties = m;
   }
   else if(regex_search(m, firstChar)) { //if link starts with a valid url char "somewhere.html"
    properties = url + "/" + m;
   }
   else {
    std::cout << "link of unknown protocol: " << m << std::endl;
    // keep the raw href: leaving properties untouched here would store the
    // PREVIOUS anchor's url again for this link
    properties = m;
   }
  }
 }
}

void node::add_heading(std::string text, std::string properties) {
 heading.push_back(tag(text, properties));
}

void node::display_headings() {
 for(int i = 0; i < (int)heading.size(); i++) {
  std::cout<< "[h]: " << heading[i].text << endl;
  std::cout<< "[h.properties]: " << heading[i].properties << endl;
 }
 cout << "found " << (int)heading.size() << " <h[1-6]> tags" << endl;
}

void node::add_anchor(std::string text, std::string properties) {
 anchor.push_back(tag(text, properties));
}

void node::display_anchors() {
 for(int i = 0; i < (int)anchor.size(); i++) {
  std::cout<< "[a]: " << anchor[i].text << endl;
  std::cout<< "[a.properties]: " << anchor[i].properties << endl;
 }
 cout << "found " << (int)anchor.size() << " <a> tags" << endl;
}

//required by libcurl
// Appends the size * nmemb bytes starting at data to *buffer and returns that
// byte count; returns 0 without touching anything when buffer is null.
int writer(char *data, std::size_t size, std::size_t nmemb, std::string *buffer) {
 if (!buffer) {
  return 0;
 }

 const std::size_t byteCount = size * nmemb;
 buffer->append(data, byteCount);
 return static_cast<int>(byteCount);
}

#endif /* NODE_H_ */

我想找到一种办法，把函数 'int writer' 变成成员函数 "int node::writer"。问题出现在 std::string node::curlHttpget 中设置 CURLOPT_WRITEFUNCTION 的那一行。

写成 &node::writer 可以通过编译，但运行时会出现段错误（seg fault）=/

感谢

Answer 1

不要用 std::string * 作为回调的用户参数，而是改用 node *，或者另一个类（例如 HttpGet）——它包含一个 std::string 和一个指回 node 的指针，这样回调既可以写入字符串，又能在每次调用时访问你的 node。

boost::bind 不能用于 C API 的回调。

它之所以能编译，是因为 curl_easy_setopt 使用了可变参数（...），因此完全没有类型安全可言。你可以传入任何类型，它都能编译；但它很可能无法正确运行——正如你付出代价后所发现的那样。

为了获得额外的类型安全，我会让你的函数与 curl_write_callback 的签名完全一致，即第 4 个参数为 void *，然后在函数实现内部再进行类型转换。

CURLOPT_WRITEFUNCTION指向成员函数的指针

1 个答案: