我正在尝试使用正则表达式从 CURL 输出中提取数据。我可以解析网页的 HTML 源代码并将其保存到文本文件中。但是，在保存之前，我想从输出中删除所有 <script> 标记。
程序如下（bbb.txt 是输出文本文件）：
int main(void) {
CURL *curl;
FILE *fp;
CURLcode res;
char *url = "google.com";
char outfilename[FILENAME_MAX] = "bbb.txt";
curl = curl_easy_init();
if (curl) {
fp = fopen(outfilename,"wb");
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_USERAGENT, "Developer/1.0");
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_data);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp);
res = curl_easy_perform(curl);
curl_easy_cleanup(curl);
fclose(fp);
}
return 0;
}