尽管程序已经执行完毕,为什么控制台窗口仍然保持打开状态?

时间:2015-08-12 13:15:29

标签: c++ multithreading winapi

我正在开发一个网络爬虫,它可以在任何时候通过按下“ESC”键来中止。
为了检测是否按下了“ESC”键,我使用一个单独的线程来设置一个标志,通知“main”函数自行终止。这部分工作正常,但程序结束后控制台窗口不会关闭。控制台窗口唯一会自动关闭的情况,是在线程启动后立即按下“ESC”(也就是在 while 循环开始之前;请参阅代码中的注释)。

#include <stdio.h>

#include <algorithm>
#include <atomic>
#include <cstring>
#include <ctime>
#include <deque>
#include <fstream>
#include <iostream>
#include <queue>
#include <regex>
#include <string>
#include <thread>
#include <vector>

#include <curl/curl.h>
#include <windows.h>

// ---------------------------------------------------------------------------
// Program-wide state shared by all crawler functions.
// ---------------------------------------------------------------------------

// Set to true by the key-watcher thread (quitCrawler) and polled by main.
// It is atomic because it is written and read from two different threads;
// a plain flag would be a data race.
std::atomic<bool> exitCrawler{ false };
// Start URL typed in by the user (NUL-terminated, at most 511 characters).
char URL[512];
// libcurl easy handle used for the transfer currently in progress.
CURL *curl;
// i counts the new URLs found by the latest analyzeContent() call; urlCount
// counts the sites indexed so far. start/stop are not used by the code shown.
int i{ 0 }, start{ 0 }, stop{ 0 }, urlCount{ 0 };
// Log file that indexSite() appends one line per crawled site to.
std::ofstream write;
// Work queue of URLs still to crawl. The elements are NON-OWNING pointers:
// whoever pushes a pointer must keep the pointed-to characters alive until
// the entry has been popped.
std::deque <const char*> urls;
// Pattern used to pull links out of downloaded pages.
// NOTE(review): the '.' characters are unescaped, so each one matches any
// character, not only a literal dot -- confirm whether that is intended.
std::regex rgx("(?:http)[:](?://)[A-Za-z0-9]+.[A-Za-z0-9]+.[a-z]+/");
// Match results reused by analyzeContent().
std::smatch sm;
// content accumulates the body received by writeContent(); answer is not used
// by the code shown.
std::string answer, content;
// Every URL that has been seen, used to avoid queueing duplicates.
std::vector <std::string> detectedURLs;
// Last value returned by GetAsyncKeyState(VK_ESCAPE) in quitCrawler().
unsigned short keyState;

// Forward declarations; the definitions follow main().

// Scans the global `content` buffer with `rgx` and queues matching URLs.
auto analyzeContent() -> void;
// Downloads urls.front() via libcurl; returns 0 on success, 1 on failure.
auto crawlSite() -> int;
// Increments urlCount and appends a log line for urls.front() to `write`.
auto indexSite() -> void;
// Thread entry point: polls the ESC key and sets exitCrawler once it is seen.
auto quitCrawler() -> void;
// Registered with libcurl as CURLOPT_WRITEFUNCTION; appends to `content`.
auto writeContent(char*, size_t, size_t)->size_t;

/// Program entry point.
///
/// Asks the user for a start URL, starts the ESC-watcher thread and then works
/// through the `urls` queue until it is empty or the watcher thread has set
/// `exitCrawler`.
///
/// @return 0 once the crawl loop has ended (standard C++ requires main to
///         return int; `void main` is only a compiler extension).
auto main() -> int {
    SetConsoleTitle(L"WebCrawler");
    write.open("C:\\Users\\Daniel\\Desktop\\urls.txt", std::ios::app);
    std::cout << "Welcome to the WebCrawler!" << std::endl << "URL (http://www.*.*/): ";
    // Limit the extraction to the size of the buffer (sizeof(URL) - 1
    // characters plus the terminating NUL) so that an over-long input cannot
    // write past the end of URL.
    std::cin.width(sizeof(URL));
    std::cin >> URL;
    std::cout << "Starting to crawl the surface web. Press \"ESC\" at any time to cancel the crawler." << std::endl;
    urls.push_back(URL);
    // The watcher thread is detached and never joined; it only sets exitCrawler.
    std::thread qC(quitCrawler);
    qC.detach();
    //Press 'ESC' here to get the console window to close itself after the program finishes. If you press 'ESC' after the while loop starts, the console window will stay open.

    // Both stop conditions are checked before EVERY iteration. The previous
    // loop only checked them after a successful crawl, so a skipped or failed
    // URL could lead to urls.front() being called on an empty queue.
    while (!urls.empty() && !exitCrawler) {
        const std::string s(urls.front());
        const bool isHttpUrl = s.find("http://") != std::string::npos;
        if (isHttpUrl && crawlSite() == 0) {
            analyzeContent();
            std::cout << "\rCrawled " << urlCount << " sites of the surface web.";
        }
        // Drop the downloaded body on every path so that the next page is not
        // analysed together with the previous ones.
        content.clear();
        urls.pop_front();
    }
    std::cout << std::endl << "Crawler terminating..." << std::endl;
    write.close();
    return 0;
}

auto analyzeContent() -> void {
    memset(&sm, 0, sizeof(sm));
    i = 0;
    std::string::const_iterator beginningIterator = content.cbegin();
    while (std::regex_search(beginningIterator, content.cend(), sm, rgx)) {
        beginningIterator = sm[0].second;
        if ((std::find(detectedURLs.begin(), detectedURLs.end(), sm[0])) == detectedURLs.end()) {
            urls.push_back(sm[0].str().c_str());
            i++;
        }
        detectedURLs.push_back(sm[0]);
    }
}

/// Downloads the page at `urls.front()` into the global `content` buffer
/// (through the writeContent callback) and, on success, logs it via
/// indexSite().
///
/// @return 0 when the transfer succeeded, 1 when the handle could not be
///         created, the URL could not be set, or the transfer failed.
auto crawlSite() -> int {
    curl = curl_easy_init();
    if (curl == nullptr) {
        return 1;
    }

    // libcurl reads this option as a long through a variadic call, so the
    // argument has to be a long literal.
    curl_easy_setopt(curl, CURLOPT_TIMEOUT, 30L);
    // NOTE(review): libcurl documents the write callback as taking a fourth
    // `void *userdata` parameter; writeContent declares only three -- confirm.
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &writeContent);

    bool fetched{ false };
    if (curl_easy_setopt(curl, CURLOPT_URL, urls.front()) == CURLE_OK) {
        fetched = (curl_easy_perform(curl) == CURLE_OK);
    }

    // Release the handle on every path; the early returns used to leak it.
    curl_easy_cleanup(curl);
    curl = nullptr;
    // Per the libcurl documentation curl_easy_init() performs the global
    // initialisation itself when none has been done yet; this call balances it.
    curl_global_cleanup();

    if (!fetched) {
        return 1;
    }
    indexSite();
    return 0;
}

/// Counts the site at the head of the queue and appends one log line for it
/// (running number, URL and the current std::time value) to `write`.
auto indexSite() -> void {
    urlCount += 1;
    const char* const currentUrl = urls.front();
    write << "&emsp;#" << urlCount
          << "&emsp;&emsp;&emsp;&emsp;&emsp;[" << currentUrl
          << "]/[...] --- {[...]} --- " << std::time(0)
          << "<br>" << std::endl;
}

auto quitCrawler() -> void {
    keyState = GetAsyncKeyState(VK_ESCAPE);
    while (true) {
        if (keyState != 0) {
            exitCrawler = true;
            break;
        }
        keyState = GetAsyncKeyState(VK_ESCAPE);
    }
}

/// Write callback registered with libcurl (CURLOPT_WRITEFUNCTION): appends
/// the received chunk to the global `content` string.
///
/// @param buffer start of the received data; NOT NUL-terminated
/// @param size   size of one data item
/// @param nmemb  number of items in `buffer`
/// @return the number of bytes consumed, i.e. `size * nmemb`
auto writeContent(char* buffer, size_t size, size_t nmemb) -> size_t {
    const size_t byteCount = size * nmemb;
    // Append exactly the bytes that were delivered. Treating `buffer` as a C
    // string read past the end of the chunk until a stray NUL byte turned up
    // and cut off any data that itself contained a NUL byte.
    content.append(buffer, byteCount);
    return byteCount;
}

说实话,我不知道为什么在代码执行完毕之后,控制台窗口仍然保持打开状态。
你们有什么想法吗?

0 个答案:

没有答案