从共享缓冲区写入文件时丢失数据,并且去掉cout后程序崩溃

时间:2019-06-21 10:01:26

标签: c++ multithreading file crash

我正在使用线程和共享缓冲区制作程序。这两个线程在后台无限期地运行,一个线程将用数据填充共享缓冲区,而另一个线程将共享缓冲区的内容写入文件。

用户可以启动或停止数据填充,这导致线程进入等待状态,直到用户再次启动线程。每个循环中,缓冲区将填充50个浮点数。

这是代码:


#include <iostream>
#include <vector>
#include <iterator>
#include <utility>
#include <fstream>
#include <condition_variable>
#include <mutex>
#include <thread>

using namespace std;

// Mutex locked around accesses to `datas` and around the start_running updates
// made in start()/stop().
std::mutex m;
// Single condition variable shared by both worker threads and by start()/stop().
std::condition_variable cv;
// Shared buffer: one inner vector per block pushed by sending_thread().
std::vector<std::vector<float>> datas;
// Run-control flags. NOTE(review): both are plain bools; keep_running is written
// by main() and read by the worker threads without holding `m`.
bool keep_running = true, start_running = false;

// Consumer thread body: while keep_running is set, waits until `datas` holds at
// least one block, takes the whole batch, and writes every float on its own
// line to IQ_Datas.txt. Once keep_running is observed false the file is closed;
// it is opened again the next time keep_running is observed true.
// The function never returns.
void writing_thread()
{
    ofstream myfile;

    bool opn = false;

    // NOTE(review): while keep_running is false this outer loop spins without
    // blocking on anything.
    while(1)
    {

        // NOTE(review): keep_running is a plain bool read here without holding
        // `m`, while main() assigns it from another thread.
        while(keep_running)
        {
            // Open the file only once
            if(!opn)
            {
                myfile.open("IQ_Datas.txt");
                opn = true;

            }


            // Wait until sending_thread() has pushed at least one block.
            // The predicate only looks at `datas`, so a notification sent by
            // stop() does not release this wait while the buffer is empty.
            std::unique_lock<std::mutex> lk(m);

            cv.wait(lk, [] {return !datas.empty();});


            // Take the whole batch so the file I/O below runs without the lock.
            auto d = std::move(datas);


            lk.unlock();


            for(auto &entry : d)
            {
                // endl flushes the stream after every single value.
                for(auto &e : entry)
                    myfile << e << endl;
            }


        }

        // Reached only after keep_running was observed false.
        if(opn)
        {
            myfile.close();
            opn = false;
        }

    }
}

// Producer thread body: once both keep_running and start_running are set, it
// repeatedly copies a fixed 50-float block into `datas` and notifies the
// condition variable. When keep_running is observed false it prints the number
// of floats pushed during that run and goes back to waiting.
// The function never returns.
void sending_thread()
{

    std::vector<float> m_buffer;
    int cpt=0;  // blocks pushed during the current run
    //Fill the buffer with 50 floats
    for(float i=0; i<50; i++)
        m_buffer.push_back(i);

    while(1)
    {
        {
            // Sleep until both flags are set; the lock is released again as
            // soon as this scope ends.
            std::unique_lock<std::mutex> lk(m);
            cv.wait(lk, [] {return keep_running && start_running;});

        }
        // NOTE(review): keep_running is read here without holding `m`.
        // Nothing in this loop waits for the writer, so `datas` grows without
        // bound whenever blocks are produced faster than they are written.
        while(keep_running)
        {

            //Each loop d is containing 50 floats
            std::vector<float> d = m_buffer;

            cout << "in3" << endl; //Commenting this line makes the program crash

            {
                std::lock_guard<std::mutex> lk(m);
                // Re-check under the lock so nothing is pushed after a stop.
                if (!keep_running)break;
                datas.push_back(std::move(d));
            }
            cv.notify_one();
            cpt++;
        }

        // Reports what was pushed, not what the writer has written: blocks can
        // still be sitting in `datas` when this line runs.
        // NOTE(review): cpt*50 is computed in int.
        cout << "Total data: " << cpt*50 << endl;
        cpt = 0;
    }
}
// Raises start_running under the shared mutex, then wakes every thread
// waiting on the condition variable.
void start()
{
    std::unique_lock<std::mutex> guard(m);
    start_running = true;
    guard.unlock();  // notify after the mutex has been released
    cv.notify_all();
}
// Clears start_running under the shared mutex, then wakes every thread
// waiting on the condition variable.
void stop()
{
    std::unique_lock<std::mutex> guard(m);
    start_running = false;
    guard.unlock();  // notify after the mutex has been released
    cv.notify_all();
}

// Starts the producer and consumer threads, detaches them, and then loops
// forever reading integers from stdin: 1 starts sending, 0 stops it.
int main()
{
    int go = 0;
    thread t1(sending_thread);
    thread t2(writing_thread);

    // Both workers are detached, so they are never joined.
    t1.detach();
    t2.detach();

    while(1)
    {

        // NOTE(review): the stream state is never checked, so a failed
        // extraction does not end the loop.
        std::cin >> go;

        if(go == 1)
        {
            start();
            // NOTE(review): written without holding `m` and after start() has
            // already notified; sending_thread's wait predicate needs both
            // flags, so it can see keep_running still false and go back to
            // sleep with no further notification.
            keep_running = true;
        }
        else if(go == 0)
        {
            stop();
            // NOTE(review): written without holding `m`; the worker threads
            // read it concurrently.
            keep_running = false;
        }


    }

    // Not reached: the loop above has no exit.
    return 0;
}


此代码有2个问题:

  • 注释掉cout << "in3" << endl;这一行后,程序会在大约20-40秒后崩溃,并显示错误消息:terminate called after throwing an instance of 'std::bad_alloc' what(): std::bad_alloc。如果保留这行cout,程序可以正常运行。

  • 程序运行时,在停止sending_thread之后,我用cout << "Total data: " << cpt*50 << endl;显示已复制的数据总量。数据量较小时,所有数据都能正确写入文件;但数据量较大时会丢失数据。 Missing/Correct data(文件中的总行数与Total data不一致)

为什么保留cout时程序可以正常运行?是什么导致了数据丢失?是因为sending_thread填充缓冲区的速度太快,而writing_thread写入文件耗时太长吗?

编辑:补充说明一下,在sending_thread中添加更多cout似乎可以解决所有问题。第一个线程产生了2100万个浮点数,第二个线程也成功地将这2100万个浮点数写入了文件。看起来在没有cout的情况下,生产者线程运行得太快,消费者线程在把数据写入文件的同时来不及继续从共享缓冲区中取出数据。

1 个答案:

答案 0 :(得分:2)

避免:

Moved-from object 'datas' of type 'std::vector' is moved:
        auto d = std::move(datas);
                 ^~~~~~~~~~~~~~~~

替换此:

        // Wait until main() sends data
        std::unique_lock<std::mutex> lk(m);
        cv.wait(lk, [] {return !datas.empty();});
        auto d = std::move(datas);
        lk.unlock();

与此:

        // Wait until main() sends data            
        std::vector<std::vector<float>> d;
        {
            std::unique_lock<std::mutex> lk(m);
            cv.wait(lk, [] { return !datas.empty(); });
            datas.swap(d);
        }

另外,请将从多个线程访问的bool变量替换为std::atomic_bool或std::atomic_flag。

bad_alloc来自sending_thread,它比writing_thread快得多,因此它将耗尽内存。当sending_thread放慢速度(打印时)时,问题不太明显,但是您应该进行一些同步才能正确执行此操作。您可以围绕它创建一个包装器类,并提供插入和提取方法,以确保所有访问都正确同步,并为其提供最大数量的元素。一个例子:

/// Bounded hand-off buffer of `std::vector<T>` blocks shared between producer
/// and consumer threads.
///
/// Producers call insert_one(), which blocks while the buffer is over budget.
/// Consumers call extract_all(), which blocks until at least one block is
/// stored. shutdown() releases every blocked caller. All state is guarded by a
/// single mutex / condition-variable pair.
template<typename T>
class atomic2dvector {
public:
    /// @param max_elements Budget checked by insert_one(): the number of stored
    ///                     elements plus the number of stored blocks must not
    ///                     exceed it for a new block to be accepted.
    atomic2dvector(size_t max_elements) : m_max_elements(max_elements) {}

    atomic2dvector(const atomic2dvector&) = delete;
    atomic2dvector(atomic2dvector&&) = delete;
    atomic2dvector& operator=(const atomic2dvector&) = delete;
    atomic2dvector& operator=(atomic2dvector&&) = delete;

    ~atomic2dvector() { shutdown(); }

    /// Appends one block, waiting while the buffer is over budget.
    /// @param other Block to store; it is moved from on success.
    /// @return true if the block was stored, false if the buffer is shut down.
    bool insert_one(std::vector<T>&& other) {
        std::unique_lock<std::mutex> lock(m_mtx);
        m_cv.wait(lock, [this] {
            return m_shutdown.load() ||
                   m_current_elements + m_data.size() <= m_max_elements;
        });
        if(m_shutdown) return false;

        m_current_elements += other.size();
        // `other` is a plain rvalue reference, not a forwarding reference, so
        // std::move is the matching cast.
        m_data.emplace_back(std::move(other));

        lock.unlock();
        // Producers and consumers wait on the same condition variable, so a
        // single notify_one could wake another producer instead of a consumer.
        m_cv.notify_all();
        return true;
    }

    /// Waits until at least one block is stored or shutdown() was called.
    /// @return every stored block, or an empty vector once shut down (blocks
    ///         still stored at that point are not handed out).
    std::vector<std::vector<T>> extract_all() {
        std::vector<std::vector<T>> return_value;

        std::unique_lock<std::mutex> lock(m_mtx);
        m_cv.wait(lock, [this] { return m_shutdown.load() || !m_data.empty(); });

        if(!m_shutdown) {
            m_current_elements = 0;
            return_value.swap(m_data);
        }
        lock.unlock();
        m_cv.notify_all();

        return return_value;
    }

    /// @return true until shutdown() has been called.
    bool is_active() const { return !m_shutdown.load(); }

    /// Marks the buffer as shut down and wakes every blocked caller.
    void shutdown() {
        {
            // The flag is part of the predicates waited on above. Setting it
            // while holding the mutex guarantees the change cannot slip in
            // between a waiter's predicate check and its going to sleep, which
            // would lose the notification below.
            std::lock_guard<std::mutex> lock(m_mtx);
            m_shutdown = true;
        }
        m_cv.notify_all();
    }

private:
    size_t m_max_elements;
    size_t m_current_elements = 0;           // elements currently stored in m_data
    std::atomic<bool> m_shutdown{false};     // atomic: is_active() reads it unlocked
    std::condition_variable m_cv{};
    std::mutex m_mtx{};
    std::vector<std::vector<T>> m_data{};
};

如果即使在关机后仍要继续提取数据,则可以将extract_all()更改为此:

   std::vector<std::vector<T>> extract_all() {
        std::vector<std::vector<T>> return_value;

        std::unique_lock<std::mutex> lock(m_mtx);
        while(m_data.empty() && m_shutdown == false) m_cv.wait(lock);

        m_current_elements = 0;
        return_value.swap(m_data);
        m_cv.notify_one();

        return return_value;
    }

完整的示例如下:

#include <atomic>
#include <chrono>
#include <condition_variable>
#include <fstream>
#include <iostream>
#include <iterator>
#include <mutex>
#include <thread>
#include <utility>
#include <vector>

using namespace std;

/// Bounded hand-off buffer of `std::vector<T>` blocks shared between producer
/// and consumer threads.
///
/// Producers call insert_one(), which blocks while the buffer is over budget.
/// Consumers call extract_all(), which blocks until at least one block is
/// stored. shutdown() releases every blocked caller; blocks stored before the
/// shutdown can still be extracted afterwards. All state is guarded by a single
/// mutex / condition-variable pair.
template<typename T>
class atomic2dvector {
public:
    /// @param max_elements Budget checked by insert_one(): the number of stored
    ///                     elements plus the number of stored blocks must not
    ///                     exceed it for a new block to be accepted.
    atomic2dvector(size_t max_elements) : m_max_elements(max_elements) {}
    atomic2dvector(const atomic2dvector&) = delete;
    atomic2dvector(atomic2dvector&&) = delete;
    atomic2dvector& operator=(const atomic2dvector&) = delete;
    atomic2dvector& operator=(atomic2dvector&&) = delete;

    ~atomic2dvector() { shutdown(); }

    /// Appends one block, waiting while the buffer is over budget.
    /// @param other Block to store; it is moved from on success.
    /// @return true if the block was stored, false if the buffer is shut down.
    bool insert_one(std::vector<T>&& other) {
        std::unique_lock<std::mutex> lock(m_mtx);
        m_cv.wait(lock, [this] {
            return m_shutdown.load() ||
                   m_current_elements + m_data.size() <= m_max_elements;
        });
        if(m_shutdown) return false;

        m_current_elements += other.size();
        // `other` is a plain rvalue reference, not a forwarding reference, so
        // std::move is the matching cast.
        m_data.emplace_back(std::move(other));

        lock.unlock();
        // Producers and consumers wait on the same condition variable, so a
        // single notify_one could wake another producer instead of a consumer.
        m_cv.notify_all();
        return true;
    }

    /// Waits until at least one block is stored or shutdown() was called.
    /// @return every block stored at that moment; empty only when the buffer
    ///         is shut down and nothing is left.
    std::vector<std::vector<T>> extract_all() {
        std::vector<std::vector<T>> return_value;

        std::unique_lock<std::mutex> lock(m_mtx);
        m_cv.wait(lock, [this] { return m_shutdown.load() || !m_data.empty(); });

        m_current_elements = 0;
        return_value.swap(m_data);
        lock.unlock();
        m_cv.notify_all();

        return return_value;
    }

    /// @return true until shutdown() has been called.
    bool is_active() const { return !m_shutdown.load(); }

    /// Marks the buffer as shut down and wakes every blocked caller.
    void shutdown() {
        {
            // The flag is part of the predicates waited on above. Setting it
            // while holding the mutex guarantees the change cannot slip in
            // between a waiter's predicate check and its going to sleep, which
            // would lose the notification below.
            std::lock_guard<std::mutex> lock(m_mtx);
            m_shutdown = true;
        }
        m_cv.notify_all();
    }

private:
    size_t m_max_elements;
    size_t m_current_elements = 0;           // elements currently stored in m_data
    std::atomic<bool> m_shutdown{false};     // atomic: is_active() reads it unlocked
    std::condition_variable m_cv{};
    std::mutex m_mtx{};
    std::vector<std::vector<T>> m_data{};
};

// Mutex / condition variable used for the start/stop/quit signalling towards
// sending_thread(); the data buffer below carries its own synchronization.
std::mutex m;
std::condition_variable cv;
// Shared buffer, constructed with a budget of as many floats as fit in 256 MiB.
atomic2dvector<float> datas(256 * 1024 * 1024 / sizeof(float)); // 0.25 GiB limit
// True while the user wants sending_thread() to produce data.
std::atomic_bool start_running = false;

// Consumer thread body: drains the shared buffer batch by batch and writes
// each float on its own line to IQ_Datas.txt. It stops at the first empty
// batch, or immediately if the file could not be opened.
void writing_thread() {
    std::ofstream out_file("IQ_Datas.txt");
    if(!out_file) {
        std::cout << "writing_thread shutting down\n";
        return;
    }

    std::cout << "writing_thread waiting\n";
    for(;;) {
        std::vector<std::vector<float>> batch = datas.extract_all();
        if(batch.empty()) break;  // an empty batch is the stop signal

        std::cout << "got " << batch.size() << "\n";
        for(const auto& block : batch) {
            for(const float value : block) out_file << value << "\n";
        }
        std::cout << "wrote " << batch.size() << "\n\n";
    }
    std::cout << "writing_thread shutting down\n";
}

// Producer thread body: sleeps until sending is switched on or the buffer is
// shut down. While sending is on it keeps inserting copies of a fixed 50-float
// block, and after each run it reports how many floats were inserted.
void sending_thread() {
    // Template block holding the values 0..49.
    std::vector<float> block_template;
    for(int value = 0; value < 50; ++value)
        block_template.push_back(static_cast<float>(value));

    std::uintmax_t blocks_sent = 0;
    for(;;) {
        {
            std::unique_lock<std::mutex> lk(m);
            // Stay asleep while sending is off and the buffer is still active.
            while(!start_running && datas.is_active()) cv.wait(lk);
        }
        if(!datas.is_active()) break;

        std::cout << "sending...\n";
        while(start_running) {
            // insert_one() consumes the block, so hand it a fresh copy.
            if(!datas.insert_one(std::vector<float>(block_template))) break;
            ++blocks_sent;
        }
        std::cout << "Total data: " << blocks_sent * 50 << std::endl;
        blocks_sent = 0;
    }
    std::cout << "sending_thread shutting down\n";
}

// Switches sending on and wakes the waiting threads; the flag is set and the
// notification issued while `m` is held.
void start() {
    const std::lock_guard<std::mutex> guard(m);
    start_running.store(true);
    cv.notify_all();
}
// Switches sending off and wakes the waiting threads; the flag is cleared and
// the notification issued while `m` is held.
void stop() {
    const std::lock_guard<std::mutex> guard(m);
    start_running.store(false);
    cv.notify_all();
}
void quit() {
    datas.shutdown();
    cv.notify_all();
}

int main() {
    int go = 0;
    thread t1(sending_thread);
    thread t2(writing_thread);

    std::this_thread::sleep_for(std::chrono::milliseconds(100));
    std::cout << "Enter 1 to make the sending thread send and 0 to make it stop "
                 "sending. Enter a non-integer to shutdown.\n";

    while(std::cin >> go) {
        if(go == 1) {
            start();
        } else if(go == 0) {
            stop();
        }
    }
    std::cout << "--- shutting down ---\n";
    quit();

    std::cout << "joining threads\n";
    t1.join();
    std::cout << "t1 joined\n";
    t2.join();
    std::cout << "t2 joined\n";
}