Question

我早些时候看到了这段代码，并一直在尝试对其进行改进：我认为与其一遍又一遍地使用同一个线程，不如让它在多个线程上运行，效果会更好；我还认为条件变量会有所帮助，因为它可以实现线程之间的信号传递。我已经剪切了一部分代码，并将其放在底部（因此，严格来说，这部分就是被突出显示的重点，请忽略其余与跟踪变量相关的内容）：

// Import things we need from the standard library

using std::chrono::duration_cast;
using std::chrono::milliseconds;
using std::complex;
using std::cout;
using std::endl;
using std::ofstream;

// Clock used for every timing measurement in this program, referred to as "the_clock".
using the_clock = std::chrono::steady_clock;

// Dimensions of the generated image, in pixels.
constexpr int WIDTH = 1920;
constexpr int HEIGHT = 1080;

// Iteration cap: a point that has not escaped after this many iterations is
// treated as belonging to the Mandelbrot set.
// (Zooming further into the set may require a larger value.)
constexpr int MAX_ITERATIONS = 500;

// Pixel storage, indexed as image[row][column].
// Every entry is packed as 0xRRGGBB.
uint32_t image[HEIGHT][WIDTH];


// Write the image to a TGA file with the given name.
// Format specification: http://www.gamers.org/dEngine/quake3/TGA.txt
void write_tga(const char *filename)
{
    ofstream outfile(filename, ofstream::binary);

    uint8_t header[18] = {
        0, // no image ID
        0, // no colour map
        2, // uncompressed 24-bit image
        0, 0, 0, 0, 0, // empty colour map specification
        0, 0, // X origin
        0, 0, // Y origin
        WIDTH & 0xFF, (WIDTH >> 8) & 0xFF, // width
        HEIGHT & 0xFF, (HEIGHT >> 8) & 0xFF, // height
        24, // bits per pixel
        0, // image descriptor
    };
    outfile.write((const char *)header, 18);

    for (int y = 0; y < HEIGHT; ++y)
    {
        for (int x = 0; x < WIDTH; ++x)
        {
            uint8_t pixel[3] = {
                image[y][x] & 0xFF, // blue channel
                (image[y][x] >> 8) & 0xFF, // green channel
                (image[y][x] >> 16) & 0xFF, // red channel
            };
            outfile.write((const char *)pixel, 3);
        }
    }

    outfile.close();
    if (!outfile)
    {
        // An error has occurred at some point since we opened the file.
        cout << "Error writing to " << filename << endl;
        exit(1);
    }
}


// Render the Mandelbrot set into the image array.
// The parameters specify the region on the complex plane to plot.
void compute_mandelbrot(const double left, const double right, const double top, const double bottom)
{
    for (int y = 0; y < HEIGHT; ++y)
    {
        for (int x = 0; x < WIDTH; ++x)
        {
            // Work out the point in the complex plane that
            // corresponds to this pixel in the output image.
            complex<double> c(left + (x * (right - left) / WIDTH),
                top + (y * (bottom - top) / HEIGHT));

            // Start off z at (0, 0).
            complex<double> z(0.0, 0.0);

            // Iterate z = z^2 + c until z moves more than 2 units
            // away from (0, 0), or we've iterated too many times.
            int iterations = 0;
            while (abs(z) < 2.0 && iterations < MAX_ITERATIONS)
            {
                z = (z * z) + c;

                ++iterations;
            }

            if (iterations == MAX_ITERATIONS)
            {
                // z didn't escape from the circle.
                // This point is in the Mandelbrot set.
                image[y][x] = 0x000000; // black
            }
            else
            {
                // z escaped within less than MAX_ITERATIONS
                // iterations. This point isn't in the set.
                image[y][x] = 0xFFFFFF; // white
            }
        }
    }
}

int main(int argc, char *argv[])
{
    cout << "Processor logical cores: " << std::thread::hardware_concurrency() << endl;

    cout << "Please wait..." << endl;

    // Sequential implementation
    if(true)
    {
        cout << "Sequential code..." << endl;
        // Start timing
        the_clock::time_point start = the_clock::now();

        // This shows the whole set.
        compute_mandelbrot(-2.0, 1.0, 1.125, -1.125);

        // This zooms in on an interesting bit of detail.
        //compute_mandelbrot(-0.751085, -0.734975, 0.118378, 0.134488);

        // Stop timing
        the_clock::time_point end = the_clock::now();

        // Compute the difference between the two times in milliseconds
        auto time_taken = duration_cast<milliseconds>(end - start).count();
        cout << "Computing the Mandelbrot set took " << time_taken << " ms." << endl;
    }

    cout << endl;

    // Parallel implementation - method 1
    if(true)
    {
        cout << "Parallel using C++ threads - method 1..." << endl;

        // Test with various thread numbers
        for (int threads_count = 1; threads_count < 15; ++threads_count)
        {
            // Start timing
            const the_clock::time_point start = the_clock::now();

            // Threads vector
            std::vector<std::thread> threads;

            const int chunk_size = HEIGHT / threads_count;
            int y1 = 0;
            int y2 = chunk_size;
            for (int i = 0; i < threads_count; ++i)
            {
                // Divide work into equal area chunks, then let each thread calculate its part
                y1 = i * chunk_size;
                y2 = y1 + chunk_size;
                if (y2 > HEIGHT) y2 = HEIGHT;

                // Start the thread - compute_mandelbrot takes area to be calculated by a given thread through [y1, y2)
                threads.push_back(std::thread([y1, y2](const double left, const double right, const double top, const double bottom) 
                {
                    for (int y = y1; y < y2; ++y)
                    {
                        for (int x = 0; x < WIDTH; ++x)
                        {
                            // Work out the point in the complex plane that
                            // corresponds to this pixel in the output image.
                            complex<double> c(left + (x * (right - left) / WIDTH),
                                top + (y * (bottom - top) / HEIGHT));

                            // Start off z at (0, 0).
                            complex<double> z(0.0, 0.0);

                            // Iterate z = z^2 + c until z moves more than 2 units
                            // away from (0, 0), or we've iterated too many times.
                            int iterations = 0;
                            while (abs(z) < 2.0 && iterations < MAX_ITERATIONS)
                            {
                                z = (z * z) + c;

                                ++iterations;
                            }

                            if (iterations == MAX_ITERATIONS)
                            {
                                // z didn't escape from the circle.
                                // This point is in the Mandelbrot set.
                                image[y][x] = 0x000000; // black
                            }
                            else
                            {
                                // z escaped within less than MAX_ITERATIONS
                                // iterations. This point isn't in the set.
                                image[y][x] = 0xFFFFFF; // white
                            }
                        }
                    }
                }, -2.0, 1.0, 1.125, -1.125));
            }

            //Join threads
            for (auto &t : threads)
                t.join();

            // Stop timing
            const the_clock::time_point end = the_clock::now();

            // Compute the difference between the two times in milliseconds
            const auto time_taken = duration_cast<milliseconds>(end - start).count();
            cout << "Threads: " << threads_count << ", time: " << time_taken << " ms." << endl;

        }
    }

    cout << endl;

    // Parallel implementation - method 2
    if (true)
    {
        cout << "Parallel using C++ threads - method 2..." << endl;

        // Test with various thread numbers
        for (int threads_count = 1; threads_count < 15; ++threads_count)
        {
            // Start timing
            const the_clock::time_point start = the_clock::now();

            // Indicates next chunk of work to be done
            // Using std::atomic allows synchronized access when checking the exit condition of the while loop in the thread function
            std::atomic<int> work_queue(0);

            // To allow synchronized read/write to work_queue
            // Used to prevent race condition between the threads when accessing work_queue
            // It is protecting work_queue to be accessed only by one of the threads at a time
            std::mutex queue_mutex;

            // Threads vector
            std::vector<std::thread> threads;

            //Lauch #parts threads
            for (int i = 0; i < threads_count; ++i)
            {
                // Start the thread - work_queue points to the next chunk of data to be calculated
                threads.push_back(std::thread([&work_queue, &queue_mutex](const double left, const double right, const double top, const double bottom, const int height)
                {
                    // Our arbitrarily chosen chunk_size
                    // Smaller chunk_size allows for the work to be distributed more evenly amongst the threads, however this also increases
                    // thread synchronization overhead
                    static constexpr int chunk_size = 20;

                    // If nothing to be done, exit the thread
                    while (work_queue * chunk_size < height)
                    {
                        int part = 0;
                        {
                            // Lock our sychronization mutex
                            // This ensures that only one thread can read/write to the work_queue at the same time
                            std::lock_guard<std::mutex> guard(queue_mutex);

                            // Take the next chunk of work to be done and increment the counter
                            part = work_queue++;

                            // Again, if nothing to be done, exit the thread
                            if (part >= height)
                                break;
                        }

                        for (int y = part * chunk_size; y < part * chunk_size + chunk_size && y < height; ++y)
                        {
                            for (int x = 0; x < WIDTH; ++x)
                            {
                                // Work out the point in the complex plane that
                                // corresponds to this pixel in the output image.
                                complex<double> c(left + (x * (right - left) / WIDTH),
                                    top + (y * (bottom - top) / HEIGHT));

                                // Start off z at (0, 0).
                                complex<double> z(0.0, 0.0);

                                // Iterate z = z^2 + c until z moves more than 2 units
                                // away from (0, 0), or we've iterated too many times.
                                int iterations = 0;
                                while (abs(z) < 2.0 && iterations < MAX_ITERATIONS)
                                {
                                    z = (z * z) + c;

                                    ++iterations;
                                }

                                if (iterations == MAX_ITERATIONS)
                                {
                                    // z didn't escape from the circle.
                                    // This point is in the Mandelbrot set.
                                    image[y][x] = 0x000000; // black
                                }
                                else
                                {
                                    // z escaped within less than MAX_ITERATIONS
                                    // iterations. This point isn't in the set.
                                    image[y][x] = 0xFFFFFF; // white
                                }
                            }
                        }
                    }
                }, -2.0, 1.0, 1.125, -1.125, HEIGHT));
            }

            //Join threads
            for (auto &t : threads)
                t.join();

            // Stop timing
            const the_clock::time_point end = the_clock::now();

            // Compute the difference between the two times in milliseconds
            const auto time_taken = duration_cast<milliseconds>(end - start).count();
            cout << "Threads: " << threads_count << ", time: " << time_taken << " ms." << endl;

        }
    }

    write_tga("output.tga");

    system("pause");

    return 0;

这是正在编辑的主要部分

// Parallel implementation - method 2
// Excerpt: threads repeatedly claim fixed-size chunks of rows from a shared
// counter until the whole image has been rendered, then are joined.
// NOTE: the enclosing `if` and `for` blocks are not closed within this snippet.
if (true)
{
    cout << "Parallel using C++ threads - method 2..." << endl;

    // Test with various thread numbers
    for (int threads_count = 1; threads_count < 15; ++threads_count)
    {
        // Start timing
        const the_clock::time_point start = the_clock::now();

        // Indicates next chunk of work to be done
        // Using std::atomic allows synchronized access when checking the exit condition of the while loop in the thread function
        std::atomic<int> work_queue(0);

        // Mutex held around the increment of work_queue in the thread function.
        // NOTE(review): work_queue is a std::atomic<int>, so `work_queue++` is already an
        // atomic read-modify-write; this mutex looks redundant - confirm before removing.
        std::mutex queue_mutex;

        // Threads vector
        std::vector<std::thread> threads;

        // Launch threads_count threads
        for (int i = 0; i < threads_count; ++i)
        {
            // Start the thread - work_queue points to the next chunk of data to be calculated
            threads.push_back(std::thread([&work_queue, &queue_mutex](const double left, const double right, const double top, const double bottom, const int height)
            {
                // Our arbitrarily chosen chunk_size
                // Smaller chunk_size allows for the work to be distributed more evenly amongst the threads, however, this also increases
                // thread synchronization overhead
                static constexpr int chunk_size = 20;

                // If nothing to be done, exit the thread
                while (work_queue * chunk_size < height)
                {
                    int part = 0;
                    {
                        // Lock our synchronization mutex for the duration of this inner scope
                        std::lock_guard<std::mutex> guard(queue_mutex);

                        // Take the next chunk of work to be done and increment the counter
                        part = work_queue++;

                        // Again, if nothing to be done, exit the thread
                        // NOTE(review): `part` is a chunk index while `height` is a row count, so this
                        // compares different units; `part * chunk_size >= height` appears to be the
                        // intended test. As written, an out-of-range `part` falls through to the row
                        // loop below, whose `y < height` bound makes it execute zero iterations.
                        if (part >= height)
                            break;
                    }

                    // Render the rows of the claimed chunk, clamped to the image height
                    for (int y = part * chunk_size; y < part * chunk_size + chunk_size && y < height; ++y)
                    {
                        for (int x = 0; x < WIDTH; ++x)
                        {
                            // Work out the point in the complex plane that
                            // corresponds to this pixel in the output image.
                            complex<double> c(left + (x * (right - left) / WIDTH),
                                top + (y * (bottom - top) / HEIGHT));

                            // Start off z at (0, 0).
                            complex<double> z(0.0, 0.0);

                            // Iterate z = z^2 + c until z moves more than 2 units
                            // away from (0, 0), or we've iterated too many times.
                            int iterations = 0;
                            while (abs(z) < 2.0 && iterations < MAX_ITERATIONS)
                            {
                                z = (z * z) + c;

                                ++iterations;
                            }

                            if (iterations == MAX_ITERATIONS)
                            {
                                // z didn't escape from the circle.
                                // This point is in the Mandelbrot set.
                                image[y][x] = 0x000000; // black
                            }
                            else
                            {
                                // z escaped within less than MAX_ITERATIONS
                                // iterations. This point isn't in the set.
                                image[y][x] = 0xFFFFFF; // white
                            }
                        }
                    }
                }
            }, -2.0, 1.0, 1.125, -1.125, HEIGHT));
        }

        // Wait for every worker thread to finish
        for (auto &t : threads)
            t.join();

我试图弄清楚如何让它使用两个线程而不是一个线程，但我甚至想不出条件变量应该放在哪里。这可能不是 Stack Overflow 上常见的提问方式，但我确实被严重卡住了；我并不指望从一堆爱挑刺的人那里获得多少支持，只是想解决这个问题。你们会如何把一个线程拆分为两个，并实现一个条件变量？非常期待听到大家对这个项目的想法。

Answer 1

这似乎很适合OpenMP，它允许您通过注释源代码来告诉编译器，可以并行执行任何特定算法的哪些位。

我相信大多数主流编译器都支持它（您未声明平台），包括MSVC。

想采纳这个思路并继续深入吗？网上有许多与此相关的资料，包括 openmp.org 上的这篇教程。

Mandelbrot从一遍又一遍地使用的线程更改为两个线程

1 个答案: