我早些时候看到了这段代码,并一直在尝试改进它:我认为它在多个线程上运行会比始终只在一个线程上运行效果更好,而且使用条件变量应该也会有所改善,因为它可以在线程之间传递信号。我截取了其中一部分代码放在底部(也就是我重点关注的部分,其余的计时变量等可以忽略):
// Import things we need from the standard library
using std::chrono::duration_cast;
using std::chrono::milliseconds;
using std::complex;
using std::cout;
using std::endl;
using std::ofstream;
// Clock type used for every timing measurement in this file.
using the_clock = std::chrono::steady_clock;

// Dimensions of the generated image, in pixels.
constexpr int WIDTH = 1920;
constexpr int HEIGHT = 1080;

// Iteration cap: a point that has not escaped after this many steps is
// treated as belonging to the Mandelbrot set.
// (Raise this if you zoom further into the set.)
constexpr int MAX_ITERATIONS = 500;

// The image data, indexed as image[y][x].
// Each pixel is stored as 0xRRGGBB.
uint32_t image[HEIGHT][WIDTH];
// Write the image to a TGA file with the given name.
// Format specification: http://www.gamers.org/dEngine/quake3/TGA.txt
void write_tga(const char *filename)
{
ofstream outfile(filename, ofstream::binary);
uint8_t header[18] = {
0, // no image ID
0, // no colour map
2, // uncompressed 24-bit image
0, 0, 0, 0, 0, // empty colour map specification
0, 0, // X origin
0, 0, // Y origin
WIDTH & 0xFF, (WIDTH >> 8) & 0xFF, // width
HEIGHT & 0xFF, (HEIGHT >> 8) & 0xFF, // height
24, // bits per pixel
0, // image descriptor
};
outfile.write((const char *)header, 18);
for (int y = 0; y < HEIGHT; ++y)
{
for (int x = 0; x < WIDTH; ++x)
{
uint8_t pixel[3] = {
image[y][x] & 0xFF, // blue channel
(image[y][x] >> 8) & 0xFF, // green channel
(image[y][x] >> 16) & 0xFF, // red channel
};
outfile.write((const char *)pixel, 3);
}
}
outfile.close();
if (!outfile)
{
// An error has occurred at some point since we opened the file.
cout << "Error writing to " << filename << endl;
exit(1);
}
}
// Render the Mandelbrot set into the image array.
// The parameters specify the region on the complex plane to plot.
void compute_mandelbrot(const double left, const double right, const double top, const double bottom)
{
for (int y = 0; y < HEIGHT; ++y)
{
for (int x = 0; x < WIDTH; ++x)
{
// Work out the point in the complex plane that
// corresponds to this pixel in the output image.
complex<double> c(left + (x * (right - left) / WIDTH),
top + (y * (bottom - top) / HEIGHT));
// Start off z at (0, 0).
complex<double> z(0.0, 0.0);
// Iterate z = z^2 + c until z moves more than 2 units
// away from (0, 0), or we've iterated too many times.
int iterations = 0;
while (abs(z) < 2.0 && iterations < MAX_ITERATIONS)
{
z = (z * z) + c;
++iterations;
}
if (iterations == MAX_ITERATIONS)
{
// z didn't escape from the circle.
// This point is in the Mandelbrot set.
image[y][x] = 0x000000; // black
}
else
{
// z escaped within less than MAX_ITERATIONS
// iterations. This point isn't in the set.
image[y][x] = 0xFFFFFF; // white
}
}
}
}
int main(int argc, char *argv[])
{
cout << "Processor logical cores: " << std::thread::hardware_concurrency() << endl;
cout << "Please wait..." << endl;
// Sequential implementation
if(true)
{
cout << "Sequential code..." << endl;
// Start timing
the_clock::time_point start = the_clock::now();
// This shows the whole set.
compute_mandelbrot(-2.0, 1.0, 1.125, -1.125);
// This zooms in on an interesting bit of detail.
//compute_mandelbrot(-0.751085, -0.734975, 0.118378, 0.134488);
// Stop timing
the_clock::time_point end = the_clock::now();
// Compute the difference between the two times in milliseconds
auto time_taken = duration_cast<milliseconds>(end - start).count();
cout << "Computing the Mandelbrot set took " << time_taken << " ms." << endl;
}
cout << endl;
// Parallel implementation - method 1
if(true)
{
cout << "Parallel using C++ threads - method 1..." << endl;
// Test with various thread numbers
for (int threads_count = 1; threads_count < 15; ++threads_count)
{
// Start timing
const the_clock::time_point start = the_clock::now();
// Threads vector
std::vector<std::thread> threads;
const int chunk_size = HEIGHT / threads_count;
int y1 = 0;
int y2 = chunk_size;
for (int i = 0; i < threads_count; ++i)
{
// Divide work into equal area chunks, then let each thread calculate its part
y1 = i * chunk_size;
y2 = y1 + chunk_size;
if (y2 > HEIGHT) y2 = HEIGHT;
// Start the thread - compute_mandelbrot takes area to be calculated by a given thread through [y1, y2)
threads.push_back(std::thread([y1, y2](const double left, const double right, const double top, const double bottom)
{
for (int y = y1; y < y2; ++y)
{
for (int x = 0; x < WIDTH; ++x)
{
// Work out the point in the complex plane that
// corresponds to this pixel in the output image.
complex<double> c(left + (x * (right - left) / WIDTH),
top + (y * (bottom - top) / HEIGHT));
// Start off z at (0, 0).
complex<double> z(0.0, 0.0);
// Iterate z = z^2 + c until z moves more than 2 units
// away from (0, 0), or we've iterated too many times.
int iterations = 0;
while (abs(z) < 2.0 && iterations < MAX_ITERATIONS)
{
z = (z * z) + c;
++iterations;
}
if (iterations == MAX_ITERATIONS)
{
// z didn't escape from the circle.
// This point is in the Mandelbrot set.
image[y][x] = 0x000000; // black
}
else
{
// z escaped within less than MAX_ITERATIONS
// iterations. This point isn't in the set.
image[y][x] = 0xFFFFFF; // white
}
}
}
}, -2.0, 1.0, 1.125, -1.125));
}
//Join threads
for (auto &t : threads)
t.join();
// Stop timing
const the_clock::time_point end = the_clock::now();
// Compute the difference between the two times in milliseconds
const auto time_taken = duration_cast<milliseconds>(end - start).count();
cout << "Threads: " << threads_count << ", time: " << time_taken << " ms." << endl;
}
}
cout << endl;
// Parallel implementation - method 2
if (true)
{
cout << "Parallel using C++ threads - method 2..." << endl;
// Test with various thread numbers
for (int threads_count = 1; threads_count < 15; ++threads_count)
{
// Start timing
const the_clock::time_point start = the_clock::now();
// Indicates next chunk of work to be done
// Using std::atomic allows synchronized access when checking the exit condition of the while loop in the thread function
std::atomic<int> work_queue(0);
// To allow synchronized read/write to work_queue
// Used to prevent race condition between the threads when accessing work_queue
// It is protecting work_queue to be accessed only by one of the threads at a time
std::mutex queue_mutex;
// Threads vector
std::vector<std::thread> threads;
//Lauch #parts threads
for (int i = 0; i < threads_count; ++i)
{
// Start the thread - work_queue points to the next chunk of data to be calculated
threads.push_back(std::thread([&work_queue, &queue_mutex](const double left, const double right, const double top, const double bottom, const int height)
{
// Our arbitrarily chosen chunk_size
// Smaller chunk_size allows for the work to be distributed more evenly amongst the threads, however this also increases
// thread synchronization overhead
static constexpr int chunk_size = 20;
// If nothing to be done, exit the thread
while (work_queue * chunk_size < height)
{
int part = 0;
{
// Lock our sychronization mutex
// This ensures that only one thread can read/write to the work_queue at the same time
std::lock_guard<std::mutex> guard(queue_mutex);
// Take the next chunk of work to be done and increment the counter
part = work_queue++;
// Again, if nothing to be done, exit the thread
if (part >= height)
break;
}
for (int y = part * chunk_size; y < part * chunk_size + chunk_size && y < height; ++y)
{
for (int x = 0; x < WIDTH; ++x)
{
// Work out the point in the complex plane that
// corresponds to this pixel in the output image.
complex<double> c(left + (x * (right - left) / WIDTH),
top + (y * (bottom - top) / HEIGHT));
// Start off z at (0, 0).
complex<double> z(0.0, 0.0);
// Iterate z = z^2 + c until z moves more than 2 units
// away from (0, 0), or we've iterated too many times.
int iterations = 0;
while (abs(z) < 2.0 && iterations < MAX_ITERATIONS)
{
z = (z * z) + c;
++iterations;
}
if (iterations == MAX_ITERATIONS)
{
// z didn't escape from the circle.
// This point is in the Mandelbrot set.
image[y][x] = 0x000000; // black
}
else
{
// z escaped within less than MAX_ITERATIONS
// iterations. This point isn't in the set.
image[y][x] = 0xFFFFFF; // white
}
}
}
}
}, -2.0, 1.0, 1.125, -1.125, HEIGHT));
}
//Join threads
for (auto &t : threads)
t.join();
// Stop timing
const the_clock::time_point end = the_clock::now();
// Compute the difference between the two times in milliseconds
const auto time_taken = duration_cast<milliseconds>(end - start).count();
cout << "Threads: " << threads_count << ", time: " << time_taken << " ms." << endl;
}
}
write_tga("output.tga");
system("pause");
return 0;
这是正在编辑的主要部分
// Parallel implementation - method 2
// Worker threads repeatedly claim a chunk index from a shared counter and
// render the rows belonging to that chunk.
// NOTE: this is an excerpt; the enclosing for-loop and if-block opened below
// are not closed within it.
if (true)
{
cout << "Parallel using C++ threads - method 2..." << endl;
// Repeat the measurement for 1 to 14 threads
for (int threads_count = 1; threads_count < 15; ++threads_count)
{
// Start timing
const the_clock::time_point start = the_clock::now();
// Index of the next chunk of work to be handed out
// Being a std::atomic, it can be read in the while-loop condition of the thread function without holding the mutex
std::atomic<int> work_queue(0);
// Held by a thread while it takes the next chunk index from work_queue
// NOTE(review): work_queue++ on a std::atomic<int> is already an atomic read-modify-write,
// so this mutex looks redundant - confirm before removing
std::mutex queue_mutex;
// Threads vector
std::vector<std::thread> threads;
// Launch threads_count threads
for (int i = 0; i < threads_count; ++i)
{
// Start the thread - work_queue holds the index of the next chunk of rows to be calculated
// left/right/top/bottom are the region of the complex plane; height is the number of image rows (HEIGHT is passed below)
threads.push_back(std::thread([&work_queue, &queue_mutex](const double left, const double right, const double top, const double bottom, const int height)
{
// Our arbitrarily chosen chunk_size (rows per chunk)
// Smaller chunk_size allows for the work to be distributed more evenly amongst the threads, however, this also increases
// thread synchronization overhead
static constexpr int chunk_size = 20;
// Keep going while the next chunk would still start inside the image; otherwise exit the thread
while (work_queue * chunk_size < height)
{
int part = 0;
{
// Lock our synchronization mutex
// Only one thread at a time can execute the increment below
std::lock_guard<std::mutex> guard(queue_mutex);
// Take the next chunk of work to be done and increment the counter
part = work_queue++;
// Intended as a second "nothing left to do" check
// NOTE(review): part is a chunk index while height is a row count, so this does not match
// the while condition above; it is the y < height bound in the loop below that keeps
// rows in range when part * chunk_size is already past the end
if (part >= height)
break;
}
// Render the rows of chunk `part`, clipped to the image height
for (int y = part * chunk_size; y < part * chunk_size + chunk_size && y < height; ++y)
{
for (int x = 0; x < WIDTH; ++x)
{
// Work out the point in the complex plane that
// corresponds to this pixel in the output image.
complex<double> c(left + (x * (right - left) / WIDTH),
top + (y * (bottom - top) / HEIGHT));
// Start off z at (0, 0).
complex<double> z(0.0, 0.0);
// Iterate z = z^2 + c until z moves more than 2 units
// away from (0, 0), or we've iterated too many times.
int iterations = 0;
while (abs(z) < 2.0 && iterations < MAX_ITERATIONS)
{
z = (z * z) + c;
++iterations;
}
if (iterations == MAX_ITERATIONS)
{
// z didn't escape from the circle.
// This point is in the Mandelbrot set.
image[y][x] = 0x000000; // black
}
else
{
// z escaped within less than MAX_ITERATIONS
// iterations. This point isn't in the set.
image[y][x] = 0xFFFFFF; // white
}
}
}
}
}, -2.0, 1.0, 1.125, -1.125, HEIGHT));
}
// Wait for every worker thread to finish
for (auto &t : threads)
t.join();
我想弄清楚如何让这部分工作由两个线程而不是一个线程来完成,但我甚至想不出条件变量应该放在哪里。这也许不是 Stack Overflow 上常见的那类问题,但我确实卡住了;我不想招来一堆差评,只是想解决这个问题。你会如何把一个线程的工作拆分给两个线程,并实现一个条件变量?期待听到大家对这个项目的想法。
答案 0 :(得分:0)
这似乎很适合OpenMP,它允许您通过注释源代码来告诉编译器,可以并行执行任何特定算法的哪些位。
我相信大多数主流编译器都支持它(您未声明平台),包括MSVC。
想试一试吗?网上有许多与此相关的资料,包括 openmp.org 上的教程(this tutorial at openmp.org)。