我的目标很简单:我想用 C++ 创建一个可以在屏幕上绘制数千个位图的渲染系统。我一直在尝试使用多线程来加速这一过程,但无济于事。在大多数情况下,使用多个线程反而降低了性能。我把这个项目当作一次学习练习,因此不使用硬件加速。也就是说,我的问题是:
使用多个线程接收一大批要绘制到屏幕上的图像,并以极快的速度渲染它们,最佳方法是什么?我知道我无法创建一个可以与硬件加速图形相媲美的系统,但我相信我的想法仍然可行,因为操作非常简单:将像素从一个内存位置复制到另一个内存位置。
我的渲染器设计使用三个核心 blit 操作:位图图像的定位、旋转和缩放。我将它设置为仅在需要时才旋转图像,并且仅在需要时才缩放图像。
我已经为这个系统尝试过几种设计,但它们都太慢,无法满足需求(绘制 300 个 64x64 位图时只能勉强达到 60fps)。
以下是我尝试过的设计:
这是我用来将位图互相blit的位图类:
/**
 * A CPU-side pixel buffer of 32-bit colours stored row-major
 * (index = x + y * width), with software blitting of other bitmaps into it.
 *
 * The buffer is owned by the bitmap: it is zero-filled on construction,
 * deep-copied on copy and released on destruction.
 *
 * Float positions are converted to whole pixels with floor(), so a sprite
 * moving across x = 0 or y = 0 does not stick for two pixels the way
 * truncation toward zero would make it.
 */
class Bitmap
{
public:
    /**
     * Creates a w x h bitmap whose pixels are all 0.
     * Negative dimensions are treated as 0 (an empty bitmap).
     */
    Bitmap(int w, int h)
        : width(w > 0 ? w : 0),
          height(h > 0 ? h : 0),
          size(width * height),
          // Value-initialised: the scale+rotate path reads pixels that
          // drawPixel() deliberately skipped, so they must not be garbage.
          pixels(new unsigned int[size]())
    {
    }

    /** Deep copy; without it two bitmaps would delete[] the same buffer. */
    Bitmap(const Bitmap &other)
        : width(other.width),
          height(other.height),
          size(other.size),
          pixels(new unsigned int[other.size])
    {
        std::copy(other.pixels, other.pixels + other.size, pixels);
    }

    /** Deep-copy assignment; allocates first so a failed allocation leaves *this intact. */
    Bitmap &operator=(const Bitmap &other)
    {
        if (this != &other)
        {
            unsigned int *fresh = new unsigned int[other.size];
            std::copy(other.pixels, other.pixels + other.size, fresh);
            delete[] pixels;
            pixels = fresh;
            width = other.width;
            height = other.height;
            size = other.size;
        }
        return *this;
    }

    virtual ~Bitmap()
    {
        delete[] pixels; // delete[] on a null pointer is a no-op
        pixels = 0;
    }

    /**
     * Draws bmp into this bitmap.
     *
     * @param bmp   source bitmap; a null source is ignored
     * @param x,y   destination position of the source's (0,0) corner
     * @param rot   rotation in radians about the source's (0,0) corner;
     *              exactly 0 means "no rotation"
     * @param sclx,scly  scale factors; exactly 1 means "no scaling".
     *              A factor that yields a size below one pixel draws nothing.
     *
     * The cheapest routine that can do the job is selected: a row copy when
     * neither rotation nor scaling is requested, otherwise rotate-only,
     * scale-only, or scale into a temporary followed by a rotate.
     * Note that the plain copy overwrites every destination pixel, while the
     * other paths skip transparent source pixels.
     */
    void blit(Bitmap *bmp, float x, float y, float rot, float sclx,
              float scly)
    {
        if (bmp == nullptr || bmp->pixels == nullptr)
            return;

        const bool rotated = (rot != 0);
        const bool scaled = (sclx != 1 || scly != 1);

        if (!scaled)
        {
            if (rotated)
                blitRot(bmp, x, y, rot);
            else
                blitPos(bmp, x, y);
            return;
        }
        if (!rotated)
        {
            blitScl(bmp, x, y, sclx, scly);
            return;
        }

        // Both requested: scale into a temporary, then rotate that.
        const int scaledWidth = scaledExtent(bmp->width, sclx);
        const int scaledHeight = scaledExtent(bmp->height, scly);
        if (scaledWidth == 0 || scaledHeight == 0)
            return;

        Bitmap tmp(scaledWidth, scaledHeight);
        tmp.drawScaled(bmp, 0, 0, scaledWidth, scaledHeight);
        blitRot(&tmp, x, y, rot);
    }

    /**
     * Writes one pixel. Coordinates outside the bitmap are ignored, and so is
     * the colour 0x00000000, which is treated as "nothing to draw".
     */
    void drawPixel(int x, int y, unsigned int color)
    {
        // Valid coordinates are [0, width) x [0, height); x == width would
        // wrap onto the next row and y == height would leave the buffer.
        if (x < 0 || y < 0 || x >= width || y >= height)
            return;
        if (color == 0x00000000)
            return;
        pixels[x + y * width] = color;
    }

    /** Reads one pixel. No bounds check: the caller must pass valid coordinates. */
    unsigned int getPixel(int x, int y) const
    {
        return pixels[x + y * width];
    }

    /** Sets every pixel to color. */
    void clear(unsigned int color)
    {
        std::fill(pixels, pixels + size, color);
    }

private:
    /** Converts a float coordinate to the pixel that contains it. */
    static int toPixel(float v)
    {
        return static_cast<int>(std::floor(v));
    }

    /**
     * Size in pixels of an extent after scaling, or 0 when the result is
     * smaller than one pixel, negative or NaN (nothing can be drawn then).
     */
    static int scaledExtent(int extent, float scale)
    {
        const float scaledSize = static_cast<float>(extent) * scale;
        if (!(scaledSize >= 1.0f)) // negated form also rejects NaN
            return 0;
        return static_cast<int>(scaledSize);
    }

    /**
     * Nearest-neighbour draw of bmp, resized to outWidth x outHeight, with
     * its top-left corner at (left, top). Goes through drawPixel(), so it is
     * clipped and skips pixels whose colour is 0.
     *
     * Source coordinates are computed with integer arithmetic, which keeps
     * them inside the source by construction instead of relying on an
     * accumulated float step. Rows are the outer loop so both bitmaps are
     * walked in memory order.
     */
    void drawScaled(const Bitmap *bmp, int left, int top, int outWidth,
                    int outHeight)
    {
        for (int yOut = 0; yOut < outHeight; ++yOut)
        {
            const int srcY = static_cast<int>(
                static_cast<long long>(yOut) * bmp->height / outHeight);
            const unsigned int *srcRow = bmp->pixels + srcY * bmp->width;
            for (int xOut = 0; xOut < outWidth; ++xOut)
            {
                const int srcX = static_cast<int>(
                    static_cast<long long>(xOut) * bmp->width / outWidth);
                drawPixel(left + xOut, top + yOut, srcRow[srcX]);
            }
        }
    }

    /**
     * Copies bmp to (x, y) unrotated and unscaled, one row at a time.
     * The source rectangle is intersected with this bitmap first, so any
     * combination of edges may be crossed. Every copied pixel overwrites the
     * destination (no transparency test).
     */
    void blitPos(Bitmap *bmp, float x, float y)
    {
        const int dstX = toPixel(x);
        const int dstY = toPixel(y);

        // Visible part of the source, as half-open column/row ranges.
        const int firstCol = std::max(0, -dstX);
        const int lastCol = std::min(bmp->width, width - dstX);
        const int firstRow = std::max(0, -dstY);
        const int lastRow = std::min(bmp->height, height - dstY);
        if (firstCol >= lastCol || firstRow >= lastRow)
            return;

        for (int row = firstRow; row < lastRow; ++row)
        {
            const unsigned int *from = bmp->pixels + row * bmp->width + firstCol;
            unsigned int *to = pixels + (dstY + row) * width + dstX + firstCol;
            memcpy(to, from, sizeof(unsigned int) * (lastCol - firstCol));
        }
    }

    /**
     * Draws bmp rotated by rot radians about its (0,0) corner, which is
     * placed at (x, y). Every destination pixel inside the rotated bounding
     * box is mapped back into the source; pixels that land outside the
     * source, or whose colour has a zero alpha byte (top 8 bits), are skipped.
     */
    void blitRot(Bitmap *bmp, float x, float y, float rot)
    {
        const float sine = std::sin(-rot);
        const float cosine = std::cos(-rot);
        const float srcW = static_cast<float>(bmp->width);
        const float srcH = static_cast<float>(bmp->height);

        // Source corners (0,0), (0,h), (w,h), (w,0) in destination space.
        const float cornerX[4] = {0.0f, -srcH * sine,
                                  srcW * cosine - srcH * sine, srcW * cosine};
        const float cornerY[4] = {0.0f, srcH * cosine,
                                  srcH * cosine + srcW * sine, srcW * sine};
        float minX = cornerX[0], maxX = cornerX[0];
        float minY = cornerY[0], maxY = cornerY[0];
        for (int i = 1; i < 4; ++i)
        {
            minX = std::min(minX, cornerX[i]);
            maxX = std::max(maxX, cornerX[i]);
            minY = std::min(minY, cornerY[i]);
            maxY = std::max(maxY, cornerY[i]);
        }

        const int originX = toPixel(x);
        const int originY = toPixel(y);

        // Round the box outward so edge pixels are not lost, then clip it to
        // this bitmap so off-screen pixels are never visited.
        const int firstX = std::max(static_cast<int>(std::floor(minX)), -originX);
        const int lastX = std::min(static_cast<int>(std::ceil(maxX)), width - originX);
        const int firstY = std::max(static_cast<int>(std::floor(minY)), -originY);
        const int lastY = std::min(static_cast<int>(std::ceil(maxY)), height - originY);

        for (int sy = firstY; sy < lastY; ++sy)
        {
            for (int sx = firstX; sx < lastX; ++sx)
            {
                // floor(), not truncation: -0.3 is outside the source and
                // must not be sampled as column/row 0.
                const int srcx = static_cast<int>(std::floor(sx * cosine + sy * sine));
                const int srcy = static_cast<int>(std::floor(sy * cosine - sx * sine));
                if (srcx < 0 || srcy < 0 || srcx >= bmp->width || srcy >= bmp->height)
                    continue;

                const unsigned int color = bmp->pixels[srcx + srcy * bmp->width];
                if (color & 0xFF000000)
                    drawPixel(originX + sx, originY + sy, color);
            }
        }
    }

    /**
     * Draws bmp at (x, y) resized by the given factors, using
     * nearest-neighbour sampling. Draws nothing when either scaled dimension
     * is below one pixel.
     */
    void blitScl(Bitmap *bmp, float x, float y, float sclx, float scly)
    {
        const int finalwidth = scaledExtent(bmp->width, sclx);
        const int finalheight = scaledExtent(bmp->height, scly);
        if (finalwidth == 0 || finalheight == 0)
            return;
        drawScaled(bmp, toPixel(x), toPixel(y), finalwidth, finalheight);
    }

public:
    int width;            // pixels per row
    int height;           // number of rows
    int size;             // width * height
    unsigned int *pixels; // owned buffer of `size` colours, row-major
};
以下代码展示了我最近尝试的方法:先保存所有绘制指令,待全部指令收集完毕后,再把它们交给工作线程处理:
// Only pointers to Bitmap are stored below, so an incomplete type is enough.
class Bitmap;

/**
 * One recorded draw call: "blit inbmp into outbuffer at (x, y) with the given
 * rotation and scale". The bitmaps are borrowed, not owned; whoever records
 * the instruction must keep them alive until it has been executed.
 */
class Instruction
{
public:
    /**
     * Creates an inert instruction: no bitmaps, position (0, 0), no rotation
     * and unit scale. Every member is initialised so a default-constructed
     * instruction never carries indeterminate pointers or floats.
     */
    Instruction()
        : outbuffer(nullptr), inbmp(nullptr), x(0.0f), y(0.0f), rot(0.0f),
          sclx(1.0f), scly(1.0f)
    { }

    /**
     * @param out   destination bitmap
     * @param in    source bitmap
     * @param x,y   destination position
     * @param rot   rotation passed through to Bitmap::blit
     * @param sclx,scly  scale factors passed through to Bitmap::blit
     */
    Instruction(Bitmap* out, Bitmap* in, float x, float y, float rot,
                float sclx, float scly)
        : outbuffer(out), inbmp(in), x(x), y(y), rot(rot),
          sclx(sclx), scly(scly)
    { }

    // No destructor on purpose: nothing is owned, and leaving it implicit
    // keeps the type trivially destructible for cheap storage in vectors.

public:
    Bitmap* outbuffer; // destination (not owned)
    Bitmap* inbmp;     // source (not owned)
    float x, y, rot, sclx, scly;
};
图层类:
class Layer
{
public:
bool empty()
{
return instructions.size() > 0;
}
public:
std::vector<Instruction> instructions;
int pixel_count;
};
工作线程类:
class Worker
{
public:
void start()
{
done = false;
work_thread = std::thread(&Worker::processData, this);
}
void processData()
{
while (true)
{
controller.lock();
if (done)
{
controller.unlock();
break;
}
if (!layers.empty())
{
for (int i = 0; i < layers.size(); i++)
{
for (int j = 0; j < layers[i].instructions.size(); j++)
{
Instruction* inst = &layers[i].instructions[j];
inst->outbuffer->blit(inst->inbmp, inst->x, inst->y, inst->rot, inst->sclx, inst->scly);
}
}
layers.clear();
}
controller.unlock();
}
}
void finish()
{
done = true;
}
public:
bool done;
std::thread work_thread;
std::mutex controller;
std::vector<Layer> layers;
};
最后,渲染管理器类:
class RenderManager
{
public:
RenderManager()
{
workers.reserve(std::thread::hardware_concurrency());
for (int i = 0; i < 1; i++)
{
workers.emplace_back();
workers.back().start();
}
}
void layer()
{
layers.push_back(current_layer);
current_layer = Layer();
}
void blit(Bitmap* out, Bitmap* in, float x, float y, float rot, float sclx, float scly)
{
current_layer.instructions.emplace_back(out, in, x, y, rot, sclx, scly);
}
void processInstructions()
{
if (layers.empty())
layer();
lockall();
int index = 0;
for (int i = 0; i < layers.size(); i++)
{
// Evenly distribute the layers in a round-robin fashion
Layer l = layers[i];
workers[index].layers.push_back(layers[i]);
index++;
if (index >= workers.size()) index = 0;
}
layers.clear();
unlockall();
}
void lockall()
{
for (int i = 0; i < workers.size(); i++)
{
workers[i].controller.lock();
}
}
void unlockall()
{
for (int i = 0; i < workers.size(); i++)
{
workers[i].controller.unlock();
}
}
void finish()
{
// Wait until every worker is done rendering
lockall();
// At this point, we know they have nothing more to draw
unlockall();
}
void endRendering()
{
for (int i = 0; i < workers.size(); i++)
{
// Send each one an exit code
workers[i].finish();
}
// Let the workers finish and then return
for (int i = 0; i < workers.size(); i++)
{
workers[i].work_thread.join();
}
}
private:
std::vector<Worker> workers;
std::vector<Layer> layers;
Layer current_layer;
};
以下是我尝试的第三种方法(Sending packages of draw instructions,即成批发送绘制指令)的结果截图:
如果有人能简单地为我指出应该尝试哪种方法,那将对我非常有帮助。我已经尝试过这四种方法,但都失败了,所以我来向比我更有经验的人请教。房间里最不聪明的人,是那个因为骄傲而不肯提问的人。请注意,这是我在 Stack Overflow 上的第一个问题。