我刚刚发现了SDL的奇怪行为。
我编写了一个简单的粒子渲染器,由于某些原因,使用软件渲染器的速度比使用硬件渲染器的速度快6倍。
这是源代码:
main.cpp
#define _USE_MATH_DEFINES
#include <math.h>
#include <time.h>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <vector>
#include <Windows.h>
#include <SDL.h>
#include "Particle.h"
// Window size in pixels as passed to SDL_CreateWindow(); Particle::tick() also
// uses these values as the bounds at which a particle wraps around.
const int SCREEN_WIDTH = 1024;
const int SCREEN_HEIGHT = 600;
// Number of particles created at start-up and updated/drawn every frame.
const int PARTICLE_NUMBER = 50000;
// Initial speeds are drawn as rand() % (MAX_SPEED - MIN_SPEED) + MIN_SPEED,
// i.e. from [MIN_SPEED, MAX_SPEED). tick() scales speed by delta/1000 with delta
// in milliseconds, so these are effectively pixels per second.
const int MAX_SPEED = 200;
const int MIN_SPEED = 5;
/**
 * Returns a millisecond timestamp for measuring elapsed time.
 *
 * The value comes from std::chrono::steady_clock, so it never goes backwards
 * and is unaffected by wall-clock adjustments (DST, NTP, manual changes).
 * Only the difference between two calls is meaningful; the absolute value has
 * an unspecified epoch.
 *
 * The previous implementation assembled a pseudo-epoch from SYSTEMTIME fields.
 * That arithmetic overflowed 32-bit int (wDay * 86400000), treated every month
 * as 30 days, and used local wall-clock time, so frame deltas could be wrong
 * or negative.
 *
 * @return milliseconds elapsed since the steady clock's epoch.
 */
long long getMs (void) {
    using std::chrono::duration_cast;
    using std::chrono::milliseconds;
    using std::chrono::steady_clock;

    const steady_clock::duration sinceEpoch = steady_clock::now().time_since_epoch();
    return static_cast<long long>(duration_cast<milliseconds>(sinceEpoch).count());
}
int main(int argc, char *argv[])
{
bool hardwareAccelerated = true;
if (argc == 2)
{
if (strncmp(argv[1], "-software", 9) == 0)
{
hardwareAccelerated = false;
}
}
char title [100];
sprintf(title, "Particles: %d - (%s)", PARTICLE_NUMBER, (hardwareAccelerated ? "HARDWARE ACCELERATED" : "SOFTWARE RENDERING"));
Particle<double> *particles = (Particle<double>*) malloc(sizeof(Particle<double>) * PARTICLE_NUMBER);
for (int i = 0; i < PARTICLE_NUMBER; i++)
{
double x = rand() % SCREEN_WIDTH;
double y = rand() % SCREEN_HEIGHT;
double direction = (((double) rand() / (double) RAND_MAX) - 0.5f) * 2 * M_PI;
double speed = rand() % (MAX_SPEED - MIN_SPEED) + MIN_SPEED;
(particles+i)->setPos(x, y);
(particles+i)->setDirection(direction);
(particles+i)->setSpeed(speed);
// std::cout << (particles+i) << std::endl;
}
if (SDL_Init(SDL_INIT_EVERYTHING) != 0) {
return 1;
}
SDL_Window *window = SDL_CreateWindow(title,
SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED,
SCREEN_WIDTH, SCREEN_HEIGHT, SDL_WINDOW_SHOWN);
if (window == nullptr) {
return 2;
}
SDL_RendererFlags flags = (hardwareAccelerated ? SDL_RENDERER_ACCELERATED : SDL_RENDERER_SOFTWARE);
SDL_Renderer *renderer = SDL_CreateRenderer(window, -1,
flags);
if (renderer == nullptr) {
return 3;
}
bool quit = false;
SDL_Event evt;
long long lastFrame = getMs();
double delta = 0.f;
while (!quit)
{
long long currentTime = getMs();
delta = currentTime - lastFrame;
lastFrame = currentTime;
std::cout << "delta: " << delta << std::endl;
while(SDL_PollEvent(&evt) != 0)
{
if (evt.type == SDL_QUIT)
{
quit = true;
}
}
SDL_SetRenderDrawColor(renderer, 0,0,0,1);
SDL_RenderClear(renderer);
SDL_SetRenderDrawColor(renderer, 255,0,0,255);
for (int i = 0; i < PARTICLE_NUMBER; i++)
{
(particles+i)->tick(delta);
double *pos = (particles+i)->getPos();
SDL_RenderDrawPoint(renderer, pos[0], pos[1]);
}
SDL_RenderPresent(renderer);
}
SDL_DestroyRenderer(renderer);
SDL_DestroyWindow(window);
SDL_Quit();
return 0;
}
Particle.h
#ifndef H_PARTICLE
#define H_PARTICLE
#include <math.h>

// Playfield bounds used by Particle::tick() for wrap-around. They are defined
// by the translation unit that includes this header. Declaring them here lets
// the template compile under standard two-phase name lookup, where a
// non-dependent name must be visible at the point of the template definition.
extern const int SCREEN_WIDTH;
extern const int SCREEN_HEIGHT;

/**
 * A point particle moving in a straight line at constant speed.
 *
 * @tparam T numeric type used for the position and the speed.
 */
template <class T>
class Particle
{
public:
    /** Creates a particle at the origin with zero speed and direction. */
    Particle(void);

    /**
     * Advances the particle along its direction.
     * @param delta elapsed time in milliseconds.
     */
    void tick(double);

    /** Sets the position to (x, y). */
    void setPos(T, T);

    /**
     * Returns a pointer to the particle's two-element {x, y} position.
     *
     * The pointer refers to storage inside the particle, so it stays valid for
     * as long as the particle exists. (Returning the address of a
     * function-local array, as before, left callers with a dangling pointer.)
     */
    T* getPos(void);

    /** Sets the heading; it is passed to cos()/sin(), i.e. radians. */
    void setDirection(double);
    /** Returns the heading last set with setDirection(). */
    double getDirection(void) const;

    /** Sets the speed in position units per second. */
    void setSpeed(T);
    /** Returns the speed last set with setSpeed(). */
    T getSpeed(void) const;

    ~Particle(void);
private:
    T pos[2];          // pos[0] = x, pos[1] = y
    T speed;
    double direction;
};

template <class T>
Particle<T>::Particle(void)
    : pos(), speed(), direction(0.0)
{
}

template <class T>
void Particle<T>::tick(double delta)
{
    // delta is in milliseconds; speed is per second.
    const double dt = delta / 1000;
    const T d_speed = this->speed * dt;

    this->pos[0] += cos(this->direction) * d_speed;
    this->pos[1] += sin(this->direction) * d_speed;

    // Wrap around the playfield edges.
    if (this->pos[0] > SCREEN_WIDTH) this->pos[0] = 0;
    if (this->pos[1] > SCREEN_HEIGHT) this->pos[1] = 0;
    if (this->pos[0] < 0) this->pos[0] = SCREEN_WIDTH;
    if (this->pos[1] < 0) this->pos[1] = SCREEN_HEIGHT;
}

template <class T>
void Particle<T>::setPos(T x, T y)
{
    this->pos[0] = x;
    this->pos[1] = y;
}

template <class T>
T* Particle<T>::getPos(void)
{
    return this->pos;
}

template <class T>
void Particle<T>::setDirection(double direction)
{
    this->direction = direction;
}

template <class T>
double Particle<T>::getDirection(void) const
{
    return this->direction;
}

template <class T>
void Particle<T>::setSpeed(T speed)
{
    this->speed = speed;
}

template <class T>
T Particle<T>::getSpeed(void) const
{
    return this->speed;
}

template <class T>
Particle<T>::~Particle(void)
{
}
#endif
为什么会这样?硬件渲染器不应该比软件渲染器快得多吗?
答案 0 :(得分:1)
SDL_RenderDrawPoint() 内部调用 SDL_RenderDrawPoints(),只是点数为 1。SDL_RenderDrawPoints() 在绘制所需的点之前会调用 SDL_stack_alloc(),并在完成后调用 SDL_stack_free()。这可能就是你的问题所在:你在每一帧里为系统中的每个粒子都执行了一次 malloc 和 free。
我认为 Retired Ninja(退役忍者)的想法是对的——改用 SDL_RenderDrawPoints(),这样每帧只需做一次 malloc 和 free。
或者——换一种思路。只创建一次 SDL_Surface。在每一帧中,对需要绘制的所有像素进行写入(对特定像素调用 SDL_MapRGB(),直接操作 SDL_Surface 的像素内存),然后在渲染时,把该 SDL_Surface 转换为 SDL_Texture,并将其呈现给渲染器。
一些示例代码——如果 Particle 是一个类,并且包含一个指向 SDL_Surface 的指针,那么您可以使用如下所示的绘图函数:
void Particle::draw()
{
Uint32 x = m_position.getX();
Uint32 y = m_position.getY();
Uint32 * pixel = (Uint32*)m_screen->pixels+(y*(m_pitch/4))+x;
Uint8 r1 = 0;
Uint8 g1 = 0;
Uint8 b1 = 0;
Uint8 a1 = 0;
GFX_RGBA_FROM_PIXEL(*pixel, m_screen->format, &r1, &g1, &b1, &a1);
Uint32 * p = (Uint32*)m_screen->pixels+(y*(m_pitch/4))+x;
*p = SDL_MapRGB(m_screen->format, m_r, m_g, m_b);
}
其中 GFX_RGBA_FROM_PIXEL(借用自 Andreas Schiffler 的 SDL2_gfx 库)的定义如下:
这样可能会更快。我没有做过任何计时测试,但可能值得一试,因为您是直接操作像素内存中的颜色,然后每帧只需简单地呈现一次。这样您就不会执行任何 free
///////////////////////////////////////////////////////////////////
/**
 * Splits a packed pixel value into its red, green, blue and alpha components
 * according to the masks, shifts and loss values of the given pixel format.
 *
 * @param pixel packed pixel value.
 * @param fmt   pixel format supplying the per-channel mask/shift/loss fields.
 * @param r,g,b,a  receive the extracted channel values.
 */
void GFX_RGBA_FROM_PIXEL(Uint32 pixel, SDL_PixelFormat * fmt, Uint8* r, Uint8* g, Uint8* b, Uint8* a)
{
    // One channel: isolate its bits, move them down to bit 0, then shift left
    // by the format's loss value; the result is truncated to 8 bits.
    const auto extract = [pixel](Uint32 mask, int shift, int loss) -> Uint8 {
        const Uint32 bits = (pixel & mask) >> shift;
        return static_cast<Uint8>(bits << loss);
    };

    *r = extract(fmt->Rmask, fmt->Rshift, fmt->Rloss);
    *g = extract(fmt->Gmask, fmt->Gshift, fmt->Gloss);
    *b = extract(fmt->Bmask, fmt->Bshift, fmt->Bloss);
    *a = extract(fmt->Amask, fmt->Ashift, fmt->Aloss);
}
或 malloc 调用。