我正在做云的模拟(真实的云,不是云计算):用3D点来模拟云,然后把这些点投影到一张大约640x480的2D热图上。点数大约是50k,这已经是在不破坏模拟效果的前提下所能取的最小值了,但我始终找不到能足够快地完成这一步的方法(通常每次需要3-5秒的运行时间)。
我想我的问题是,普通计算机能够做到这一点是否可行?我通常低估了现在计算机的速度,但在这种情况下我可能会高估它们。我还没有对模拟进行优化,但如果不能实现模拟,那么现在知道并省去麻烦是件好事。
如果可行,有没有什么技术能让从点数据到热图的转换快到每秒更新60次?这个过程实际上只是读取点数据,做完变换后把结果写入一个2D数组,因此我认为它主要受限于内存访问(memory bound)。
答案 0 :(得分:6)
这绝对是可行的,即使计算是由CPU完成的。理想情况下,您应该使用GPU。所需的API可以是OpenCL,也可以在渲染结果时使用Compute Shaders。
这两种技术都允许您编写一个适用于单个元素(点)的小程序(着色器)。这些都可以在GPU上并行运行,这样可以让它们运行得非常快。
答案 1 :(得分:4)
是,如果您的数据已在内存中预先计算
只需尝试使用SDL纹理(或直接使用OpenGL纹理,这是SDL使用的):
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <SDL2/SDL.h>
#define COLOR_MAX 255
/* Return the current time on the TIME_UTC base as seconds, with the
 * nanosecond part folded into the fraction. */
double common_get_secs(void) {
    struct timespec now;
    double whole_secs;
    double frac_secs;

    timespec_get(&now, TIME_UTC);
    whole_secs = now.tv_sec;
    frac_secs = 1e-9 * now.tv_nsec;
    return whole_secs + frac_secs;
}
/* Minimum time, in seconds, that must elapse between two FPS printouts. */
const double COMMON_FPS_GRANULARITY_S = 0.5;
/* Time (as returned by common_get_secs) at which the current measuring
 * window started. */
double common_fps_last_time_s;
/* Number of frames counted since the current measuring window started. */
unsigned int common_fps_nframes;
/* Reset the FPS counter: no frames counted yet, and the measuring window
 * starts at the current time.
 *
 * Declared with (void) so that this is a real prototype; an empty parameter
 * list in C11 leaves the parameters unspecified and lets bogus calls with
 * arguments compile silently. */
void common_fps_init(void) {
    common_fps_nframes = 0;
    common_fps_last_time_s = common_get_secs();
}
/* Count one more frame and, once more than COMMON_FPS_GRANULARITY_S seconds
 * have elapsed since the last report, print the average FPS over that
 * interval to stdout and start a new measuring window.
 *
 * Declared with (void) so that this is a real prototype rather than an
 * old-style declaration with unspecified parameters. */
void common_fps_update_and_print(void) {
    const double current_time_s = common_get_secs();
    double dt;

    common_fps_nframes++;
    dt = current_time_s - common_fps_last_time_s;
    if (dt > COMMON_FPS_GRANULARITY_S) {
        printf("FPS = %f\n", common_fps_nframes / dt);
        /* Start the next measuring window from this report. */
        common_fps_last_time_s = current_time_s;
        common_fps_nframes = 0;
    }
}
/*
 * CPU heat-map demo: every frame fills a streaming SDL texture pixel by
 * pixel, presents it and updates the FPS printout, until the window is
 * closed.
 *
 * Build with -DHEATMAP_SINE_PATTERN to use the more expensive radial sine
 * pattern instead of the default cheap diagonal gradient.
 *
 * Returns EXIT_SUCCESS after a normal quit, EXIT_FAILURE if SDL setup or a
 * texture lock fails.
 */
int main(void) {
    SDL_Event event;
    SDL_Renderer *renderer = NULL;
    SDL_Texture *texture = NULL;
    SDL_Window *window = NULL;
    Uint8 *row;
    Uint8 *base;
    int pitch = 0;
    int running = 1;
    int status = EXIT_FAILURE;
    void *pixels = NULL;
    const unsigned int
        WINDOW_WIDTH = 500,
        WINDOW_HEIGHT = WINDOW_WIDTH;
    const double SPEED = WINDOW_WIDTH / 10.0;
#ifdef HEATMAP_SINE_PATTERN
    const double
        CENTER_X = WINDOW_WIDTH / 2.0,
        CENTER_Y = WINDOW_HEIGHT / 2.0,
        PERIOD = WINDOW_WIDTH / 10.0,
        PI2 = 2.0 * acos(-1.0);
    /* Signed offsets from the centre: these must be floating point, since
     * half of them are negative. */
    double xc, yc;
#endif
    double dt, initial_time;
    float z;
    unsigned int x, y;

    if (SDL_Init(SDL_INIT_TIMER | SDL_INIT_VIDEO) != 0) {
        fprintf(stderr, "SDL_Init failed: %s\n", SDL_GetError());
        goto cleanup;
    }
    if (SDL_CreateWindowAndRenderer((int)WINDOW_WIDTH, (int)WINDOW_HEIGHT, 0,
                                    &window, &renderer) != 0) {
        fprintf(stderr, "SDL_CreateWindowAndRenderer failed: %s\n", SDL_GetError());
        goto cleanup;
    }
    texture = SDL_CreateTexture(renderer, SDL_PIXELFORMAT_ARGB8888,
        SDL_TEXTUREACCESS_STREAMING, (int)WINDOW_WIDTH, (int)WINDOW_HEIGHT);
    if (texture == NULL) {
        fprintf(stderr, "SDL_CreateTexture failed: %s\n", SDL_GetError());
        goto cleanup;
    }

    initial_time = common_get_secs();
    common_fps_init();
    while (running) {
        dt = common_get_secs() - initial_time;
        if (SDL_LockTexture(texture, NULL, &pixels, &pitch) != 0) {
            fprintf(stderr, "SDL_LockTexture failed: %s\n", SDL_GetError());
            goto cleanup;
        }
        /* Walk the texture row by row. Rows are `pitch` bytes apart, which
         * may be more than 4 * WINDOW_WIDTH, so the row stride must come
         * from SDL and not from the window width. */
        for (y = 0; y < WINDOW_HEIGHT; y++) {
            row = (Uint8 *)pixels + (size_t)y * (size_t)pitch;
            for (x = 0; x < WINDOW_WIDTH; x++) {
#ifdef HEATMAP_SINE_PATTERN
                xc = CENTER_X - x;
                yc = CENTER_Y - y;
                z = COLOR_MAX * 0.5 * (1.0 + (sin(PI2 * (sqrt(xc*xc + yc*yc) - SPEED * dt) / PERIOD)));
#else
                z = (int)(x + y + SPEED * dt) % COLOR_MAX;
#endif
                /* Four bytes per pixel; the computed value goes into byte 2
                 * and byte 3 is set to full intensity. */
                base = row + 4 * (size_t)x;
                base[0] = 0;
                base[1] = 0;
                base[2] = z;
                base[3] = COLOR_MAX;
            }
        }
        SDL_UnlockTexture(texture);
        SDL_RenderCopy(renderer, texture, NULL, NULL);
        SDL_RenderPresent(renderer);
        common_fps_update_and_print();
        /* Drain the whole event queue each frame so that a quit request is
         * never stuck behind a backlog of other events. */
        while (SDL_PollEvent(&event)) {
            if (event.type == SDL_QUIT) {
                running = 0;
            }
        }
    }
    status = EXIT_SUCCESS;

cleanup:
    if (texture != NULL) {
        SDL_DestroyTexture(texture);
    }
    if (renderer != NULL) {
        SDL_DestroyRenderer(renderer);
    }
    if (window != NULL) {
        SDL_DestroyWindow(window);
    }
    SDL_Quit();
    return status;
}
编译:
gcc -Wall -std=c11 heatmap.c -lSDL2 -lm
在Ubuntu 16.04上,更简单的计算:
z = (int)(x + y + SPEED * dt) % COLOR_MAX
使用Nvidia NVS 5400M(2012年中期)在联想Thinkpad T430上达到300 FPS。
当然,如果结果已经预先计算好并放在内存里,速度还会更快。
如果计算稍微复杂一点:
z = COLOR_MAX * 0.5 * (1.0 + (sin(PI2 * (sqrt(xc*xc + yc*yc) - SPEED * dt) / PERIOD)))
FPS只有30,可见瓶颈很快就变成了计算本身。
如果计算无法足够快地实时完成,你可能需要把结果存到磁盘上以免撑爆内存,这时就需要对你的磁盘加压缩方法(视频编解码器)做基准测试了。
片段着色器
如果你可以在片段着色器上运行你的计算,你可以实时做更复杂的事情。
使用以下代码,那个更复杂的计算可以跑到3k FPS!
但实施起来会更难,所以请确保你需要它。
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <SDL2/SDL.h>
#define GLEW_STATIC
#include <GL/glew.h>
/* Viewport size in pixels; the same values are uploaded to the fragment
 * shader as the "width" and "height" uniforms. */
static const GLuint WIDTH = 500;
static const GLuint HEIGHT = 500;
/* Pass-through vertex shader (GLSL 1.20): forwards the 2D `coord2d`
 * attribute unchanged as the output position, with z = 0 and w = 1. */
static const GLchar* vertex_shader_source =
"#version 120\n"
"attribute vec2 coord2d;\n"
"void main(void) {\n"
" gl_Position = vec4(coord2d, 0.0, 1.0);\n"
"}\n";
/* Fragment shader (GLSL 1.20) that draws the animated radial sine pattern.
 *
 * Uniforms:
 *   pi2        - 2 * pi
 *   time       - elapsed seconds, used as the phase shift of the wave
 *   width      - viewport width in pixels
 *   height     - viewport height in pixels
 *   periods_x  - number of wave periods across the viewport width
 *   periods_y  - number of wave periods across the viewport height
 *
 * The whole output colour is written in one assignment so that alpha is
 * defined as well; writing only components 0..2 of gl_FragColor leaves the
 * fourth one undefined. */
static const GLchar* fragment_shader_source =
    "#version 120\n"
    "uniform float pi2;\n"
    "uniform float time;\n"
    "uniform float width;\n"
    "uniform float height;\n"
    "uniform float periods_x;\n"
    "uniform float periods_y;\n"
    "void main(void) {\n"
    "    float center_x = width / 2.0;\n"
    "    float center_y = height / 2.0;\n"
    "    float x = (gl_FragCoord.x - center_x) * periods_x / width;\n"
    "    float y = (gl_FragCoord.y - center_y) * periods_y / height;\n"
    "    float red = 0.5 * (1.0 + (sin((pi2 * (sqrt(x*x + y*y) - time)))));\n"
    "    gl_FragColor = vec4(red, 0.0, 0.0, 1.0);\n"
    "}\n";
/* Corners of the quad that is drawn, as (x, y) pairs fed to the `coord2d`
 * attribute: top-left, top-right, bottom-right, bottom-left. With the
 * pass-through vertex shader these span the whole -1..1 range in both axes. */
static const GLfloat vertices[] = {
-1.0, 1.0,
1.0, 1.0,
1.0, -1.0,
-1.0, -1.0,
};
/* Indices into `vertices` forming the two triangles that make up the quad. */
static const GLuint indexes[] = {
0, 2, 1,
0, 3, 2,
};
/* Return the current time on the TIME_UTC base as seconds, with the
 * nanosecond part folded into the fraction. */
double common_get_secs(void) {
    struct timespec now;
    double whole_secs;
    double frac_secs;

    timespec_get(&now, TIME_UTC);
    whole_secs = now.tv_sec;
    frac_secs = 1e-9 * now.tv_nsec;
    return whole_secs + frac_secs;
}
/* Minimum time, in seconds, that must elapse between two FPS printouts. */
const double COMMON_FPS_GRANULARITY_S = 0.5;
/* Time (as returned by common_get_secs) at which the current measuring
 * window started. */
double common_fps_last_time_s;
/* Number of frames counted since the current measuring window started. */
unsigned int common_fps_nframes;
/* Reset the FPS counter: no frames counted yet, and the measuring window
 * starts at the current time.
 *
 * Declared with (void) so that this is a real prototype; an empty parameter
 * list in C11 leaves the parameters unspecified and lets bogus calls with
 * arguments compile silently. */
void common_fps_init(void) {
    common_fps_nframes = 0;
    common_fps_last_time_s = common_get_secs();
}
/* Count one more frame and, once more than COMMON_FPS_GRANULARITY_S seconds
 * have elapsed since the last report, print the average FPS over that
 * interval to stdout and start a new measuring window.
 *
 * Declared with (void) so that this is a real prototype rather than an
 * old-style declaration with unspecified parameters. */
void common_fps_update_and_print(void) {
    const double current_time_s = common_get_secs();
    double dt;

    common_fps_nframes++;
    dt = current_time_s - common_fps_last_time_s;
    if (dt > COMMON_FPS_GRANULARITY_S) {
        printf("FPS = %f\n", common_fps_nframes / dt);
        /* Start the next measuring window from this report. */
        common_fps_last_time_s = current_time_s;
        common_fps_nframes = 0;
    }
}
/* Copy paste. Upstream on OpenGL. */
GLint common_get_shader_program(
const char *vertex_shader_source,
const char *fragment_shader_source) {
GLchar *log = NULL;
GLint fragment_shader, log_length, program, success, vertex_shader;
/* Vertex shader */
vertex_shader = glCreateShader(GL_VERTEX_SHADER);
glShaderSource(vertex_shader, 1, &vertex_shader_source, NULL);
glCompileShader(vertex_shader);
glGetShaderiv(vertex_shader, GL_COMPILE_STATUS, &success);
glGetShaderiv(vertex_shader, GL_INFO_LOG_LENGTH, &log_length);
log = malloc(log_length);
if (log_length > 0) {
glGetShaderInfoLog(vertex_shader, log_length, NULL, log);
printf("vertex shader log:\n\n%s\n", log);
}
if (!success) {
printf("vertex shader compile error\n");
exit(EXIT_FAILURE);
}
/* Fragment shader */
fragment_shader = glCreateShader(GL_FRAGMENT_SHADER);
glShaderSource(fragment_shader, 1, &fragment_shader_source, NULL);
glCompileShader(fragment_shader);
glGetShaderiv(fragment_shader, GL_COMPILE_STATUS, &success);
glGetShaderiv(fragment_shader, GL_INFO_LOG_LENGTH, &log_length);
if (log_length > 0) {
log = realloc(log, log_length);
glGetShaderInfoLog(fragment_shader, log_length, NULL, log);
printf("fragment shader log:\n\n%s\n", log);
}
if (!success) {
printf("fragment shader compile error\n");
exit(EXIT_FAILURE);
}
/* Link shaders */
program = glCreateProgram();
glAttachShader(program, vertex_shader);
glAttachShader(program, fragment_shader);
glLinkProgram(program);
glGetProgramiv(program, GL_LINK_STATUS, &success);
glGetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length);
if (log_length > 0) {
log = realloc(log, log_length);
glGetProgramInfoLog(program, log_length, NULL, log);
printf("shader link log:\n\n%s\n", log);
}
if (!success) {
printf("shader link error");
exit(EXIT_FAILURE);
}
free(log);
glDeleteShader(vertex_shader);
glDeleteShader(fragment_shader);
return program;
}
int main(void) {
/* SDL variables. */
SDL_Event event;
SDL_Window *window;
SDL_GLContext gl_context;
const unsigned int WINDOW_WIDTH = 500, WINDOW_HEIGHT = WINDOW_WIDTH;
double dt, initial_time;
/* OpenGL variables. */
GLint
attribute_coord2d,
ibo_size,
width_location,
height_location,
time_location,
periods_x_location,
periods_y_location,
pi2_location,
program
;
GLuint ibo, vbo;
const char *attribute_name = "coord2d";
const float
periods_x = 10.0,
periods_y = 10.0,
pi2 = 2.0 * acos(-1.0)
;
/* SDL init. */
SDL_Init(SDL_INIT_TIMER | SDL_INIT_VIDEO);
window = SDL_CreateWindow(__FILE__, 0, 0,
WINDOW_WIDTH, WINDOW_HEIGHT, SDL_WINDOW_OPENGL);
gl_context = SDL_GL_CreateContext(window);
glewInit();
/* OpenGL init. */
{
program = common_get_shader_program(vertex_shader_source, fragment_shader_source);
attribute_coord2d = glGetAttribLocation(program, attribute_name);
if (attribute_coord2d == -1) {
fprintf(stderr, "error: attribute_coord2d: %s\n", attribute_name);
return EXIT_FAILURE;
}
height_location = glGetUniformLocation(program, "height");
periods_x_location = glGetUniformLocation(program, "periods_x");
periods_y_location = glGetUniformLocation(program, "periods_y");
pi2_location = glGetUniformLocation(program, "pi2");
time_location = glGetUniformLocation(program, "time");
width_location = glGetUniformLocation(program, "width");
glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
glUseProgram(program);
glViewport(0, 0, WIDTH, HEIGHT);
glGenBuffers(1, &vbo);
glBindBuffer(GL_ARRAY_BUFFER, vbo);
glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);
glGenBuffers(1, &ibo);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ibo);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indexes), indexes, GL_STATIC_DRAW);
glGetBufferParameteriv(GL_ELEMENT_ARRAY_BUFFER, GL_BUFFER_SIZE, &ibo_size);
glUniform1f(pi2_location, pi2);
glUniform1f(width_location, WIDTH);
glUniform1f(height_location, HEIGHT);
glUniform1f(periods_x_location, periods_x);
glUniform1f(periods_y_location, periods_y);
}
initial_time = common_get_secs();
common_fps_init();
while (1) {
dt = common_get_secs() - initial_time;
/* OpenGL draw. */
glClear(GL_COLOR_BUFFER_BIT);
glEnableVertexAttribArray(attribute_coord2d);
glBindBuffer(GL_ARRAY_BUFFER, vbo);
glVertexAttribPointer(attribute_coord2d, 2, GL_FLOAT, GL_FALSE, 0, 0);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ibo);
glUniform1f(time_location, dt);
glDrawElements(GL_TRIANGLES, ibo_size / sizeof(indexes[0]), GL_UNSIGNED_INT, 0);
glDisableVertexAttribArray(attribute_coord2d);
common_fps_update_and_print();
SDL_GL_SwapWindow(window);
if (SDL_PollEvent(&event) && event.type == SDL_QUIT)
break;
}
/* OpenGL cleanup. */
glDeleteBuffers(1, &ibo);
glDeleteBuffers(1, &vbo);
glDeleteProgram(program);
/* SDL cleanup. */
SDL_GL_DeleteContext(gl_context);
SDL_DestroyWindow(window);
SDL_Quit();
return EXIT_SUCCESS;
}
然后:
gcc -Wall -std=c11 a.c -lSDL2 -lm -lGL -lGLEW
GitHub上游: