我目前正在为科学应用程序编写3D可视化工具,但我遇到了性能问题。 我有一个相对较大的网格(~1000行×1000列),对于这个网格的每个点,我都有一个物理值,我想表示为高度(例如:温度)。
以下是我尝试用白高斯噪音绘制的示例: http://i.stack.imgur.com/KM23m.jpg
我正在使用DirectX 9绘制我的场景。我基本上绘制了一堆三角形,其中(X,Y)坐标是网格上的一个点,Z坐标是该点的物理测量。
以下是我在每一帧上所做的操作:
我的问题是帧速率没有预期的那么高。 当我想要至少60fps时,我在Nvidia GTX 770(& Intel Core i7 4770k,如果这很重要)上绘制的~2百万个三角形达到~30fps。
有没有更好的方法来做我正在做的事情或我的问题是三角形的数量是否太大? 如果我使用DirectX 11,我会获得更好的性能吗?
感谢您的帮助。
此处编辑是一个独立的简化代码:
#include <windows.h>
#define _USE_MATH_DEFINES
#include <math.h>
#include <windowsx.h>
#include <d3d9.h>
#include <vector>
#include <random>
#include <fcntl.h>
#include <io.h>
#define SCREEN_WIDTH 800
#define SCREEN_HEIGHT 600
#define COLORMAPSIZE 256
#pragma comment (lib, "d3d9.lib")
#define DEG2RAD(x) (x* (float)M_PI/180.0f)
#define CUSTOMFVF (D3DFVF_XYZ | D3DFVF_DIFFUSE)
std::default_random_engine randGenerator;
// global declarations
LPDIRECT3D9 d3d;
LPDIRECT3DDEVICE9 d3ddev;
const int Nrows = 1000, Ncols = 2000;
float indexAz=DEG2RAD(90), indexEl = DEG2RAD(60), distance=80;
const float dataAmplitude = 5.f;
std::vector<float> dataBuffer;
typedef struct D3DXVECTOR3 : public D3DVECTOR
{
public:
FLOAT x,y,z;
D3DXVECTOR3() {};
D3DXVECTOR3( FLOAT xx, FLOAT yy, FLOAT zz ) : x(xx), y(yy), z(zz) {};
} D3DXVECTOR3, *LPD3DXVECTOR3;
typedef struct {
float x, y, z;
D3DCOLOR color;
} Vertex;
void initD3D(HWND hWnd);
void resetD3D(HWND hWnd);
void render_frame(void);
void cleanD3D(void);
void draw_graphics(void);
// the WindowProc function prototype
LRESULT CALLBACK WindowProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam);
void CreateConsole() {
CONSOLE_SCREEN_BUFFER_INFO consoleInfo;
int consoleHandleR, consoleHandleW ;
long stdioHandle;
FILE *fptr;
AllocConsole();
std::wstring strW = L"Dev Console";
SetConsoleTitleW( strW.c_str() );
EnableMenuItem(GetSystemMenu(GetConsoleWindow(), FALSE), SC_CLOSE , MF_GRAYED);
DrawMenuBar(GetConsoleWindow());
GetConsoleScreenBufferInfo( GetStdHandle(STD_OUTPUT_HANDLE), &consoleInfo );
stdioHandle = (long)GetStdHandle( STD_INPUT_HANDLE );
consoleHandleR = _open_osfhandle( stdioHandle, _O_TEXT );
fptr = _fdopen( consoleHandleR, "r" );
*stdin = *fptr;
setvbuf( stdin, NULL, _IONBF, 0 );
stdioHandle = (long) GetStdHandle( STD_OUTPUT_HANDLE );
consoleHandleW = _open_osfhandle( stdioHandle, _O_TEXT );
fptr = _fdopen( consoleHandleW, "w" );
*stdout = *fptr;
setvbuf( stdout, NULL, _IONBF, 0 );
stdioHandle = (long)GetStdHandle( STD_ERROR_HANDLE );
*stderr = *fptr;
setvbuf( stderr, NULL, _IONBF, 0 );
}
// Generate a random number following a uniform distribution
double rand(const double inf=0, const double sup=1) {
std::uniform_real_distribution<double> distribution(inf,sup);
return distribution(randGenerator);
}
// Update the buffer with new data
void UpdateDataBuffer()
{
static bool firstCall = true;
if (firstCall) //fill the whole buffer
{
for(unsigned k = 0 ; k < Nrows*Ncols ; k++)
dataBuffer[k] = (float)rand(0,dataAmplitude);
firstCall = false;
} else { // remove the first column, shift the whole buffer and update the last column
memmove(&dataBuffer[0], &dataBuffer[Nrows], (Ncols-1)*Nrows*sizeof(float));
for(unsigned k= Nrows*(Ncols-1) ; k < Nrows*Ncols ; k++)
dataBuffer[k] = (float)rand(0,dataAmplitude);
}
}
// the entry point for any Windows program
int WINAPI WinMain(HINSTANCE hInstance,
HINSTANCE hPrevInstance,
LPSTR lpCmdLine,
int nCmdShow)
{
CreateConsole();
randGenerator.seed( GetTickCount() );
dataBuffer.resize(Nrows * Ncols);
HWND hWnd;
WNDCLASSEX wc;
ZeroMemory(&wc, sizeof(WNDCLASSEX));
wc.cbSize = sizeof(WNDCLASSEX);
wc.style = CS_HREDRAW | CS_VREDRAW;
wc.lpfnWndProc = WindowProc;
wc.hInstance = hInstance;
wc.hCursor = LoadCursor(NULL, IDC_ARROW);
wc.lpszClassName = "WindowClass";
RegisterClassEx(&wc);
hWnd = CreateWindowEx(NULL, "WindowClass", "Our Direct3D Program",
WS_OVERLAPPEDWINDOW, 0, 0, SCREEN_WIDTH, SCREEN_HEIGHT,
NULL, NULL, hInstance, NULL);
ShowWindow(hWnd, nCmdShow);
initD3D(hWnd);
MSG msg;
LARGE_INTEGER frequency;
LARGE_INTEGER t1, t2;
float fps = 0.f;
float NFramAvg = 1.0f/10.0f, totalElapsedTime = 0.0f;
QueryPerformanceFrequency(&frequency);
while(TRUE)
{
while(PeekMessage(&msg, NULL, 0, 0, PM_REMOVE))
{
TranslateMessage(&msg);
DispatchMessage(&msg);
}
if(msg.message == WM_QUIT)
break;
UpdateDataBuffer();
QueryPerformanceCounter(&t1);
render_frame();
QueryPerformanceCounter(&t2);
fps = fps - NFramAvg*fps + NFramAvg* frequency.QuadPart / (t2.QuadPart - t1.QuadPart);
totalElapsedTime += (t2.QuadPart - t1.QuadPart)*1000.0f / frequency.QuadPart;;
if (totalElapsedTime > 1000) {
printf("FPS = %g\n", fps);
totalElapsedTime = 0.0;
}
}
cleanD3D();
return msg.wParam;
}
// this is the main message handler for the program
LRESULT CALLBACK WindowProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam)
{
static int clickX, clickY;
static bool down = false;
switch(message)
{
case WM_LBUTTONDOWN:
if (!down)
{
clickX = GET_X_LPARAM(lParam);
clickY = GET_Y_LPARAM(lParam);
}
down = true;
break;
case WM_LBUTTONUP:
down = false;
break;
case WM_MOUSEMOVE:
if (down)
{
int dx = GET_X_LPARAM(lParam) - clickX;
int dy = GET_Y_LPARAM(lParam) - clickY;
indexAz += dx*DEG2RAD(0.5f);
if (indexEl + dy*DEG2RAD(0.5f) < M_PI_2 && indexEl + dy*DEG2RAD(0.5f) > -M_PI_2)
indexEl += dy*DEG2RAD(0.5f);
clickX += dx;
clickY += dy;
}
break;
case WM_MOUSEWHEEL:
{
int zDelta = GET_WHEEL_DELTA_WPARAM(wParam); //scroll power
distance -= 2*zDelta/120.f;
distance = max(1.0f, distance);
break;
}
case WM_DESTROY:
{
PostQuitMessage(0);
return 0;
} break;
case WM_SIZE:
if (d3ddev)
resetD3D(hWnd);
break;
}
return DefWindowProc (hWnd, message, wParam, lParam);
}
// this function initializes and prepares Direct3D for use
void initD3D(HWND hWnd)
{
d3d = Direct3DCreate9(D3D_SDK_VERSION);
D3DPRESENT_PARAMETERS d3dpp;
RECT rect;
GetClientRect(hWnd, &rect);
ZeroMemory(&d3dpp, sizeof(d3dpp));
d3dpp.Windowed = TRUE;
d3dpp.SwapEffect = D3DSWAPEFFECT_DISCARD;
d3dpp.hDeviceWindow = hWnd;
d3dpp.BackBufferFormat = D3DFMT_X8R8G8B8;
d3dpp.BackBufferWidth = rect.right - rect.left;
d3dpp.BackBufferHeight = rect.bottom -rect.top;
d3dpp.EnableAutoDepthStencil = TRUE;
d3dpp.AutoDepthStencilFormat = D3DFMT_D16;
d3d->CreateDevice(D3DADAPTER_DEFAULT,
D3DDEVTYPE_HAL,
hWnd,
D3DCREATE_SOFTWARE_VERTEXPROCESSING,
&d3dpp,
&d3ddev);
d3ddev->SetRenderState(D3DRS_LIGHTING, FALSE); // turn off the 3D lighting
d3ddev->SetRenderState(D3DRS_CULLMODE, D3DCULL_NONE); // turn off culling
d3ddev->SetRenderState(D3DRS_ZENABLE, TRUE); // turn on the z-buffer
}
void resetD3D(HWND hWnd)
{
D3DPRESENT_PARAMETERS d3dpp;
RECT rect;
GetClientRect(hWnd, &rect);
ZeroMemory(&d3dpp, sizeof(d3dpp));
d3dpp.Windowed = TRUE;
d3dpp.SwapEffect = D3DSWAPEFFECT_DISCARD;
d3dpp.hDeviceWindow = hWnd;
d3dpp.BackBufferFormat = D3DFMT_X8R8G8B8;
d3dpp.BackBufferWidth = rect.right - rect.left;
d3dpp.BackBufferHeight = rect.bottom -rect.top;
d3dpp.EnableAutoDepthStencil = TRUE;
d3dpp.AutoDepthStencilFormat = D3DFMT_D16;
if (d3dpp.BackBufferWidth && d3dpp.BackBufferHeight)
{
d3ddev->Reset(&d3dpp);
PAINTSTRUCT ps;
BeginPaint(hWnd, &ps);
EndPaint(hWnd, &ps);
InvalidateRect(hWnd, NULL, FALSE);
}
d3ddev->SetRenderState(D3DRS_LIGHTING, FALSE); // turn off the 3D lighting
d3ddev->SetRenderState(D3DRS_CULLMODE, D3DCULL_NONE); // turn off culling
d3ddev->SetRenderState(D3DRS_ZENABLE, TRUE); // turn on the z-buffer
}
D3DXVECTOR3 *D3DVec3Subtract(D3DXVECTOR3 *pOut, const D3DXVECTOR3 *pV1,const D3DXVECTOR3 *pV2)
{
pOut->x = pV1->x - pV2->x;
pOut->y = pV1->y - pV2->y;
pOut->z = pV1->z - pV2->z;
return pOut;
}
D3DXVECTOR3* D3DVec3Normalize(D3DXVECTOR3 *pOut,const D3DXVECTOR3 *pV)
{
FLOAT norm = sqrt( pV->x * pV->x + pV->y * pV->y + pV->z * pV->z );
pOut->x = pV->x / norm;
pOut->y = pV->y / norm;
pOut->z = pV->z / norm;
return pOut;
}
D3DXVECTOR3* D3DVec3Cross(D3DXVECTOR3 *pOut,const D3DXVECTOR3 *pV1,const D3DXVECTOR3 *pV2)
{
pOut->x = pV1->y*pV2->z - pV1->z*pV2->y;
pOut->y = pV1->z*pV2->x - pV1->x*pV2->z;
pOut->z = pV1->x*pV2->y - pV1->y*pV2->x;
return pOut;
}
FLOAT D3DVec3Dot(const D3DXVECTOR3 *pV1,const D3DXVECTOR3 *pV2)
{
return pV1->x * pV2->x + pV1->y * pV2->y + pV1->z * pV2->z;
}
D3DMATRIX* D3DMatrixLookAtLH(D3DMATRIX *pOut,const D3DXVECTOR3 *pEye,const D3DXVECTOR3 *pAt,const D3DXVECTOR3 *pUp)
{
D3DXVECTOR3 right, rightn, up, upn, vec, vec2;
D3DVec3Subtract(&vec2, pAt, pEye);
D3DVec3Normalize(&vec, &vec2);
D3DVec3Cross(&right, pUp, &vec);
D3DVec3Cross(&up, &vec, &right);
D3DVec3Normalize(&rightn, &right);
D3DVec3Normalize(&upn, &up);
pOut->m[0][0] = rightn.x;
pOut->m[1][0] = rightn.y;
pOut->m[2][0] = rightn.z;
pOut->m[3][0] = -D3DVec3Dot(&rightn,pEye);
pOut->m[0][1] = upn.x;
pOut->m[1][1] = upn.y;
pOut->m[2][1] = upn.z;
pOut->m[3][1] = -D3DVec3Dot(&upn, pEye);
pOut->m[0][2] = vec.x;
pOut->m[1][2] = vec.y;
pOut->m[2][2] = vec.z;
pOut->m[3][2] = -D3DVec3Dot(&vec, pEye);
pOut->m[0][3] = 0.0f;
pOut->m[1][3] = 0.0f;
pOut->m[2][3] = 0.0f;
pOut->m[3][3] = 1.0f;
return pOut;
}
D3DMATRIX* D3DXMatrixPerspectiveFovLH(D3DMATRIX *pOut, const FLOAT fovy, const FLOAT Aspect, const FLOAT zn, const FLOAT zf)
{
FLOAT yScale = tanf((float)M_PI_2 - fovy/2);
FLOAT xScale = yScale /Aspect;
memset(pOut,0, sizeof(*pOut));
pOut->_11 = xScale;
pOut->_22 = yScale;
pOut->_33 = zf/(zf-zn);
pOut->_34 = 1;
pOut->_43 = -zn*zf/(zf-zn);
return pOut;
}
long GetColor(const float x, const float inf, const float sup)
{
BYTE c =(BYTE)( 255 * (x-inf)/(sup-inf) );
return D3DCOLOR_XRGB(c,c,c);
}
// this is the function that puts the 3D models into video RAM
void draw_graphics(void)
{
static long colorTab[COLORMAPSIZE] = {0,};
static std::vector<Vertex> tmp;
static LPDIRECT3DVERTEXBUFFER9 v_buffer = NULL;
static LPDIRECT3DINDEXBUFFER9 i_buffer = NULL;
static unsigned NVertex = 0;
// Create empty IDirect3DTexture9
const unsigned MN = Nrows*Ncols;
unsigned k=0;
if (MN > tmp.size())
tmp.resize( MN );
if (colorTab[0] == 0) // if colortab empty, fill it
{
for(int i=0 ; i < COLORMAPSIZE ;i++)
colorTab[i] = GetColor((float)i, (float)0, (float)(COLORMAPSIZE-1));
}
if (!v_buffer)
d3ddev->CreateVertexBuffer(MN*sizeof(Vertex), 0,D3DFVF_XYZ | D3DFVF_DIFFUSE,D3DPOOL_MANAGED,&v_buffer,NULL);
float factor = (COLORMAPSIZE-1.0f)/dataAmplitude;
for (k=0 ; k < MN ; k++)
{
if (dataBuffer[k] >= dataAmplitude)
tmp[k].color = colorTab[COLORMAPSIZE-1];
else if (dataBuffer[k] <= 0)
tmp[k].color = colorTab[0];
else
tmp[k].color = colorTab[(int)( ( dataBuffer[k])*factor )];
}
float M_2 = Nrows/2.0f, N_2 = Ncols/2.0f;
k=0;
for (unsigned n=0 ; n < Ncols ; n++)
{
for (unsigned m=0 ; m < Nrows ; m++, k++)
{
tmp[k].x = M_2 - m;
tmp[k].z = n - N_2;
tmp[k].y = dataBuffer[k];
}
}
Vertex* pVoid;
v_buffer->Lock(0, 0, (void**)&pVoid, 0);
memcpy(pVoid, &tmp[0], MN*sizeof(Vertex));
v_buffer->Unlock();
if (!i_buffer)
d3ddev->CreateIndexBuffer(3*2*(Nrows-1)*(Ncols-1)*sizeof(DWORD), 0, D3DFMT_INDEX32, D3DPOOL_MANAGED,&i_buffer,NULL);
DWORD *indices;
i_buffer->Lock(0, 0, (void**)&indices, 0);
k=0;
for (unsigned n=0 ; n < Ncols-1 ; n++)
{
if (n!=0)
indices[k++] = n*Nrows;
for (unsigned m=0 ; m < Nrows-1 ; m++)
{
indices[k++] = m + n*Nrows;
indices[k++] = m + (n+1)*Nrows;
}
indices[k++] = Nrows-2 + (n+1)*Nrows;
}
NVertex = k;
i_buffer->Unlock();
d3ddev->SetStreamSource(0, v_buffer, 0, sizeof(Vertex));
d3ddev->SetIndices(i_buffer);
d3ddev->DrawIndexedPrimitive(D3DPT_TRIANGLESTRIP, 0, 0, MN, 0, NVertex-2);
//printf("%d triangle drawn\n", NVertex-2);
//i_buffer->Release();
//v_buffer->Release();
}
// this is the function used to render a single frame
void render_frame(void)
{
d3ddev->Clear(0, NULL, D3DCLEAR_TARGET, D3DCOLOR_XRGB(0, 0, 0), 1.0f, 0);
d3ddev->Clear(0, NULL, D3DCLEAR_ZBUFFER, D3DCOLOR_XRGB(0, 0, 0), 1.0f, 0);
d3ddev->BeginScene();
d3ddev->SetFVF(CUSTOMFVF);
// set the view transform
D3DMATRIX matView; // the view transform matrix
float R = distance*25;
D3DMatrixLookAtLH(&matView,
&D3DXVECTOR3 (R*sin(indexAz)*cos(indexEl), R*sin(indexEl),R*cos(indexAz)*cos(indexEl)), // the camera position
&D3DXVECTOR3 (0.0f, 0.0f, 0.0f), // the look-at position
&D3DXVECTOR3 (0.0f, 1.0f, 0.0f)); // the up direction
d3ddev->SetTransform(D3DTS_VIEW, &matView); // set the view transform to matView
// set the projection transform
D3DMATRIX matProjection; // the projection transform matrix
D3DXMatrixPerspectiveFovLH(&matProjection,
DEG2RAD(45), // the horizontal field of view
(FLOAT)SCREEN_WIDTH / (FLOAT)SCREEN_HEIGHT, // aspect ratio
0.001f, // the near view-plane
100000.f); // the far view-plane
d3ddev->SetTransform(D3DTS_PROJECTION, &matProjection); // set the projection
indexAz += DEG2RAD(0.1f);
draw_graphics();
d3ddev->EndScene();
d3ddev->Present(NULL, NULL, NULL, NULL);
}
// this is the function that cleans up Direct3D and COM
void cleanD3D(void)
{
d3ddev->Release();
d3d->Release();
}
答案 0 :(得分:2)
好吧,因为你说你受IO限制,最好的办法是减少每次更新时发送给GPU的数据量。
如果您要定位SM 3.0或更高版本,则可以使用顶点纹理提取从每个帧更新的纹理中采样高度。这对输入汇编程序和BW来说要容易得多。
为了最容易地做到这一点,你可能想要改变你的顶点数据,而不是提供带有xyz的float3,你可能想要提供一个只有xz的uint2,这样你就可以直接从纹理中获取而不需要浮点数在着色器中进行int转换。
作为附注,如果可以从它的位置简单地导出你的VB,那么你很少有理由为它写一个顶点的颜色,在着色器中使用它来计算它的bw会少一些。 lerp或其他一些混合操作。
我已经为我的一个项目做了这件事,而且我的GTX 480的速度非常快。让我在渲染高度图上花了50毫秒到大约1毫秒。
答案 1 :(得分:1)
除非我遗漏了某些内容,否则您无需锁定索引缓冲区并对其进行更新,无论如何都要将其设置为相同的值。这将摆脱1000x2000循环。
如果数据没有改变每一帧,我也不会更新每帧的顶点缓冲区。只要数据实际更新,就更新它。