Question

当我阅读有关OpenGL / WebGL性能的资料时，几乎总是听到要减少绘制调用（draw call）。我的问题是：我只用4个顶点来绘制一个带纹理的四边形，这通常意味着我的vbo只包含4个顶点。基本上是这样：

// Draw one quad: bind its vertex buffer, upload its model matrix, issue one draw call.
gl.bindBuffer(gl.ARRAY_BUFFER,vbo);
gl.uniformMatrix4fv(matrixLocation, false, modelMatrix);
// vertices.length/3 is the vertex count (presumably 3 floats per vertex — confirm against the vbo layout)
gl.drawArrays(gl.TRIANGLE_FAN,0, vertices.length/3);

这就是我看到的问题。在绘制之前，我更新当前四边形的模型矩阵。例如，沿y轴移动它5个单位。

所以我必须：

// The same vertex buffer is reused for every quad; only the model matrix changes.
gl.bindBuffer(gl.ARRAY_BUFFER,vbo);
gl.uniformMatrix4fv(matrixLocation, false, modelMatrix);
gl.drawArrays(gl.TRIANGLE_FAN, 0, vertices.length/3);

// Second quad: one more uniform upload and one more draw call.
gl.uniformMatrix4fv(matrixLocation, false, anotherModelMatrix);
gl.drawArrays(gl.TRIANGLE_FAN,0, vertices.length/3);
....// repeat until all textures are rendered

我怎样才能减少绘制调用？或者甚至将其减少到只有一次绘制调用。

Answer 1

第一个问题是，这有关系吗？

如果你的绘制调用少于1000次，甚至可能是2000次，那么这可能并不重要。易于使用比大多数其他解决方案更重要。

如果你真的需要很多四边形，那么就有很多解决方案。一种是将N个四边形放入单个缓冲区。 See this presentation。然后将位置，旋转和缩放放入其他缓冲区或纹理中，并计算着色器内的矩阵。

换句话说，对于带纹理的四边形，人们通常将顶点位置和texcoords放在像这样排列的缓冲区中

p0, p1, p2, p3, p4, p5,   // buffer for positions for 1 quad
t0, t1, t2, t3, t4, t5,   // buffer for texcoord for 1 quad

相反，你会这样做

p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, ...  // positions for N quads
t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, ...  // texcoords for N quads

p0 - p5只是单位四边形的值，p6 - p11是相同的值，p12 - p17也是相同的值。 t0-t5是单位texcoord值，t6-t11是相同的texcoord值，依此类推。

然后添加更多缓冲区。假设我们只需要世界位置和缩放（scale）。所以我们再添加2个缓冲区

s0, s0, s0, s0, s0, s0, s1, s1, s1, s1, s1, s1, s2, ...  // scales for N quads
w0, w0, w0, w0, w0, w0, w1, w1, w1, w1, w1, w1, w2, ...  // world positions for N quads

注意比例如何重复6次，对于第一个四边形的每个顶点一次。然后它再次重复6次为下一个四边形等。与世界位置相同。这就是单个四边形的所有6个顶点共享相同的世界位置和相同的比例。

现在在着色器中我们可以使用这样的

// Billboard vertex shader: every vertex of a quad carries that quad's own
// worldPosition and scale, so many quads can be drawn with one draw call.
attribute vec3 position;       // unit-quad corner
attribute vec2 texcoord;       // texture coordinate for this corner
attribute vec3 worldPosition;  // world-space position of the quad (repeated for each of its vertices)
attribute vec3 scale;          // per-quad scale (repeated for each of its vertices)

uniform mat4 view;    // inverse of camera
uniform mat4 camera;  // inverse of view
uniform mat4 projection;

varying vec2 v_texcoord;

void main() {
   // Assuming we want billboards (quads that always face the camera)
   // w = 0 so only the rotation part of the camera matrix is applied, not its translation
   vec3 localPosition = (camera * vec4(position * scale, 0)).xyz;

   // make quad points at the worldPosition
   vec3 worldPos = worldPosition + localPosition;

   gl_Position = projection * view * vec4(worldPos, 1);

   v_texcoord = texcoord; // pass on texcoord to fragment shader
}

现在我们想要设置四边形的位置，我们需要在相应的缓冲区中设置6个世界位置（每个6个顶点一个）。

一般情况下，您可以先更新所有的世界位置，然后只调用一次gl.bufferData，把所有位置一次性上传。

这是100k四边形

// Vertex shader source (GLSL). Scales the unit-quad `position` by the per-quad
// `scale`, orients it with the `camera` matrix (billboard), offsets it by the
// per-quad `worldPosition`, then applies `projection * view`. `texcoord` is
// forwarded to the fragment shader through `v_texcoord`.
const vs = `
attribute vec3 position;
attribute vec2 texcoord;
attribute vec3 worldPosition;
attribute vec2 scale;

uniform mat4 view;    // inverse of camera
uniform mat4 camera;  // inverse of view
uniform mat4 projection;

varying vec2 v_texcoord;

void main() {
   // Assuming we want billboards (quads that always face the camera)
   vec3 localPosition = (camera * vec4(position * vec3(scale, 1), 0)).xyz;

   // make quad points at the worldPosition
   vec3 worldPos = worldPosition + localPosition;

   gl_Position = projection * view * vec4(worldPos, 1);

   v_texcoord = texcoord; // pass on texcoord to fragment shader
}
`;

// Fragment shader source (GLSL): outputs the color sampled from `texture`
// at the `v_texcoord` received from the vertex shader.
const fs = `
precision mediump float;
varying vec2 v_texcoord;
uniform sampler2D texture;
void main() {
  gl_FragColor = texture2D(texture, v_texcoord);
}
`;

const m4 = twgl.m4;
const gl = document.querySelector("canvas").getContext("webgl");

// compiles and links shaders and looks up locations
const programInfo = twgl.createProgramInfo(gl, [vs, fs]);

const numQuads = 100000;
// Each quad is expanded to 6 vertices (2 triangles), so every per-vertex
// array holds numQuads * 6 entries of 2 or 3 floats.
const positions = new Float32Array(numQuads * 6 * 2);
const texcoords = new Float32Array(numQuads * 6 * 2);
const worldPositions = new Float32Array(numQuads * 6 * 3);
const basePositions = new Float32Array(numQuads * 3); // for JS: one un-animated position per quad
const scales = new Float32Array(numQuads * 6 * 2);
const unitQuadPositions = [
   -.5, -.5,
    .5, -.5,
   -.5,  .5,
   -.5,  .5,
    .5, -.5,
    .5,  .5,
];
const unitQuadTexcoords = [
    0, 0,
    1, 0,
    0, 1,
    0, 1,
    1, 0,
    1, 1,
];

// `let` keeps the loop counters block-scoped instead of creating globals
// that the render loop's counters would share.
for (let i = 0; i < numQuads; ++i) {
  const off3 = i * 6 * 3;
  const off2 = i * 6 * 2;

  // every quad uses the same unit-quad geometry and texcoords
  positions.set(unitQuadPositions, off2);
  texcoords.set(unitQuadTexcoords, off2);
  const worldPos = [rand(-100, 100), rand(-100, 100), rand(-100, 100)];
  const scale = [rand(1, 2), rand(1, 2)];
  basePositions.set(worldPos, i * 3);
  // repeat the per-quad values once for each of the quad's 6 vertices
  for (let j = 0; j < 6; ++j) {
    worldPositions.set(worldPos, off3 + j * 3);
    scales.set(scale, off2 + j * 2);
  }
}

// https avoids a mixed-content request when the page itself is served over https
const tex = twgl.createTexture(gl, {
  src: "https://i.imgur.com/weklTat.gif",
  crossOrigin: "",
  flipY: true,
});

// calls gl.createBuffer, gl.bufferData
const bufferInfo = twgl.createBufferInfoFromArrays(gl, {
  position: { numComponents: 2, data: positions, },
  texcoord: { numComponents: 2, data: texcoords, },
  worldPosition: { numComponents: 3, data: worldPositions, },
  scale: { numComponents: 2, data: scales, },
});

/**
 * Renders one frame: animates every quad's world position on the CPU,
 * re-uploads the worldPosition buffer, draws all quads with a single draw
 * call, and schedules the next frame.
 *
 * @param {number} time - timestamp supplied by requestAnimationFrame (milliseconds).
 */
function render(time) {
   const seconds = time * 0.001;

   twgl.resizeCanvasToDisplaySize(gl.canvas);

   gl.viewport(0, 0, gl.canvas.width, gl.canvas.height);
   gl.enable(gl.DEPTH_TEST);

   gl.useProgram(programInfo.program);

   // calls gl.bindBuffer, gl.enableVertexAttribArray, gl.vertexAttribPointer
   twgl.setBuffersAndAttributes(gl, programInfo, bufferInfo);

   const fov = Math.PI * .25;
   const aspect = gl.canvas.clientWidth / gl.canvas.clientHeight;
   const zNear = .1;
   const zFar = 200;
   const projection = m4.perspective(fov, aspect, zNear, zFar);

   // orbit the eye around the origin
   const radius = 100;
   const tm = seconds * .1;
   const eye = [Math.sin(tm) * radius, Math.sin(tm * .9) * radius, Math.cos(tm) * radius];
   const target = [0, 0, 0];
   const up = [0, 1, 0];
   const camera = m4.lookAt(eye, target, up);
   const view = m4.inverse(camera);

   // calls gl.uniformXXX
   twgl.setUniforms(programInfo, {
     texture: tex,
     view: view,
     camera: camera,
     projection: projection,
   });

   // update all the worldPositions
   for (let i = 0; i < numQuads; ++i) {
     const src = i * 3;
     const dst = i * 6 * 3;
     // The animated position is identical for all 6 vertices of a quad,
     // so compute it once per quad rather than once per vertex.
     const x = basePositions[src + 0] + Math.sin(seconds + i) * 10;
     const y = basePositions[src + 1] + Math.cos(seconds + i) * 10;
     const z = basePositions[src + 2];
     for (let j = 0; j < 6; ++j) {
       const off = dst + j * 3;
       worldPositions[off + 0] = x;
       worldPositions[off + 1] = y;
       worldPositions[off + 2] = z;
     }
   }

   // upload them to the GPU
   gl.bindBuffer(gl.ARRAY_BUFFER, bufferInfo.attribs.worldPosition.buffer);
   gl.bufferData(gl.ARRAY_BUFFER, worldPositions, gl.DYNAMIC_DRAW);

   // calls gl.drawXXX
   twgl.drawBufferInfo(gl, bufferInfo);

   requestAnimationFrame(render);
}
requestAnimationFrame(render);

/**
 * Returns a pseudo-random number in [min, max).
 * When called with a single argument, that argument is the upper bound and
 * the lower bound is 0.
 *
 * @param {number} min - lower bound, or the upper bound if `max` is omitted.
 * @param {number} [max] - upper bound (exclusive).
 * @returns {number} a value scaled from Math.random().
 */
function rand(min, max) {
  const hasUpper = max !== undefined;
  const lo = hasUpper ? min : 0;
  const hi = hasUpper ? max : min;
  return Math.random() * (hi - lo) + lo;
}

/* Remove the default page margin. */
body { margin: 0; }
/* Display the canvas as a block element sized to the full viewport width and height. */
canvas { width: 100vw; height: 100vh; display: block; }

<!-- twgl helper library used by the script, followed by the canvas it renders into -->
<script src="https://twgljs.org/dist/3.x/twgl-full.min.js"></script>
<!-- canvas is not a void element, so it needs an explicit closing tag -->
<canvas></canvas>

您可以使用ANGLE_instanced_arrays扩展，把每个四边形重复6次的顶点数据减少到1次。它不如上面的技术快，但非常接近。

您还可以通过在纹理中存储世界位置和比例来将数据量从6减少到1。在这种情况下，而不是2个额外的缓冲区，你只需要一个重复的id

添加一个额外的缓冲区

// id buffer
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3 ....

id重复6次，每个quad的6个顶点中的每个顶点重复一次。

然后使用该id计算纹理坐标以查找世界位置和比例。

// Quad index for this vertex; the same value is repeated for all 6 vertices of a quad.
attribute float id;
...

uniform sampler2D worldPositionTexture;  // texture with world positions
uniform vec2 textureSize;               // pass in the texture size

...

  // compute the texel that contains our world position
  // (x = column within the row, y = row index, for a texture textureSize.x texels wide)
  vec2 texel = vec2(
     mod(id, textureSize.x),
     floor(id / textureSize.x));

  // compute the UV coordinate to access that texel
  // (+ .5 addresses the center of the texel)
  vec2 uv = (texel + .5) / textureSize;

  vec3 worldPosition = texture2D(worldPositionTexture, uv).xyz;

现在你需要将世界位置放入纹理中，你可能需要使用浮点纹理来简化这一步。对于缩放等数据也可以做类似的处理：可以把每种数据分别存储在单独的纹理中，也可以全部存储在同一张纹理中，并相应地调整UV坐标的计算。

// Vertex shader source (GLSL). Uses the per-vertex `id` to compute a texel in
// `worldPositionTexture` / `scaleTexture`, reads the quad's world position and
// scale from there, then builds a camera-facing quad and applies
// `projection * view`. `texcoord` is forwarded through `v_texcoord`.
const vs = `
attribute vec3 position;
attribute vec2 texcoord;
attribute float id;

uniform sampler2D worldPositionTexture;  
uniform sampler2D scaleTexture;          
uniform vec2 textureSize;  // texture are same size so only one size needed
uniform mat4 view;    // inverse of camera
uniform mat4 camera;  // inverse of view
uniform mat4 projection;

varying vec2 v_texcoord;

void main() {
  // compute the texel that contains our world position
  vec2 texel = vec2(
     mod(id, textureSize.x),
     floor(id / textureSize.x));

  // compute the UV coordinate to access that texel
  vec2 uv = (texel + .5) / textureSize;

  vec3 worldPosition = texture2D(worldPositionTexture, uv).xyz;
  vec2 scale = texture2D(scaleTexture, uv).xy;

  // Assuming we want billboards (quads that always face the camera)
  vec3 localPosition = (camera * vec4(position * vec3(scale, 1), 0)).xyz;

  // make quad points at the worldPosition
  vec3 worldPos = worldPosition + localPosition;

  gl_Position = projection * view * vec4(worldPos, 1);

  v_texcoord = texcoord; // pass on texcoord to fragment shader
}
`;

// Fragment shader source (GLSL): outputs the color sampled from `texture`
// at the `v_texcoord` received from the vertex shader.
const fs = `
precision mediump float;
varying vec2 v_texcoord;
uniform sampler2D texture;
void main() {
  gl_FragColor = texture2D(texture, v_texcoord);
}
`;

const m4 = twgl.m4;
const gl = document.querySelector("canvas").getContext("webgl");
const ext = gl.getExtension("OES_texture_float");
if (!ext) {
  alert("Doh! requires OES_texture_float extension");
}
// the vertex shader samples two textures (world positions and scales)
if (gl.getParameter(gl.MAX_VERTEX_TEXTURE_IMAGE_UNITS) < 2) {
  alert("Doh! need at least 2 vertex texture image units");
}

// compiles and links shaders and looks up locations
const programInfo = twgl.createProgramInfo(gl, [vs, fs]);

const numQuads = 50000;
const positions = new Float32Array(numQuads * 6 * 2);
const texcoords = new Float32Array(numQuads * 6 * 2);
const ids = new Float32Array(numQuads * 6);
const basePositions = new Float32Array(numQuads * 3); // for JS
// we need to pad these because textures have to be rectangles:
// one RGBA texel (4 floats) per quad, rounded up to whole rows of 1024 texels
const size = roundUpToNearest(numQuads * 4, 1024 * 4);
const worldPositions = new Float32Array(size);
const scales = new Float32Array(size);
const unitQuadPositions = [
   -.5, -.5,
    .5, -.5,
   -.5,  .5,
   -.5,  .5,
    .5, -.5,
    .5,  .5,
];
const unitQuadTexcoords = [
    0, 0,
    1, 0,
    0, 1,
    0, 1,
    1, 0,
    1, 1,
];

for (let i = 0; i < numQuads; ++i) {
  const off2 = i * 6 * 2;
  const off4 = i * 4;

  // you could even put these in a texture OR you can even generate
  // them inside the shader based on the id. See vertexshaderart.com for
  // examples of generating positions in the shader based on id
  positions.set(unitQuadPositions, off2);
  texcoords.set(unitQuadTexcoords, off2);
  ids.set([i, i, i, i, i, i], i * 6);

  const worldPos = [rand(-100, 100), rand(-100, 100), rand(-100, 100)];
  const scale = [rand(1, 2), rand(1, 2)];
  basePositions.set(worldPos, i * 3);

  // Each quad owns exactly one texel, so write its data once at off4.
  // Writing it to the following 5 texels as well (as the per-vertex buffer
  // version does) is redundant and runs past the end of the array when
  // fewer than 5 padding texels follow the last quad.
  worldPositions.set(worldPos, off4);
  scales.set(scale, off4);
}

const tex = twgl.createTexture(gl, {
  src: "http://i.imgur.com/weklTat.gif",
  crossOrigin: "",
  flipY: true,
});

// float textures holding one texel per quad; NEAREST so texels are read unfiltered
const worldPositionTex = twgl.createTexture(gl, {
  type: gl.FLOAT,
  src: worldPositions,
  width: 1024,
  minMag: gl.NEAREST,
  wrap: gl.CLAMP_TO_EDGE,
});

const scaleTex = twgl.createTexture(gl, {
  type: gl.FLOAT,
  src: scales,
  width: 1024,
  minMag: gl.NEAREST,
  wrap: gl.CLAMP_TO_EDGE,
});

// calls gl.createBuffer, gl.bufferData
const bufferInfo = twgl.createBufferInfoFromArrays(gl, {
  position: { numComponents: 2, data: positions, },
  texcoord: { numComponents: 2, data: texcoords, },
  id: { numComponents: 1, data: ids, },
});

/**
 * Renders one frame: animates each quad's world position on the CPU (one
 * texel per quad), re-uploads the world-position texture, draws all quads
 * with a single draw call, and schedules the next frame.
 *
 * @param {number} time - timestamp supplied by requestAnimationFrame (milliseconds).
 */
function render(time) {
   const seconds = time * 0.001;

   twgl.resizeCanvasToDisplaySize(gl.canvas);

   gl.viewport(0, 0, gl.canvas.width, gl.canvas.height);
   gl.enable(gl.DEPTH_TEST);

   gl.useProgram(programInfo.program);

   // calls gl.bindBuffer, gl.enableVertexAttribArray, gl.vertexAttribPointer
   twgl.setBuffersAndAttributes(gl, programInfo, bufferInfo);

   const fov = Math.PI * .25;
   const aspect = gl.canvas.clientWidth / gl.canvas.clientHeight;
   const zNear = .1;
   const zFar = 200;
   const projection = m4.perspective(fov, aspect, zNear, zFar);

   // orbit the eye around the origin
   const radius = 100;
   const tm = seconds * .1;
   const eye = [Math.sin(tm) * radius, Math.sin(tm * .9) * radius, Math.cos(tm) * radius];
   const target = [0, 0, 0];
   const up = [0, 1, 0];
   const camera = m4.lookAt(eye, target, up);
   const view = m4.inverse(camera);

   // update all the worldPositions
   for (let i = 0; i < numQuads; ++i) {
     const src = i * 3; // basePositions is tightly packed: 3 floats per quad
     const dst = i * 4; // the texture is RGBA: 4 floats per texel, one texel per quad
     worldPositions[dst + 0] = basePositions[src + 0] + Math.sin(seconds + i) * 10;
     worldPositions[dst + 1] = basePositions[src + 1] + Math.cos(seconds + i) * 10;
     worldPositions[dst + 2] = basePositions[src + 2];
   }

   // upload them to the GPU
   const width = 1024;
   const height = worldPositions.length / width / 4;
   gl.bindTexture(gl.TEXTURE_2D, worldPositionTex);
   gl.texImage2D(gl.TEXTURE_2D, 0, gl.RGBA, width, height, 0, gl.RGBA, gl.FLOAT, worldPositions);

   // calls gl.uniformXXX, gl.activeTexture, gl.bindTexture
   twgl.setUniforms(programInfo, {
     texture: tex,
     scaleTexture: scaleTex,
     worldPositionTexture: worldPositionTex,
     textureSize: [width, height],
     view: view,
     camera: camera,
     projection: projection,
   });

   // calls gl.drawXXX
   twgl.drawBufferInfo(gl, bufferInfo);

   requestAnimationFrame(render);
}
requestAnimationFrame(render);

/**
 * Returns a pseudo-random number in [min, max).
 * When called with a single argument, that argument is the upper bound and
 * the lower bound is 0.
 *
 * @param {number} min - lower bound, or the upper bound if `max` is omitted.
 * @param {number} [max] - upper bound (exclusive).
 * @returns {number} a value scaled from Math.random().
 */
function rand(min, max) {
  const hasUpper = max !== undefined;
  const lo = hasUpper ? min : 0;
  const hi = hasUpper ? max : min;
  return Math.random() * (hi - lo) + lo;
}

/**
 * Rounds `v` up to the next multiple of `round`.
 *
 * @param {number} v - value to round up.
 * @param {number} round - step size; the result is a multiple of this.
 * @returns {number} the smallest multiple of `round` that is >= `v`
 *   (for the small non-negative integers used here; `| 0` truncates to int32).
 */
function roundUpToNearest(v, round) {
  const steps = ((v + round - 1) / round) | 0;
  return steps * round;
}

/* Remove the default page margin. */
body { margin: 0; }
/* Display the canvas as a block element sized to the full viewport width and height. */
canvas { width: 100vw; height: 100vh; display: block; }

<!-- twgl helper library used by the script, followed by the canvas it renders into -->
<script src="https://twgljs.org/dist/3.x/twgl-full.min.js"></script>
<!-- canvas is not a void element, so it needs an explicit closing tag -->
<canvas></canvas>

请注意，至少在我的机器上，通过纹理实现比通过缓冲区实现要慢。所以虽然它对JavaScript来说工作量较少（每个四边形只需更新一个worldPosition），但显然GPU的工作量更大（至少在我的机器上）。在我的机器上，缓冲区版本在100k个四边形时以60fps运行，而纹理版本在100k个四边形时大约以40fps运行，所以我把它降低到了50k。当然，这些数字只适用于我的机器，其他机器会有所不同。

这样的技术可以让你拥有更多的四边形，但这是以牺牲灵活性为代价的。您只能以着色器中提供的方式操作它们。例如，如果您希望能够以不同的原点（中心、左上角、右下角等）进行缩放，则需要添加另一份数据，或者修改顶点位置。如果你想旋转，就需要添加旋转数据，等等……

你甚至可以为每个四边形传递整个矩阵，但这样每个四边形就要上传16个浮点数。不过它仍然可能更快，因为你在调用gl.uniformMatrix4fv时已经在上传这些数据了，而现在你只需要进行2次调用：先用gl.bufferData或gl.texImage2D上传新矩阵，然后调用gl.drawXXX进行绘制。

另一个问题是你提到了纹理。如果你在每个四边形使用不同的纹理，那么你需要弄清楚如何将它们转换为纹理图集（一个纹理中的所有图像），在这种情况下，你的UV坐标不会像上面那样重复。

如何在OpenGL / WebGL中减少绘制调用

1 个答案: