Question

从本质上讲，我有一个 512x512x512 的 WebGLTexture 对象，除了大约 3 个体素的值为 1 之外，其余位置全为 0。我的研究涉及一个科学计算应用程序，需要尽可能快地输出这 3 个体素的 xyz 坐标；但目前我能做到的最好办法，是先让该对象经过一连串笨拙的 WebGL2 方法调用，再使用 [for] 循环。有谁知道获取这些坐标的更快方法？有没有一种方法可以把 vec3 值从片段着色器（fragment shader）推送到数组？

我一直在寻找有用的扩展程序。

我每次通过以下步骤将tbl.compressedTable推入数组：

                // Build a compressed table from the env.stipt target, using
                // env.fthrsh as the threshold.
                // NOTE(review): the RgbaCompressedData constructor shown further
                // down reads `options.channel`, not `compressionThresholdChannel`,
                // so this key looks unused there — confirm against the library.
                // NOTE(review): per the constructors shown further down, every
                // call creates a new framebuffer and four TableTexture objects
                // in addition to scanning the whole table on the CPU — presumably
                // the source of the slowdown when this runs on every sample.
                var tbl = new Abubu.RgbaCompressedDataFromTexture({ 
                    target    : env.stipt,
                    threshold : env.fthrsh,
                    compressionThresholdChannel : 'r',
                });
                // Record the sample time and remember it as the latest one.
                this.timeSeries.push(time) ;
                this.lastRecordedTime = time ;
                // Store the compressed table, wrapped in a one-element array.
                this.samples.push([tbl.compressedTable]) ;

最后一行是性能杀手。我使用的类定义如下：

class RgbaCompressedDataFromTexture extends RgbaCompressedData{
    constructor( options={} ){
        if ( options.target == undefined && 
             options.texture == undefined ) return null ;

        var texture ;
        texture = readOption(options.target, null ) ;
        texture = readOption(options.texture, options.target ) ;

        var ttbond = new Float32TextureTableBond({ target : texture } ) ;
        ttbond.tex2tab() ;
        var table       = ttbond.table ;
        var width       = ttbond.width ;
        var height      = ttbond.height ;
        var op          = options ;
        op.width        = width ;
        op.height       = height ;

        super( table, op ) ;
        this.ttbond     = ttbond ;
        this.texture    = texture ;
    }
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *  CONSTRUCTOR ENDS
 *~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

它继承自以下类：

class RgbaCompressedData{
    constructor( data, options={}){

        if (data == undefined){
            log( 'You need to provide data source for compression!') ;
            return null ;
        }

        this.data       = new Float32Array(data) ;
        this.width      = readOption( options.width,    data.length/4   ) ;
        this.height     = readOption( options.height,   1               ) ;
        if ( (this.width == (data.length/4)) && height != 1 ){
            this.width = (data.length/this.height)/4 ;
        }

        this.threshold  = readOption(   options.threshold, 0            ) ;
        this.threshold  = readOption(   options.compressionThreshold,
                                        this.threshold                  ) ;

        this.compressionThresholdChannel
                        = readOption(   options.channel,    'r'         ) ;

        switch (this.compressionThresholdChannel){
            case 'r' :
                this.channel = 0 ;
                break ;
            case 'g' :
                this.channel = 1 ;
                break ;
            case 'b' :
                this.channel = 2 ;
                break ;
            case 'a' :
                this.channel = 3 ;
                break ;
            default :
                this.channel = 0 ;
                break ;
        }

        this.compThresholdData = new Float32Array(this.width*this.height) ;

/*------------------------------------------------------------------------
 * count number of pixels above the compression threshold
 *------------------------------------------------------------------------
 */
        this.noAboveThreshold = 0 ;
        for(var j=0 ; j<this.height ; j++){
            for (var i=0 ; i <this.width; i++){
                var indx    = i + j*this.width ;
                this.compThresholdData[indx]
                        = this.data[indx*4 + this.channel] ;
                if (this.compThresholdData[indx]>this.threshold){
                        this.noAboveThreshold++ ;
                }
            }
        }

/*------------------------------------------------------------------------
 * allocating memory to data
 *------------------------------------------------------------------------
 */
        this.compressedSize    =
            Math.ceil( Math.sqrt( this.noAboveThreshold )) ;

        this.compressedTable =
            new Float32Array(this.compressedSize*this.compressedSize*4  ) ;
        this.decompressionMapTable =
            new Float32Array(this.compressedSize*this.compressedSize*4  ) ;
        this.compressionMapTable =
            new Float32Array(this.width*this.height * 4 ) ;

/*------------------------------------------------------------------------
 * compress data
 *------------------------------------------------------------------------
 */
        var num = 0 ;
        for(var j=0 ; j<this.height ; j++){
            for (var i=0 ; i <this.width; i++){
                var indx    = i + j*this.width ;
                if (this.compThresholdData[indx]>this.threshold){
                    var jj  = Math.floor( num/this.compressedSize) ;
                    var ii  = num - jj*this.compressedSize ;

                    var x   = ii/this.compressedSize
                            + 0.5/this.compressedSize ;
                    var y   = jj/this.compressedSize
                            + 0.5/this.compressedSize ;

                    var nindx = ii + jj*this.compressedSize ;

                    this.compressionMapTable[indx*4     ]   = x ;
                    this.compressionMapTable[indx*4 + 1 ]   = y ;
                    this.decompressionMapTable[nindx*4  ]   =
                        i/this.width + 0.5/this.width ;
                    this.decompressionMapTable[nindx*4+1]   =
                        j/this.height+ 0.5/this.height ;

                    for (var k = 0 ; k<4 ; k++){
                        this.compressedTable[nindx*4+k]
                            = this.data[indx*4+k] ;
                    }
                    num++ ;
                }else{
                    this.compressionMapTable[indx*4     ]
                        = 1.-0.5/this.compressedSize ;
                    this.compressionMapTable[indx*4 + 1 ]
                        = 1.-0.5/this.compressedSize ;
                }

            }
        }
        var ii = this.compressedSize -1 ;
        var jj = this.compressedSize -1 ;
        var nindx = ii + jj*this.compressedSize ;
        for (var k = 0 ; k<4 ; k++){
            this.compressedTable[nindx*4+k] = 0. ;
        }

/*------------------------------------------------------------------------
 * setting compressedData, compressionMap, decompressionMap textures
 *------------------------------------------------------------------------
 */
        this.full   = new TableTexture(
            this.data,
            this.width,
            this.height,
            {
                minFilter : 'nearest' ,
                magFilter : 'nearest'
            }
        ) ;

        this.sparse = new TableTexture(
            this.compressedTable,
            this.compressedSize ,
            this.compressedSize ,
            {
                minFilter : 'nearest' ,
                magFilter : 'nearest'
            }
        ) ;

        this.compressionMap     = new TableTexture(
            this.compressionMapTable,
            this.width,
            this.height ,
            {
                minFilter : 'nearest' ,
                magFilter : 'nearest'
            }
        ) ;

        this.decompressionMap   = new TableTexture(
            this.decompressionMapTable ,
            this.compressedSize ,
            this.compressedSize ,
            {
                minFilter : 'nearest' ,
                magFilter : 'nearest'
            }
        ) ;
    }   
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *  CONSTRUCTOR ENDS
 *~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

并利用以下类：

class Float32TextureTableBond{

/*------------------------------------------------------------------------
 * constructor
 *------------------------------------------------------------------------
 */
    /**
     * Creates a framebuffer with the target's texture attached as
     * COLOR_ATTACHMENT0, records whether that framebuffer is complete in
     * `canRead`, and sets up `width`, `height` and `table`.
     *
     * @param {Object} [options={}]
     * @param {Object} [options.target]  - source object; one of `target`
     *      or `texture` is required.
     * @param {Object} [options.texture] - alternative key for the source.
     * @param {Float32Array} [options.table] - table to use instead of the
     *      freshly allocated width*height*4 Float32Array.
     */
    constructor( options={}){
        // NOTE(review): this class has no `extends`, so `new` ignores the
        // null below and the caller still receives an uninitialised
        // instance.
        if ( options.target == undefined && options.texture == undefined ){
            return null ;
        } ;

        // NOTE(review): `readOptions` (plural) is used here while the
        // `table` line below uses `readOption`; confirm both names exist.
        // NOTE(review): `this.target` is read below but never assigned in
        // this constructor — presumably an accessor defined elsewhere in
        // the class that mirrors `this.texture`; verify.
        this.texture = readOptions( options.target , null ) ;
        this.texture = readOptions( options.texture, this.target ) ;

        // Attach the texture to a read framebuffer and check completeness.
        this.framebuffer = gl.createFramebuffer() ;
        gl.bindFramebuffer( gl.READ_FRAMEBUFFER, this.framebuffer) ;
        gl.framebufferTexture2D(gl.READ_FRAMEBUFFER, gl.COLOR_ATTACHMENT0,
                                gl.TEXTURE_2D,
                                this.target.texture, 0 ) ;
        gl.readBuffer( gl.COLOR_ATTACHMENT0 ) ;
        this.canRead    = (
            gl.checkFramebufferStatus(gl.READ_FRAMEBUFFER)
            == gl.FRAMEBUFFER_COMPLETE
        ) ;
        // Unbind the read framebuffer again.
        gl.bindFramebuffer( gl.READ_FRAMEBUFFER, null) ;

        this.width  = this.target.width ;
        this.height = this.target.height ;
        // 4 floats per pixel unless the caller supplies its own table.
        this.table   = readOption(options.table, 
                new Float32Array(this.width*this.height*4 ) ) ;
    }
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *  CONSTRUCTOR ENDS
 *~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

无错误消息，输出正确。当我开始记录数据时，我的模拟速度会慢到一只昏昏欲睡的乌龟的速度。

Answer 1

我还没有真正考虑过，但是下面的一些代码可能会给您一些想法。

问题在于，据我所知（AFAIK），WebGL2 中无法有条件地输出数据。您可以在片段着色器中使用 discard 丢弃片段，但这在这里似乎无济于事。

因此，无论如何，首先要考虑的是：着色器是按输出来并行化的。如果要绘制 32k 个像素，GPU 就可以并行处理 32k 项工作；如果只绘制 1 个像素、由它去检查 32k 个元素，那么 GPU 就没有什么可以并行化的了。

因此，这是一个主意，将3D纹理划分为NxNxN大的单元格，在每个单元格中搜索体素。如果单元格是32x32x32，则对于512x512x512输入，有4096项要并行化。对于每个单元格，遍历该单元格，然后将匹配项的位置相加

sum = vec4(0)
for each voxel in cell
   if voxel === 1
     sum += vec4(positionOfVoxel, 1);

outColor = sum;

结果是，如果该单元格中只有1个匹配项，则sum.xyz将包含位置，sum.w将为1。如果有多个匹配项，sum.w将为> 1

下面的代码制作一个4096x1纹理并为其渲染一个四边形。它使用gl_FragCoord.x来计算每个要渲染的像素对应于哪个单元格，并对相应单元格的结果求和。

然后使用readPixels读取结果并遍历并打印出来。理想情况下，我希望GPU本身可以计算出结果，但鉴于您不能有条件地放弃，我没有任何想法。

对于只有一个结果的单元格，直接打印结果。对于包含多个结果的单元格，再用另一个着色器扫描该单元格。我们已经知道该单元格中有多少个结果，因此可以渲染 numResults×1 个像素；着色器会遍历该单元格，每个像素只输出它找到的第 N 个结果：

 int idOfResultWeWant = int(gl_FragCoord.x)
 int resultId = 0
 for (z...) {
   for (y...) {
     for (x...) {
       if (voxel) {
         if (resultId === idOfResultWeWant) {
           outColor = position
         }
         ++resultId
       }
     }
   }
 }

下面的代码偷了个懒，使用一维结果纹理，这意味着它最多只能处理 gl.getParameter(gl.MAX_TEXTURE_SIZE) 个单元格。若要处理更多单元格，必须稍作修改（改用二维结果纹理）。

不知道这是不是最好或最快的方法，但根据渲染输出进行并行化、以及把问题拆分成更小部分的思路很重要。

例如，使用 16x16x16 的单元格可能更好；也许还应该用第一个着色器代替第二个着色器，方法是把单元格本身再细分为更小的单元格。

/**
 * Demo: locate the non-zero voxels of a sparse 512^3 R8 3D texture.
 *
 * Pass 1 renders a numCells x 1 float texture; each pixel sums
 * vec4(position, 1) over the occupied voxels of one cellSize^3 cell, so
 * `w` is the number of hits in that cell and `xyz` is the position when
 * there is exactly one hit.  Cells with more than one hit are rescanned by
 * a second shader that writes the N-th hit to pixel N.
 *
 * Requires WebGL2, EXT_color_buffer_float, the twgl library and a DOM.
 * Results are appended to the document as <pre> elements.
 */
function main() {
  const gl = document.createElement('canvas').getContext('webgl2');
  if (!gl) {
    return alert('need webgl2');
  }
  const ext = gl.getExtension('EXT_color_buffer_float');
  if (!ext) {
    return alert('need EXT_color_buffer_float');
  }
  const size = 512;
  const cellSize = 32;
  const cellsPer = size / cellSize;
  const numCells = (size * size * size) / (cellSize * cellSize * cellSize);
  
  const dataTexture = twgl.createTexture(gl, {
    target: gl.TEXTURE_3D,
    width: size,
    height: size,
    depth: size,
    minMag: gl.NEAREST,
    internalFormat: gl.R8,
    auto: false,
  });
  
  /** Marks the voxel at (x, y, z) as occupied (byte 255). */
  function setData(x, y, z) {
    log('set voxel:', x, y, z);
    gl.texSubImage3D(
       gl.TEXTURE_3D, 0, x, y, z, 1, 1, 1, 
       gl.RED, gl.UNSIGNED_BYTE, new Uint8Array([255]));
  }
  
  // three random voxels ...
  for (let i = 0; i < 3; ++i) {
    const x = randInt(size);
    const y = randInt(size);
    const z = randInt(size);
    setData(x, y, z);
  }
  // ... plus three that share one cell, to exercise the rescan path
  setData(128, 267, 234);
  setData(128 + 4, 267, 234);
  setData(128 + 9, 267, 234);
  
  const cellVS = `#version 300 es
  in vec4 position;
  void main() {
    gl_Position = position;
  }
  `;
  
  const cellFS = `#version 300 es
  precision highp float;
  uniform highp sampler3D data;
  uniform int cellSize;
  out vec4 outColor;
  void main() {
    // really should use 2D but I'm lazy
    int ndx = int(gl_FragCoord.x);
    // assumes equal sides
    int size = textureSize(data, 0).x;
    int cellsPer = size / cellSize;
    int cellZ = ndx / cellsPer / cellsPer;
    int cellY = ndx / cellsPer % cellsPer;
    int cellX = ndx % cellsPer;
    
    ivec3 cell = ivec3(cellX, cellY, cellZ) * cellSize;
    vec4 sum = vec4(0);
    for (int z = 0; z < cellSize; ++z) {
      for (int y = 0; y < cellSize; ++y) {
        for (int x = 0; x < cellSize; ++x) {
          ivec3 pos = cell + ivec3(x, y, z);
          // assumes data is 0 or 1
          float occupied = texelFetch(
              data, 
              pos,
              0).r;
          sum += vec4(pos, 1) * occupied;
        }
      }
    }
    outColor = sum; 
  }
  `;
  
  const cellScanFS = `#version 300 es
  precision highp float;
  uniform highp sampler3D data;
  uniform int cellSize;
  uniform ivec3 cell;  // offset into cell
  out vec4 outColor;
  void main() {
    // really should use 2D but I'm lazy
    int idWeWant = int(gl_FragCoord.x);
    // assumes equal sides
    int size = textureSize(data, 0).x;
    int cellsPer = size / cellSize;
    
    vec4 result = vec4(0);
    int id = 0;
    for (int z = 0; z < cellSize; ++z) {
      for (int y = 0; y < cellSize; ++y) {
        for (int x = 0; x < cellSize; ++x) {
          ivec3 pos = cell + ivec3(x, y, z);
          float occupied = texelFetch(
              data, 
              pos,
              0).r;
          if (occupied > 0.0) {
            if (id == idWeWant) {
              result = vec4(pos, 1);
            }
            ++id;
          }
        }
      }
    }
    outColor = result; 
  }
  `;
  
  const cellProgramInfo = twgl.createProgramInfo(gl, [cellVS, cellFS]);
  const cellScanProgramInfo = twgl.createProgramInfo(gl, [cellVS, cellScanFS]);
  
  const quadBufferInfo = twgl.primitives.createXYQuadBufferInfo(gl, 2);
  
  // as long as numCells is less than the max
  // texture dimensions we can use a 
  // numCells by 1 result texture.
  // If numCells is > max texture dimension
  // we'd need to adjust the code to use
  // a 2d result texture.

  const cellResultWidth = numCells;
  const cellResultHeight = 1;
  const cellResultFBI = twgl.createFramebufferInfo(gl, [
    { internalFormat: gl.RGBA32F, minMag: gl.NEAREST }
  ], cellResultWidth, cellResultHeight);
  
  twgl.bindFramebufferInfo(gl, cellResultFBI);

  twgl.setBuffersAndAttributes(gl, cellProgramInfo, quadBufferInfo);
  
  gl.useProgram(cellProgramInfo.program);
  twgl.setUniforms(cellProgramInfo, {
    cellSize,
    data: dataTexture,
  });
  
  // draw the quad
  twgl.drawBufferInfo(gl, quadBufferInfo);
  
  // one RGBA float pixel per cell: xyz = summed positions, w = hit count
  const data = new Float32Array(numCells * 4);
  gl.readPixels(0, 0, numCells, 1, gl.RGBA, gl.FLOAT, data);
  
  gl.useProgram(cellScanProgramInfo.program);
  // Bind the quad's attributes for the scan program explicitly instead of
  // relying on both programs sharing the same attribute location.
  twgl.setBuffersAndAttributes(gl, cellScanProgramInfo, quadBufferInfo);
  
  {
    for (let i = 0; i < numCells; ++i) {
      const off = i * 4;
      const numResultsInCell = data[off + 3];
      if (numResultsInCell) {
        if (numResultsInCell === 1) {
          log('result at: ', ...data.slice(off, off + 3));
        } else {
          getResultsForCell(i, numResultsInCell);
        }
      }
    }
  }
  
  /**
   * Rescans cell `i`, which holds `numResultsInCell` (> 1) occupied
   * voxels, and logs the position of each one.
   */
  function getResultsForCell(i, numResultsInCell) {
    const cellZ = (i / cellsPer | 0) / cellsPer | 0;
    const cellY = (i / cellsPer | 0) % cellsPer;
    const cellX = i % cellsPer;

    twgl.setUniforms(cellScanProgramInfo, {
      cellSize,
      data: dataTexture,
      cell: [cellX * cellSize, cellY * cellSize, cellZ * cellSize],
    });

    // note: cellResultFBI is still bound. It's 4096x1
    // so we can only get up to 4096 results without switching to
    // a 2D texture
    //
    // The viewport must be set BEFORE drawing.  Setting it afterwards
    // makes each draw use the previous call's viewport, so a cell with
    // more hits than the previous one would read back stale pixels.
    gl.viewport(0, 0, numResultsInCell, 1);
    twgl.drawBufferInfo(gl, quadBufferInfo);

    const result = new Float32Array(numResultsInCell * 4);
    gl.readPixels(0, 0, numResultsInCell, 1, gl.RGBA, gl.FLOAT, result);
    for (let j = 0; j < numResultsInCell; ++j) {
      const off = j * 4;
      log('result at:', ...result.slice(off, off + 3));
    }
  }  
  
  /** Random integer in [min, max), or [0, min) when max is omitted. */
  function randInt(min, max) {
    return Math.floor(rand(min, max));
  }
  
  /** Random float in [min, max), or [0, min) when max is omitted. */
  function rand(min, max) {
    if (max === undefined) {
      max = min;
      min = 0;
    }
    return Math.random() * (max - min) + min;
  }

  /** Appends the space-joined arguments to the page as a <pre> element. */
  function log(...args) {
    const elem = document.createElement('pre');
    elem.textContent = [...args].join(' ');
    document.body.appendChild(elem);
  }

}
main();

pre { margin: 0; }

<script src="https://twgljs.org/dist/4.x/twgl-full.min.js"></script>

Answer 2

有没有办法将vec3原语从fragmentShader推送到数组？

是的，请使用着色器存储缓冲区。类似于：

// Shader storage block (std430 layout, binding point 0) holding an unsized
// array of uvec3 that the shader writes its results into.
layout(std430, binding = 0) buffer Output
{
  uvec3 out_vals[];
};

这需要绑定一个足够大的缓冲区来存储返回的结果（凭记忆，我认为 std430 允许 vec3 作为输出类型，但我也隐约觉得输出类型可能必须是 uint，那样您就需要一次写入 3 个值——记不太清了）。

然后，您需要确定要写入的输出数组中元素的索引。为此，您可以使用原子计数器缓冲区来确定计数器，例如

// Atomic counter (binding 0, offset 0) used to hand out the next free index
// in the output array.
layout(binding = 0, offset = 0) uniform atomic_uint out_count;

然后在您的着色器中，根据gl_GlobalInvocationID（如果使用计算着色器）或片段着色器的gl_SamplePosition生成索引，您应该能够写出数据：

// Take the next free slot from the atomic counter, then store this
// invocation's ID there (compute-shader form).  The built-in is spelled
// gl_GlobalInvocationID.
uint index = atomicCounterIncrement(out_count);
out_vals[index] = gl_GlobalInvocationID;

可以直接在着色器存储缓冲区上使用原子操作，但是我见过的大多数建议都建议使用ACB。

如何从非常稀疏的纹理快速输出点？

2 个答案: