我正在学习 Compute Shader,并尝试在 Unity 中使用它(我对着色器不太熟悉)。我想做一些简单的光线投射,并从 Compute Shader 写入渲染纹理。功能上一切正常,结果也符合预期。光线与三角形的求交计算非常快——不到半秒。但是,一旦我尝试把新颜色写入渲染纹理,性能就会急剧下降,耗时跃升至 5 秒。我无法在不恶化性能的情况下用 break 跳出循环;甚至也无法使用 bool 标志——即在循环内把它设为 true,然后在循环外根据它来更新纹理颜色,这样做同样很慢。
#pragma kernel MainCS
// One triangle of the scene geometry, as stored in the `tris` buffer.
struct Triangle
{
    float3 v0; // first vertex
    float3 v1; // second vertex
    float3 v2; // third vertex
    float3 n;  // NOTE(review): presumably the face normal - confirm against the C# side that fills the buffer
};
// Precomputed and set from C# script
// Per-pixel ray description, as stored in the `pixels` buffer.
struct Pixel
{
    float3 position;  // ray origin for this pixel
    float3 direction; // ray direction for this pixel
    int index;        // NOTE(review): presumably the pixel's index in the 1D buffer - confirm
    float pixelColor; // NOTE(review): presumably a per-pixel output value - confirm
};
// Thread-group edge length (threads per group along X and Y).
// NOTE(review): presumably consumed by the kernel's numthreads attribute - confirm.
#define blocksize 8
// variables
// Number of pixels per image row; MainCS uses it to turn (id.x, id.y) into an index into `pixels`.
int imageSize;
// buffers
// Per-pixel rays; MainCS reads each ray's origin and direction from here.
RWStructuredBuffer<Pixel> pixels : register(u0); // UAV
// Output texture; MainCS writes the hit colour at id.xy.
RWTexture2D<float4> rendTex : register(u1); // UAV
// Triangles that MainCS tests every ray against.
const StructuredBuffer<Triangle> tris : register(t0); // SRV
// Compute kernel: casts the ray stored for the current pixel against every
// triangle in `tris` and paints the pixel red in `rendTex` if any triangle
// passes both intersection checks.
//
// The texture is written at most once per thread, after the loop: the loop
// only records whether a hit occurred. Writing inside the loop would repeat
// the same store for every triangle that is hit.
//
// NOTE(review): the very short timing observed when the texture write is
// removed is presumably the compiler discarding the whole loop as dead code
// (nothing observable depends on it) - confirm with a GPU profiler before
// treating the write itself as the bottleneck.
//
// Render-texture random write must be enabled from the C# script.
[numthreads(blocksize, blocksize, 1)]
void MainCS (uint3 id : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
{
    // `pixels` is a 1D array laid out row by row, imageSize entries per row.
    int pixelID = (int)(id.y * imageSize + id.x);

    // Ray for this pixel.
    float3 rayO = pixels[pixelID].position;
    float3 rayD = pixels[pixelID].direction;

    bool bIntersect = false;

    for (uint tr = 0; tr < tris.Length; tr++)
    {
        float3 pt0 = tris[tr].v0;
        float3 pt1 = tris[tr].v1;
        float3 pt2 = tris[tr].v2;

        // Vectors from each vertex to the ray origin.
        float3 edge0 = rayO - pt0;
        float3 edge1 = rayO - pt1;
        float3 edge2 = rayO - pt2;

        // First check - the ray must pass on the same side of all three edges.
        if (dot(rayD, cross(edge0, edge1)) >= 0.0 ||
            dot(rayD, cross(edge1, edge2)) >= 0.0 ||
            dot(rayD, cross(edge2, edge0)) >= 0.0) continue;

        // Second check - sign of the ray origin's offset along the triangle normal.
        // The hit point itself is not needed for a yes/no test, so it is not computed.
        float3 n = normalize(cross(pt0 - pt1, pt0 - pt2));
        float _a = -dot(n, edge0);

        if (_a < 0.0)
        {
            // Only remember the hit; the colour is written once, after the loop.
            bIntersect = true;
            // Further triangles cannot change the result for this pixel.
            break;
        }
    }

    if (bIntersect)
    {
        rendTex[id.xy] = float4(1.0, 0.0, 0.0, 1.0);
    }
}
我建议使用 Texture2D.SetPixels() 直接在 Unity 中绘制纹理。它接受一个 Unity Color 对象数组,可以批量写入纹理内存;全部写完之后,只需对纹理调用一次 texture.Apply() 即可。
using UnityEngine;
using System.Collections;
/// <summary>
/// While the left mouse button is held, raycasts from the mouse position
/// through the attached camera and paints the texel under the cursor black
/// on the texture of whatever object was hit.
/// </summary>
public class ExampleClass : MonoBehaviour {
    public Camera cam;

    void Start() {
        cam = GetComponent<Camera>();
    }

    void Update() {
        // Nothing to do unless the left mouse button is down.
        if (!Input.GetMouseButton(0))
            return;

        RaycastHit hit;
        if (!Physics.Raycast(cam.ScreenPointToRay(Input.mousePosition), out hit))
            return;

        Renderer rend = hit.transform.GetComponent<Renderer>();
        MeshCollider meshCollider = hit.collider as MeshCollider;

        // The hit object must have a renderer, a material with a main texture, and a mesh collider.
        if (rend == null || rend.sharedMaterial == null || rend.sharedMaterial.mainTexture == null || meshCollider == null)
            return;

        Texture2D tex = rend.material.mainTexture as Texture2D;
        // The main texture may be a different texture type, in which case the cast yields null.
        if (tex == null)
            return;

        // textureCoord is normalized; scale it to texel coordinates.
        Vector2 pixelUV = hit.textureCoord;
        pixelUV.x *= tex.width;
        pixelUV.y *= tex.height;

        tex.SetPixel((int)pixelUV.x, (int)pixelUV.y, Color.black);
        // SetPixel only changes the CPU-side copy; Apply uploads it to the GPU.
        tex.Apply();
    }
}
这是由 GPU 的设计方式决定的。简化来看,CPU 的工作方式是:取出一条指令,解码,然后在 ALU 上执行。GPU 同样取出并解码一条指令,但会在一组 ALU 上同时执行它。同一组中的所有线程同步地逐行执行相同的指令;即使其中只有一个线程需要执行不同的指令,整组线程也必须为此把程序再运行一遍。