我使用 Metal 着色器(Metal shader)实现了插值功能。执行下面这行代码时,有时会报告段错误(segmentation fault):
[commandEncoder dispatchThreadgroups:threadGroups threadsPerThreadgroup:threadGroupCounts];
我已经检查了程序的其他部分,这个错误似乎是由 dispatchThreadgroups 方法引起的。我不确定是否是 threadGroups 和 threadGroupCounts 的取值不正确导致了该错误。完整代码如下:
/*
 * Runs the "put_hevc_qpel_v" Metal compute pipeline on one block.
 *
 * The source block plus a 4-byte/4-row margin on each side is staged into a
 * 72-byte-wide scratch image, uploaded as an RGBA8Uint texture (4 source bytes
 * per texel), processed by the compute kernel into an RGBA16Uint texture
 * (4 int16 results per texel), read back, and the interior width x height
 * region is copied into dst.
 *
 * metal_context  MetalContext* holding the device, command queue and pipeline.
 * dst            output rows, MAX_PB_SIZE int16_t apart.
 * _src           top-left sample of the block; rows/columns -4 .. +3 beyond the
 *                block are read, so the caller must provide that margin.
 * _srcstride     distance between source rows, in bytes.
 * height, width  block size; must be in 1..64 so the 72x72 staging buffers fit.
 * mx, my         unused here (the kernel is not given any filter index).
 *
 * This file is compiled with manual retain/release: only objects obtained from
 * a "new..." method are released below.
 */
void metal_v_texture( void *metal_context, int16_t *dst, uint8_t* _src, int _srcstride, int height, int mx, int my, int width )
{
#define LOCAL_BUF_SIZE 5184 //(64+8)*(64+8)
    enum { kLocalRowBytes = 72, kMargin = 4, kMaxBlock = 64 };

    (void)mx;
    (void)my;

    /* The staging buffers are fixed-size stack arrays; anything larger than
     * 64x64 would overrun them, so refuse it instead of corrupting the stack. */
    if( !metal_context || !dst || !_src ||
        width <= 0 || height <= 0 || width > kMaxBlock || height > kMaxBlock )
    {
        fprintf( stderr, "metal_v_texture: unsupported arguments (block %dx%d)\n", width, height );
        return;
    }

    MetalContext *mc = metal_context;
    int16_t out_buf[LOCAL_BUF_SIZE];
    uint8_t local_src[LOCAL_BUF_SIZE];
    memset( out_buf, 0, sizeof(out_buf) );
    memset( local_src, 0, sizeof(local_src) );

    /* Copy the block plus its margin into the tightly packed staging image. */
    const ptrdiff_t stride = _srcstride;
    const uint8_t *pcsrc = _src - kMargin*stride - kMargin;
    uint8_t *pcdst = local_src;
    for( int i = 0; i < height + 2*kMargin; i++ )
    {
        memcpy( pcdst, pcsrc, sizeof(uint8_t)*(width + 2*kMargin) );
        pcsrc += stride;
        pcdst += kLocalRowBytes;
    }

    /* This is a plain C entry point, so it cannot rely on the caller having an
     * autorelease pool. The descriptors, the command buffer and both encoders
     * are autoreleased objects and are reclaimed when this pool drains. */
    @autoreleasepool
    {
        id<MTLDevice> device = mc->metal_device;
        id<MTLCommandQueue> commandQueue = mc->metal_commandqueue;
        id<MTLComputePipelineState> cpipeline = mc->metal_cps_put_hevc_qpel_v;
        assert( device );
        assert( commandQueue );
        assert( cpipeline );

        /* Input texture: 72 bytes per row packed as 18 RGBA8 texels.
         * NOTE(review): only height+4 rows are uploaded although height+8 rows
         * were staged above; confirm against the kernel whether the bottom
         * margin rows are needed. */
        int w = kLocalRowBytes/4, h = height + kMargin;
        MTLTextureDescriptor *in_desc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Uint width:w height:h mipmapped:NO];
        id<MTLTexture> in_texture = [device newTextureWithDescriptor:in_desc];
        assert( in_texture );
        MTLRegion texture_region = MTLRegionMake2D( 0, 0, w, h );
        [in_texture replaceRegion:texture_region mipmapLevel:0 withBytes:local_src bytesPerRow:kLocalRowBytes];

        /* Output texture: same texel grid, 4 x 16-bit results per texel. */
        MTLTextureDescriptor *out_desc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA16Uint width:w height:h mipmapped:NO];
        out_desc.usage = MTLTextureUsageShaderWrite;
        id<MTLTexture> texture = [device newTextureWithDescriptor:out_desc];
        assert( texture );

        /* One thread per threadgroup, one threadgroup per output texel. */
        MTLSize threadGroupCounts = MTLSizeMake(1, 1, 1);
        MTLSize threadGroups = MTLSizeMake([texture width] / threadGroupCounts.width,
                                           [texture height] / threadGroupCounts.height,
                                           1);

        /* Check each object before it is used to create the next one. */
        id<MTLCommandBuffer> commandBuffer = [commandQueue commandBuffer];
        assert( commandBuffer );
        id<MTLComputeCommandEncoder> commandEncoder = [commandBuffer computeCommandEncoder];
        assert( commandEncoder );
        [commandEncoder setComputePipelineState:cpipeline];
        [commandEncoder setTexture:texture atIndex:0];
        [commandEncoder setTexture:in_texture atIndex:1];
        /* Pointers must be printed with %p; %x is undefined for pointer
         * arguments and truncates them on 64-bit targets. */
        printf("%s %d commandEncoder: %p commandBuffer: %p in_texture: %p device: %p commandQueue: %p pipeline: %p\n", __FILE__, __LINE__, (void *)commandEncoder, (void *)commandBuffer, (void *)in_texture, (void *)device, (void *)commandQueue, (void *)cpipeline );
        [commandEncoder dispatchThreadgroups:threadGroups threadsPerThreadgroup:threadGroupCounts];
        [commandEncoder endEncoding];

        /* Make the GPU result visible to the CPU before reading it back.
         * NOTE(review): synchronizeTexture is meant for managed-storage
         * textures; confirm the storage mode on the target platform. */
        id<MTLBlitCommandEncoder> blitEncoder = [commandBuffer blitCommandEncoder];
        [blitEncoder synchronizeTexture:texture slice:0 level:0];
        [blitEncoder endEncoding];
        [commandBuffer commit];
        [commandBuffer waitUntilCompleted];
        if( [commandBuffer status] == MTLCommandBufferStatusError )
            fprintf( stderr, "metal_v_texture: command buffer finished with an error\n" );

        /* Read the whole output image back: 72 int16 values (144 bytes) per row. */
        MTLRegion region = MTLRegionMake2D(0, 0, [texture width], [texture height]);
        [texture getBytes:out_buf bytesPerRow:kLocalRowBytes*sizeof(int16_t) fromRegion:region mipmapLevel:0];

        /* Only the two textures came from a "new..." method and are owned
         * here. commandBuffer, commandEncoder and blitEncoder were returned
         * autoreleased; sending them -release would over-release them and
         * crash when the pool drains or on a later call. */
        [in_texture release];
        [texture release];
    }

    /* Copy the interior (skipping the margin) into the caller's buffer. */
    const int16_t *pout = out_buf + kMargin*kLocalRowBytes + kMargin;
    int16_t *pdst = dst;
    for( int j = 0; j < height; j++ )
    {
        memcpy( pdst, pout, sizeof(int16_t)*width );
        pdst += MAX_PB_SIZE;
        pout += kLocalRowBytes;
    }
}
设备(device)、命令队列(command queue)和计算管线状态(compute pipeline state)均在外部创建,然后传递给此函数。pcsrc 的使用已经检查过,它没有导致段错误:
uint8_t *pcsrc = _src - 4*s - 4;
从发生段错误时的打印信息可以看出,commandEncoder、commandBuffer、in_texture、device、commandQueue 和 cpipeline 的地址都是有效的:
hmetal.m 548 commandEncoder: ea73af30 commandBuffer: ea529440 in_texture: eb0d7200 device: ec014800 commandQueue: ea70dc50 pipeline: ea710d70
Segmentation fault: 11
感谢您查看我的问题。