我有一个openCL内核,用于在地图中进行路径寻找。这在MAC上完全正常,但在windows机器上,在clBuildProgram()方法中,我收到一条错误,指出源文件无效UTF-8。
cl_program build_program(cl_context ctx, cl_device_id dev, const char* filename)
我在网上搜索了一个解决方案,但它们都与clang有关。
我试图在带有NVIDIA GPU的Visual Studio 2013上运行它。我尝试通过转到File-> Advanced Save Options以UTF-8格式保存.cpp和.cl文件。我也尝试使用SublimeText 3以UTF-8格式保存它,但仍然没有运气。
我有其他简单的opencl代码程序在这台机器上正常运行。
我在另一台带有NVIDIA GPU的Windows PC上尝试了同样的错误。
的的main.cpp
#define PROGRAM_FILE "main.cl"
#define KERNEL_FUNC "handleCall"
#define _CRT_SECURE_NO_WARNINGS
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <time.h>
#include <fstream>
#include <new>
#include <iostream>
#ifdef __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
using namespace std;
#define MAX_OUTPUT_BUFFER 80202
size_t worldSize, inputSize, outputSize = MAX_OUTPUT_BUFFER;
unsigned char playerId = 100;
/* Find a GPU or CPU associated with the first available platform */
cl_device_id create_device() {
cl_platform_id platform;
cl_device_id dev;
int err;
//get platform
err = clGetPlatformIDs(1, &platform, NULL);
if (err < 0) {
perror("Couldn't identify a platform");
exit(1);
}
//Access a device
err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &dev, NULL);//try GPU First
if (err == CL_DEVICE_NOT_FOUND) {
err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &dev, NULL);//CPU if GPU not found
}
if (err < 0) {
perror("Couldn't access any devices");
exit(1);
}
return dev;
}
/* Create program from a file and compile it */
cl_program build_program(cl_context ctx, cl_device_id dev, const char* filename) {
cl_program program;
FILE *program_handle;
char *program_buffer, *program_log;
size_t program_size, log_size;
int err;
//Read program file and place content into buffer
program_handle = fopen(filename, "r");
if (program_handle == NULL) {
perror("Couldn't find the program file");
exit(1);
}
fseek(program_handle, 0, SEEK_END);
program_size = ftell(program_handle);
rewind(program_handle);
program_buffer = (char*)malloc(program_size + 1);
program_buffer[program_size] = '\0';
fread(program_buffer, sizeof(char), program_size, program_handle);
fclose(program_handle);
//Create program from file
program = clCreateProgramWithSource(ctx, 1,
(const char**)&program_buffer, &program_size, &err);
free(program_buffer);
//Build program
err = clBuildProgram(program, 1, &dev, NULL, NULL, NULL);
if (err < 0) {
//Find size of log and print to std output
clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG,
0, NULL, &log_size);
program_log = (char*)malloc(log_size + 1);
program_log[log_size] = '\0';
clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG,
log_size + 1, program_log, NULL);
printf("%s\n", program_log);
system("pause");
free(program_log);
exit(1);
}
system("pause");
return program;
}
unsigned char* to_bytes(unsigned short s){
int n = 1;
unsigned char *b = new unsigned char[2];
// little endian if true
if (*(char *)&n == 1) {
b[0] = (unsigned char)(s & 0xff);
b[1] = (unsigned char)((s >> 8) & 0xff);
}
else {
b[1] = (unsigned char)(s & 0xff);
b[0] = (unsigned char)((s >> 8) & 0xff);
}
return b;
}
unsigned short to_short(unsigned char lo, unsigned char hi)
{
int n = 1;
unsigned short x;
// little endian if true
if (*(char *)&n == 1) {
x = (((int)hi) << 8) | lo;
}
else {
x = (((int)lo) << 8) | hi;
}
return x;
}
unsigned char* getMockInputBuffer(){
int i = 0;
int numPlayers = 1;
inputSize = 2 + (numPlayers * 9);
unsigned char *buffer = new unsigned char[inputSize];
int flag = 1;
//Flag: 1 byte
buffer[i++] = flag;
//Number of Players: 1 byte
buffer[i++] = numPlayers;
for (int iter = 0; iter < numPlayers; iter++)
{
unsigned char *tempBytes;
//PlayerID:
buffer[i++] = playerId;
//Source Pos x and z:
unsigned short posX = (unsigned short)(iter + 1) * 10;
tempBytes = to_bytes(posX);
buffer[i++] = tempBytes[0];
buffer[i++] = tempBytes[1];
unsigned short posZ = (unsigned short)(iter + 1) * 10;
tempBytes = to_bytes(posZ);
buffer[i++] = tempBytes[0];
buffer[i++] = tempBytes[1];
//Send Destination
posX = (unsigned short)(iter + 1) * 2000;
tempBytes = to_bytes(posX);
buffer[i++] = tempBytes[0];
buffer[i++] = tempBytes[1];
posZ = (unsigned short)(iter + 1) * 2000;
tempBytes = to_bytes(posZ);
buffer[i++] = tempBytes[0];
buffer[i++] = tempBytes[1];
}
return buffer;
}
unsigned char* getMockOutputBuffer(){
unsigned char *buffer = new unsigned char[MAX_OUTPUT_BUFFER];
return buffer;
}
unsigned char* getMockWorldBuffer(){
/******** WORLD BUFFER *************/
//Create World Buffer
unsigned short width = 32767;
unsigned short height = 32767;
unsigned short xPos = 300;
unsigned short zPos = 300;
const unsigned short numObstacles = 7;
unsigned short obstaclesTL[numObstacles][2] = { { 0, 10 }, { 500, 500 }, { 400, 6000 }, { 10, 10 }, { 0, 0 }, { 300, 10 }, { 300, 6001 } };
unsigned short obstacleBR[numObstacles][2] = { { 20, 20 }, { 600, 600 }, { 600, 7000 }, { 900, 7000 }, { 200, 200 }, { 1000, 6000 }, { 30000, 6005 } };
int i = 0;
worldSize = (sizeof(unsigned short) * 7 * 4) + 13; // since we have 13 obstacles with xmin,xmax,zmin,zmax coordinates
unsigned char * worldBytes = new unsigned char[worldSize];
unsigned char * widthVal = to_bytes(width); //width pos 0-1
unsigned char * heightVal = to_bytes(height); //height byte 2-3
worldBytes[i++] = widthVal[0];
worldBytes[i++] = widthVal[1];
worldBytes[i++] = heightVal[0];
worldBytes[i++] = heightVal[1];
unsigned char n_players = 1;
worldBytes[i++] = n_players;
for (int iter = 0; iter < n_players; iter++)
{
//Send PlayerID
//char PlayerID = IDArray[iter];
worldBytes[i++] = playerId + 1;
//Send Postions
unsigned char * playerSourceX = to_bytes(xPos);
unsigned char * playerSourceZ = to_bytes(zPos);
worldBytes[i++] = playerSourceX[0];
worldBytes[i++] = playerSourceX[1];
worldBytes[i++] = playerSourceZ[0];
worldBytes[i++] = playerSourceZ[1];
}
/******* Obstacles *******/
unsigned char * m_ObstaclesArray = to_bytes(numObstacles);
worldBytes[i++] = m_ObstaclesArray[0];
for (int k = 0; k < numObstacles; k++)
{
unsigned char * tl_x = to_bytes(obstaclesTL[k][0]);
unsigned char * tl_z = to_bytes(obstaclesTL[k][1]);
unsigned char * br_x = to_bytes(obstacleBR[k][0]);
unsigned char * br_z = to_bytes(obstacleBR[k][1]);
worldBytes[i++] = tl_x[0];
worldBytes[i++] = tl_x[1];
worldBytes[i++] = tl_z[0];
worldBytes[i++] = tl_z[1];
worldBytes[i++] = br_x[0];
worldBytes[i++] = br_x[1];
worldBytes[i++] = br_z[0];
worldBytes[i++] = br_z[1];
}
return worldBytes;
}
int main(int argc, char* argv[]) {
int err;
ofstream file("output.txt");
srand((unsigned)time(0));
//OpenCL structures
cl_device_id device;
cl_context context;
cl_program program;
cl_kernel kernel;
cl_command_queue queue;
size_t local_size, global_size;
size_t num_groups;
unsigned char *world = getMockWorldBuffer();
unsigned char *input = getMockInputBuffer();
unsigned char *output = getMockOutputBuffer();
//Create data buffer
global_size = 2;
local_size = 1;
num_groups = global_size / local_size;
//Data and buffers
cl_mem worldBuffer, inputBuffer, outputBuffer;
//Create device and context
device = create_device();
context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
if (err<0) {
perror("could not create context");
exit(1);
}
//Build program
program = build_program(context, device, PROGRAM_FILE);
//Create buffs
if (program == NULL) {
perror("could not create program");
exit(1);
}
worldBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE |
CL_MEM_COPY_HOST_PTR, worldSize, world, &err);
if (err<0) {
perror("could not create world buffer");
exit(1);
}
inputBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE |
CL_MEM_COPY_HOST_PTR, inputSize, input, &err);
if (err<0) {
perror("could not create input buffer");
exit(1);
}
outputBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE |
CL_MEM_COPY_HOST_PTR, outputSize, output, &err);
if (err<0) {
perror("could not create output buffer");
exit(1);
}
queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err);
if (err<0) {
perror("could not create queue");
exit(1);
}
//Create a kernel
kernel = clCreateKernel(program, KERNEL_FUNC, &err);
if (err<0) {
perror("could not create kernel");
exit(1);
}
//Set kernel arguments
err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &worldBuffer);
if (err<0) {
perror("could not set world kernel args");
exit(1);
}
err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &inputBuffer);
if (err<0) {
perror("could not set input kernel args");
exit(1);
}
err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &outputBuffer);
if (err<0) {
perror("could not set output kernel args");
exit(1);
}
cl_event event;
//Enqueue kernel
clRetainMemObject(outputBuffer);
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size,
0, 0, NULL, &event);
if (err<0) {
perror("could not enqueue kernel");
exit(1);
}
clWaitForEvents(1, &event);
cl_ulong time_start, time_end;
double total_time;
clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(time_start), &time_start, NULL);
clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(time_end), &time_end, NULL);
total_time = time_end - time_start;
printf("\nExecution time in milliseconds = %0.3f ms\n", (total_time / 1000000.0));
//Read the kernel's output
unsigned char *testOutputBuffer = new unsigned char[MAX_OUTPUT_BUFFER];
err = clEnqueueReadBuffer(queue, outputBuffer, CL_TRUE, 0,
MAX_OUTPUT_BUFFER, testOutputBuffer, 0, NULL, &event);
if (err != CL_SUCCESS) {
perror("could not read from kernel");
exit(1);
}
//Reading Result Buffer:
//Flag: 0(PlayerData) or 1(Path):
//Number of players(1 byte)
//PLayerID(1byte) (n-times)
//m set of coords (2bytes) (PathReq only)
//posX (2bytes) (PlayerData: Last known Position PathReq:Position in the path )
//posZ (2bytes) (PlayerData: Last known Position PathReq:Position in the path )
//For Path Req its 2bytes + n*(PlayerID + 2+ (m*4) )
//For Player Data its 2bytes + n*(PlayerID + (m*4) )
cout << "Flag = " << (short)testOutputBuffer[0] << "\n";
cout << "Num players = " << (short)testOutputBuffer[1] << "\n";
cout << "Player ID = " << (short)testOutputBuffer[2] << "\n";
cout << "Num Turns = " << (short)testOutputBuffer[3] << "\n";
short m = (short)testOutputBuffer[3];
int j = 1;
for (int i = 4; i < m * 4 + 4; i += 4){
short x = to_short(testOutputBuffer[i], testOutputBuffer[i + 1]);
short z = to_short(testOutputBuffer[i + 2], testOutputBuffer[i + 3]);
cout << "Turns " << j++ << " = (" << x << "," << z << ") \n";
}
file << "\nExecution time in milliseconds = " << (total_time / 1000000.0) << "ms\n";
printf("finished\n");
file.close();
//Cleanup
clReleaseKernel(kernel);
clReleaseMemObject(worldBuffer);
clReleaseMemObject(inputBuffer);
clReleaseMemObject(outputBuffer);
clReleaseCommandQueue(queue);
clReleaseProgram(program);
clReleaseContext(context);
return 0;
}
main.cl: google drive link to main.cl file
答案 0 :(得分:2)
我遇到了同样的问题。
由于某种原因,它是通过使用fstream对象将.cl文件读入char数组来解决的。
尝试更改行
FILE *program_handle;
char *program_buffer, *program_log;
size_t program_size, log_size;
int err;
//Read program file and place content into buffer
program_handle = fopen(filename, "r");
if (program_handle == NULL) {
perror("Couldn't find the program file");
exit(1);
}
fseek(program_handle, 0, SEEK_END);
program_size = ftell(program_handle);
rewind(program_handle);
program_buffer = (char*)malloc(program_size + 1);
program_buffer[program_size] = '\0';
fread(program_buffer, sizeof(char), program_size, program_handle);
fclose(program_handle);
有类似的东西:
std::fstream kernelFile(filename);
std::string content(
(std::istreambuf_iterator<char>(kernelFile)),
std::istreambuf_iterator<char>()
);
const char* kernelCharArray = new char[content.size()];
kernelCharArray = content.c_str();