이 문제에 대한 후속 질문입니다. 아래는 제 마지막 코드 조각입니다.
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#define CL_TARGET_OPENCL_VERSION 210
#include "CL/cl.h"
/*
 * OpenCL C source of the "SAXPY" kernel built and launched by main().
 * Each work-item i (get_global_id(0)) writes y[i] = a * x[i]; note that,
 * despite the kernel's name, the previous contents of y are not added.
 *
 * The pointer is const-qualified because it refers to a string literal and
 * so that &program_src has the `const char **` type expected by
 * clCreateProgramWithSource.
 */
const char *program_src =
    "__kernel void SAXPY (__global float* x, __global float* y, float a)\n"
    "{\n"
    "const int i = get_global_id (0);\n"
    "y [i] = a * x [i];\n"
    "}\n";
int main() {
cl_platform_id platform_ids[16];
cl_uint platform_count;
if (clGetPlatformIDs(16, &platform_ids, &platform_count) != CL_SUCCESS) {
return EXIT_FAILURE;
}
printf("%i cl platform(s) found\n", platform_count);
if (platform_count == 0) {
return EXIT_FAILURE;
}
printf("choosing platform 0...\n");
cl_device_id device_ids[16];
cl_int device_count;
if (clGetDeviceIDs(platform_ids[0], CL_DEVICE_TYPE_ALL, 16, &device_ids, &device_count) != CL_SUCCESS) {
return EXIT_FAILURE;
}
printf("%i cl device(s) found on platform 0\n", device_count);
if (device_count == 0) {
return EXIT_FAILURE;
}
cl_device_id device = device_ids[0];
printf("** running test **\n");
cl_int cl_fehler;
cl_context ctx = clCreateContext(NULL, 1, &device, NULL, NULL, &cl_fehler);
if (ctx == NULL) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
printf("Am clCommandQueue\n");
cl_fehler = CL_SUCCESS;
cl_command_queue queue = clCreateCommandQueue(ctx, device, 0, &cl_fehler);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
// Replace 1 mit Zahlen von Gärate IDs
printf("Am clCreateProgram\n");
cl_fehler = CL_SUCCESS;
cl_program program = clCreateProgramWithSource(ctx, 1, &program_src, NULL, &cl_fehler);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
printf("Am clBuildProgram\n");
cl_fehler = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
printf("Am clCreateKernel\n");
cl_fehler = CL_SUCCESS;
cl_kernel kernel = clCreateKernel(program, "SAXPY", &cl_fehler);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
printf("Am clCreateBuffer\n");
cl_fehler = CL_SUCCESS;
cl_mem eingabe_buffer = clCreateBuffer(ctx, CL_MEM_READ_ONLY, sizeof(cl_float) * 10, NULL, &cl_fehler);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
printf("Am clCreateBuffer\n");
cl_fehler = CL_SUCCESS;
cl_mem ausgabe_buffer = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, sizeof(cl_float) * 10, NULL, &cl_fehler);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
cl_float eingabe_data[10] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
printf("Am clEnqueueWriteBuffer\n");
cl_fehler = clEnqueueWriteBuffer(queue, eingabe_buffer, CL_TRUE, 0, sizeof(cl_float) * 10, &eingabe_data, 0, NULL, NULL);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
printf("Am clSetKernelArg\n");
cl_fehler = clSetKernelArg(kernel, 0, sizeof(cl_mem), &eingabe_buffer);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
printf("Am clSetKernelArg\n");
cl_fehler = clSetKernelArg(kernel, 1, sizeof(cl_mem), &ausgabe_buffer);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
printf("Am clSetKernelArg\n");
cl_float f = 2.0;
cl_fehler = clSetKernelArg(kernel, 2, sizeof(cl_float), &f);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
printf("Am clEnqueueNDRangeKernel\n");
const size_t globalWorkSize[3] = { 10, 0, 0 };
cl_fehler = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalWorkSize, NULL, 0, NULL, NULL);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
clFinish(queue);
printf("Am clEnqueueReadBuffer\n");
cl_float ausgabe_data[10] = {1, 1, 1, 1, 1, 1, 1 ,1 ,1 ,1 };
cl_fehler = clEnqueueReadBuffer(queue, ausgabe_buffer, CL_TRUE, 0, sizeof(cl_float) * 10, &ausgabe_data[0], 0, NULL, NULL);
if (cl_fehler != CL_SUCCESS) {
printf("Fehler: %i\n", cl_fehler);
return EXIT_FAILURE;
}
// No point in share the rest of the code because the problem is being happening here
}
위 코드는 지금까지 받은 도움을 바탕으로 모든 수정 사항을 반영한 마지막 코드이며, 코드의 직전 버전에서는 오류가 발견되지 않았습니다.
호스트 시스템은 openSUSE Leap 15.5 x86_64이며, CPU는 AMD Ryzen 7 5700U with Radeon Graphics (16) @ 1.800GHz, GPU는 AMD ATI 04:00.0 Lucienne이고, AMD ROCm 드라이버가 시스템 전체에 설치되어 있습니다.
다음 명령으로 컴파일합니다: cc src/main.c -lOpenCL -I/opt/rocm-6.0.0/include/ -L/opt/rocm-6.0.0/lib/ -o test
rocm-opencl-sdk 패키지를 설치했습니다.
여기는 벤더(AMD)의 공식 페이지입니다.