2016-05-19 11 views
1

In Visual Studio 2015 habe ich ein 'Code Build-Projekt für Windows' (für eine CPU) erstellt. Dieses Projekt enthält bereits Code, den ich überhaupt nicht verändert habe. Er führt im Wesentlichen eine Vektoraddition durch. Die Vektoraddition findet jedoch in einer Datei Template.cl statt. Kurz gesagt: Getrennte .cl-Dateien funktionieren nicht (Fehler MSB3722). Wenn ich versuche, dieses Projekt zu kompilieren, erhalte ich die folgende Fehlermeldung:

Error MSB3722 The command ""C:\Program Files (x86)\Intel\OpenCL SDK\bin\x86\ioc32.exe" -cmd=build -input="blahblah\user\visual studio 2015\Projects\OpenCLProject3\OpenCLProject3\Template.cl" -output="Debug\Template.out" -VS -device=CPU_2_0 -simd=default -targetos=current -bo=" "" exited with code 5. Please verify that you have sufficient rights to run this command. OpenCLProject3 C:\Program Files (x86)\MSBuild\Microsoft.Cpp\v4.0\V140\BuildCustomizations\IntelOpenCL.targets 98

Wenn ich den Kernel jedoch in meine CPP-Datei kopiere und ihn dort als Zeichenfolge ablege, lässt sich das Programm ausführen. Die Zeichenfolge sieht wie folgt aus:

// OpenCL C source of the "Add" kernel, embedded as a string instead of being
// read from Template.cl. Adjacent literals are concatenated by the compiler,
// so the resulting string is the same single-line program as before.
const char* prog1 =
    "__kernel void Add(__global int* pA, __global int* pB, __global int* pC){"
    "const int x = get_global_id(0);"
    "const int y = get_global_id(1);"
    "const int width = get_global_size(0);"
    "const int id = y * width + x;"
    "pC[id] = pA[id] + pB[id];}";

Außerdem lese ich den Kernel dann nicht mehr aus einer Quelldatei ein, sondern übergebe in der Funktion CreateAndBuildProgram einfach die Adresse &prog1.

Hier ist die Struktur des Visual-Studio-Projektbaums:

--References 
--External 
--Headers 
--OpenCL 
    --Template.cl 
--Source Files 
    --OpenCLProject3.cpp 
    --utils.cpp 

Bitte beachten Sie, dass ich die Fehlercodes entfernt habe. Wenn Sie ein Code-Build-Projekt in Visual Studio 2015 generieren, erhalten Sie genau den gleichen Code und die gleiche Struktur.

Hier ist der Host-Code (OpenCLProject3.cpp).

#include <stdio.h> 
    #include <stdlib.h> 
    #include <tchar.h> 
    #include <memory.h> 
    #include <vector> 

    #include "CL\cl.h" 
    #include "utils.h" 

    //for perf. counters 
    #include <Windows.h> 


    // Macros for OpenCL versions 
    // These float tags are what ocl_args_d_t stores in platformVersion,
    // deviceVersion and compilerVersion: 1.2 is the constructor default, and
    // 2.0 is assigned when a queried version string reports 2.0. SetupOpenCL
    // compares deviceVersion against OPENCL_VERSION_2_0 with ==, so the fields
    // must only ever hold one of these exact literals.
    #define OPENCL_VERSION_1_2 1.2f 
    #define OPENCL_VERSION_2_0 2.0f 

    /*
    * Bundle of every OpenCL handle used by this sample.
    *
    * The constructor sets all handles to NULL and all version fields to
    * OpenCL 1.2; the destructor releases every handle that is not NULL.
    * Because the destructor releases the handles, copying an instance would
    * make two objects release the same handles, so copying is disabled.
    */
    struct ocl_args_d_t
    {
        ocl_args_d_t();
        ~ocl_args_d_t();

        // Regular OpenCL objects:
        cl_context       context;         // context handle
        cl_device_id     device;          // selected device handle
        cl_command_queue commandQueue;    // command-queue handle
        cl_program       program;         // program handle
        cl_kernel        kernel;          // kernel handle
        float            platformVersion; // OpenCL platform version (default 1.2)
        float            deviceVersion;   // OpenCL device version (default 1.2)
        float            compilerVersion; // device OpenCL C version (default 1.2)

        // Objects that are specific to the algorithm implemented in this sample
        cl_mem           srcA;            // first source buffer
        cl_mem           srcB;            // second source buffer
        cl_mem           dstMem;          // destination buffer

    private:
        // Declared but intentionally not defined: an accidental copy is
        // rejected at compile time (outside the struct) or at link time.
        ocl_args_d_t(const ocl_args_d_t&);
        ocl_args_d_t& operator=(const ocl_args_d_t&);
    };

    // Start with no OpenCL objects at all (every handle NULL, so the
    // destructor has nothing to release) and with all three version fields
    // set to OpenCL 1.2.
    ocl_args_d_t::ocl_args_d_t()
        : context(NULL)
        , device(NULL)
        , commandQueue(NULL)
        , program(NULL)
        , kernel(NULL)
        , platformVersion(OPENCL_VERSION_1_2)
        , deviceVersion(OPENCL_VERSION_1_2)
        , compilerVersion(OPENCL_VERSION_1_2)
        , srcA(NULL)
        , srcB(NULL)
        , dstMem(NULL)
    {
    }

    // Release every handle that was actually created, in this order:
    // kernel, program, srcA, srcB, dstMem, command queue, device, context.
    // A failing release is logged and the remaining handles are still released.
    ocl_args_d_t::~ocl_args_d_t()
    {
        cl_int status = CL_SUCCESS;

        if (NULL != kernel && CL_SUCCESS != (status = clReleaseKernel(kernel)))
        {
            LogError("Error: clReleaseKernel returned '%s'.\n", TranslateOpenCLError(status));
        }

        if (NULL != program && CL_SUCCESS != (status = clReleaseProgram(program)))
        {
            LogError("Error: clReleaseProgram returned '%s'.\n", TranslateOpenCLError(status));
        }

        if (NULL != srcA && CL_SUCCESS != (status = clReleaseMemObject(srcA)))
        {
            LogError("Error: clReleaseMemObject returned '%s'.\n", TranslateOpenCLError(status));
        }

        if (NULL != srcB && CL_SUCCESS != (status = clReleaseMemObject(srcB)))
        {
            LogError("Error: clReleaseMemObject returned '%s'.\n", TranslateOpenCLError(status));
        }

        if (NULL != dstMem && CL_SUCCESS != (status = clReleaseMemObject(dstMem)))
        {
            LogError("Error: clReleaseMemObject returned '%s'.\n", TranslateOpenCLError(status));
        }

        if (NULL != commandQueue && CL_SUCCESS != (status = clReleaseCommandQueue(commandQueue)))
        {
            LogError("Error: clReleaseCommandQueue returned '%s'.\n", TranslateOpenCLError(status));
        }

        if (NULL != device && CL_SUCCESS != (status = clReleaseDevice(device)))
        {
            LogError("Error: clReleaseDevice returned '%s'.\n", TranslateOpenCLError(status));
        }

        if (NULL != context && CL_SUCCESS != (status = clReleaseContext(context)))
        {
            LogError("Error: clReleaseContext returned '%s'.\n", TranslateOpenCLError(status));
        }
    }


    /*
    * Tell whether the name of an OpenCL platform contains preferredPlatform.
    *
    * platform          - platform whose CL_PLATFORM_NAME is queried
    * preferredPlatform - substring to search for; must not be NULL, it is
    *                     handed to strstr unchanged
    *
    * Returns true when the platform name contains preferredPlatform, false
    * when it does not or when one of the clGetPlatformInfo calls fails
    * (the failure is logged).
    */
    bool CheckPreferredPlatformMatch(cl_platform_id platform, const char* preferredPlatform)
    {
        // First call: param_value is NULL, so only the required size of the
        // name string is returned in stringLength.
        size_t stringLength = 0;
        cl_int err = clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, NULL, &stringLength);
        if (CL_SUCCESS != err)
        {
            LogError("Error: clGetPlatformInfo() to get CL_PLATFORM_NAME length returned '%s'.\n", TranslateOpenCLError(err));
            return false;
        }

        // One extra zero byte: the buffer is never empty (so &platformName[0]
        // is always valid) and the name is always NUL-terminated for strstr,
        // even if the reported length is 0 or the terminator is missing.
        std::vector<char> platformName(stringLength + 1, '\0');

        // Second call: read the name itself into platformName.
        err = clGetPlatformInfo(platform, CL_PLATFORM_NAME, stringLength, &platformName[0], NULL);
        if (CL_SUCCESS != err)
        {
            LogError("Error: clGetPlatformInfo() to get CL_PLATFORM_NAME returned %s.\n", TranslateOpenCLError(err));
            return false;
        }

        // The platform matches when its name contains the requested substring.
        return strstr(&platformName[0], preferredPlatform) != NULL;
    }

    /*
    * Return the first available OpenCL platform that has at least one device
    * of deviceType and, if preferredPlatform is a non-empty string, whose name
    * matches according to CheckPreferredPlatformMatch.
    *
    * Returns NULL when enumerating the platforms fails, when no platform is
    * installed, or when no platform satisfies the requirements.
    */
    cl_platform_id FindOpenCLPlatform(const char* preferredPlatform, cl_device_type deviceType)
    {
        // Ask only for the platform count (platforms == NULL).
        cl_uint platformCount = 0;
        cl_int status = clGetPlatformIDs(0, NULL, &platformCount);
        if (status != CL_SUCCESS)
        {
            LogError("Error: clGetplatform_ids() to get num platforms returned %s.\n", TranslateOpenCLError(status));
            return NULL;
        }
        LogInfo("Number of available platforms: %u\n", platformCount);

        if (platformCount == 0)
        {
            LogError("Error: No platforms found!\n");
            return NULL;
        }

        // Fetch the platform IDs themselves.
        std::vector<cl_platform_id> candidates(platformCount);
        status = clGetPlatformIDs(platformCount, &candidates[0], NULL);
        if (status != CL_SUCCESS)
        {
            LogError("Error: clGetplatform_ids() to get platforms returned %s.\n", TranslateOpenCLError(status));
            return NULL;
        }

        // A NULL or empty preferredPlatform means "any platform name is fine".
        const bool filterByName = (preferredPlatform != NULL) && (strlen(preferredPlatform) > 0);

        for (cl_uint idx = 0; idx < platformCount; ++idx)
        {
            if (filterByName && !CheckPreferredPlatformMatch(candidates[idx], preferredPlatform))
            {
                continue;
            }

            // Count the devices of the requested type. A failure is logged
            // but not fatal: deviceCount stays 0 and the platform is skipped.
            cl_uint deviceCount = 0;
            status = clGetDeviceIDs(candidates[idx], deviceType, 0, NULL, &deviceCount);
            if (status != CL_SUCCESS)
            {
                LogError("clGetDeviceIDs() returned %s.\n", TranslateOpenCLError(status));
            }

            if (deviceCount != 0)
            {
                // At least one suitable device: this platform is the answer.
                return candidates[idx];
            }
        }

        return NULL;
    }


    /*
    * Read the OpenCL platform version, the device version and the device
    * OpenCL C version (using the clGetxxxInfo API) and store them in ocl.
    * This lets the program support both OpenCL 1.2 and 2.0 platforms and
    * devices.
    *
    * platformId - platform whose CL_PLATFORM_VERSION is queried
    * ocl        - ocl->device must already be set; platformVersion,
    *              deviceVersion and compilerVersion are set to
    *              OPENCL_VERSION_2_0 when the matching version string contains
    *              "OpenCL 2.0" / "OpenCL C 2.0", otherwise they are left
    *              untouched
    *
    * Returns CL_SUCCESS, or the error code of the first failing query (the
    * failure is logged and the remaining fields are not updated).
    */
    int GetPlatformAndDeviceVersion (cl_platform_id platformId, ocl_args_d_t *ocl)
    {
        cl_int err = CL_SUCCESS;
        size_t stringLength = 0;

        // --- Platform version -------------------------------------------------
        // First query only the string length (param_value is NULL).
        err = clGetPlatformInfo(platformId, CL_PLATFORM_VERSION, 0, NULL, &stringLength);
        if (CL_SUCCESS != err)
        {
            LogError("Error: clGetPlatformInfo() to get CL_PLATFORM_VERSION length returned '%s'.\n", TranslateOpenCLError(err));
            return err;
        }

        // One extra zero byte keeps the buffer non-empty (so &buffer[0] is
        // valid) and guarantees NUL termination for strstr. The same is done
        // for the two device strings below.
        std::vector<char> platformVersion(stringLength + 1, '\0');

        err = clGetPlatformInfo(platformId, CL_PLATFORM_VERSION, stringLength, &platformVersion[0], NULL);
        if (CL_SUCCESS != err)
        {
            LogError("Error: clGetPlatformInfo() to get CL_PLATFORM_VERSION returned %s.\n", TranslateOpenCLError(err));
            return err;
        }

        if (strstr(&platformVersion[0], "OpenCL 2.0") != NULL)
        {
            ocl->platformVersion = OPENCL_VERSION_2_0;
        }

        // --- Device version ---------------------------------------------------
        err = clGetDeviceInfo(ocl->device, CL_DEVICE_VERSION, 0, NULL, &stringLength);
        if (CL_SUCCESS != err)
        {
            LogError("Error: clGetDeviceInfo() to get CL_DEVICE_VERSION length returned '%s'.\n", TranslateOpenCLError(err));
            return err;
        }

        std::vector<char> deviceVersion(stringLength + 1, '\0');

        err = clGetDeviceInfo(ocl->device, CL_DEVICE_VERSION, stringLength, &deviceVersion[0], NULL);
        if (CL_SUCCESS != err)
        {
            LogError("Error: clGetDeviceInfo() to get CL_DEVICE_VERSION returned %s.\n", TranslateOpenCLError(err));
            return err;
        }

        if (strstr(&deviceVersion[0], "OpenCL 2.0") != NULL)
        {
            ocl->deviceVersion = OPENCL_VERSION_2_0;
        }

        // --- Device OpenCL C (compiler) version ---------------------------------
        err = clGetDeviceInfo(ocl->device, CL_DEVICE_OPENCL_C_VERSION, 0, NULL, &stringLength);
        if (CL_SUCCESS != err)
        {
            LogError("Error: clGetDeviceInfo() to get CL_DEVICE_OPENCL_C_VERSION length returned '%s'.\n", TranslateOpenCLError(err));
            return err;
        }

        std::vector<char> compilerVersion(stringLength + 1, '\0');

        err = clGetDeviceInfo(ocl->device, CL_DEVICE_OPENCL_C_VERSION, stringLength, &compilerVersion[0], NULL);
        if (CL_SUCCESS != err)
        {
            LogError("Error: clGetDeviceInfo() to get CL_DEVICE_OPENCL_C_VERSION returned %s.\n", TranslateOpenCLError(err));
            return err;
        }

        // Plain "if": the error branch above has already returned.
        if (strstr(&compilerVersion[0], "OpenCL C 2.0") != NULL)
        {
            ocl->compilerVersion = OPENCL_VERSION_2_0;
        }

        return err;
    }


    /*
    * Fill inputArray with arrayWidth * arrayHeight values taken from rand().
    * The generator is re-seeded with the fixed seed 12345 on every call, so
    * every call starts the same rand() sequence from the beginning.
    */
    void generateInput(cl_int* inputArray, cl_uint arrayWidth, cl_uint arrayHeight)
    {
        srand(12345);

        // Walk the array front to back with a moving pointer.
        const cl_uint elementCount = arrayWidth * arrayHeight;
        cl_int* cursor = inputArray;
        cl_int* const stop = inputArray + elementCount;
        while (cursor != stop)
        {
            *cursor = rand();
            ++cursor;
        }
    }

    /*
    * Create the basic OpenCL objects: pick a platform, create a context for
    * deviceType, select a device and create a profiling-enabled command queue.
    *
    * ocl        - receives context, device, commandQueue and the version fields
    * deviceType - device type to look for (the caller passes CL_DEVICE_TYPE_CPU)
    *
    * Returns CL_SUCCESS, CL_INVALID_VALUE when no platform or no device is
    * found, or the error code of the failing OpenCL call (failures are logged).
    */
    int SetupOpenCL(ocl_args_d_t *ocl, cl_device_type deviceType)
    {
        // The following variable stores return codes for all OpenCL calls.
        cl_int err = CL_SUCCESS;

        // Enumerate all platforms and pick one whose name contains "Intel"
        // and that offers a device of the requested type.
        cl_platform_id platformId = FindOpenCLPlatform("Intel", deviceType);
        if (NULL == platformId)
        {
            LogError("Error: Failed to find OpenCL platform.\n");
            return CL_INVALID_VALUE;
        }

        // Create a context for the requested device type on that platform.
        // No notification callback and no user data are registered.
        cl_context_properties contextProperties[] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platformId, 0};
        ocl->context = clCreateContextFromType(contextProperties, deviceType, NULL, NULL, &err);
        if ((CL_SUCCESS != err) || (NULL == ocl->context))
        {
            LogError("Couldn't create a context, clCreateContextFromType() returned '%s'.\n", TranslateOpenCLError(err));
            return err;
        }

        // Query the devices the context was created with. The context may hold
        // more than one device of the requested type, and asking for the list
        // with room for a single cl_device_id would then fail. So read the
        // size first, fetch the whole list and use its first entry.
        size_t devicesBytes = 0;
        err = clGetContextInfo(ocl->context, CL_CONTEXT_DEVICES, 0, NULL, &devicesBytes);
        if (CL_SUCCESS != err)
        {
            LogError("Error: clGetContextInfo() to get list of devices returned %s.\n", TranslateOpenCLError(err));
            return err;
        }
        if (devicesBytes < sizeof(cl_device_id))
        {
            LogError("Error: clGetContextInfo() reported no devices in the context.\n");
            return CL_INVALID_VALUE;
        }

        std::vector<cl_device_id> devices(devicesBytes / sizeof(cl_device_id));
        err = clGetContextInfo(ocl->context, CL_CONTEXT_DEVICES, devices.size() * sizeof(cl_device_id), &devices[0], NULL);
        if (CL_SUCCESS != err)
        {
            LogError("Error: clGetContextInfo() to get list of devices returned %s.\n", TranslateOpenCLError(err));
            return err;
        }
        ocl->device = devices[0];

        // Read the platform, device and OpenCL C versions. This is best
        // effort: a failure is logged inside the callee and the version fields
        // keep their current values, which selects the OpenCL 1.2 path below.
        (void)GetPlatformAndDeviceVersion(platformId, ocl);

        // Create a simple in-order command queue with profiling enabled.
        // Kernels are enqueued for execution on the device through this queue.
    #ifdef CL_VERSION_2_0
        if (OPENCL_VERSION_2_0 == ocl->deviceVersion)
        {
            const cl_command_queue_properties properties[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0};
            ocl->commandQueue = clCreateCommandQueueWithProperties(ocl->context, ocl->device, properties, &err);
        }
        else {
            // default behavior: OpenCL 1.2
            cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE;
            ocl->commandQueue = clCreateCommandQueue(ocl->context, ocl->device, properties, &err);
        }
    #else
        // default behavior: OpenCL 1.2
        cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE;
        ocl->commandQueue = clCreateCommandQueue(ocl->context, ocl->device, properties, &err);
    #endif
        if (CL_SUCCESS != err)
        {
            LogError("Error: clCreateCommandQueue() returned %s.\n", TranslateOpenCLError(err));
            return err;
        }

        return CL_SUCCESS;
    }


    /*
    * Create and build the OpenCL program from the source file "Template.cl".
    *
    * ocl - ocl->context and ocl->device must be set; ocl->program receives the
    *       program object
    *
    * Returns CL_SUCCESS, or the error code of the first failing step (reading
    * the file, creating the program, building it). Failures are logged; on a
    * CL_BUILD_PROGRAM_FAILURE the build log is logged as well.
    */
    int CreateAndBuildProgram(ocl_args_d_t *ocl)
    {
        // Load the OpenCL C source text; its size is returned in src_size.
        char* source = NULL;
        size_t src_size = 0;
        cl_int err = ReadSourceFromFile("Template.cl", &source, &src_size);
        if (CL_SUCCESS != err)
        {
            LogError("Error: ReadSourceFromFile returned %s.\n", TranslateOpenCLError(err));
        }
        else
        {
            // Turn the source text into an OpenCL program object.
            ocl->program = clCreateProgramWithSource(ocl->context, 1, (const char**)&source, &src_size, &err);
            if (CL_SUCCESS != err)
            {
                LogError("Error: clCreateProgramWithSource returned %s.\n", TranslateOpenCLError(err));
            }
            else
            {
                // A program is not built when it is created; build it
                // explicitly for the selected device with no extra options.
                err = clBuildProgram(ocl->program, 1, &ocl->device, "", NULL, NULL);
                if (CL_SUCCESS != err)
                {
                    LogError("Error: clBuildProgram() for source program returned %s.\n", TranslateOpenCLError(err));

                    if (err == CL_BUILD_PROGRAM_FAILURE)
                    {
                        // Query the log size first, then the log itself. Use a
                        // separate status so the build error in err is kept.
                        size_t log_size = 0;
                        cl_int logErr = clGetProgramBuildInfo(ocl->program, ocl->device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);

                        // One extra zero byte: the buffer is never empty and
                        // the log is always NUL-terminated for "%s".
                        std::vector<char> build_log(log_size + 1, '\0');
                        if (CL_SUCCESS == logErr && log_size > 0)
                        {
                            logErr = clGetProgramBuildInfo(ocl->program, ocl->device, CL_PROGRAM_BUILD_LOG, log_size, &build_log[0], NULL);
                        }

                        if (CL_SUCCESS == logErr)
                        {
                            LogError("Error happened during the build of OpenCL program.\nBuild log:%s", &build_log[0]);
                        }
                        else
                        {
                            LogError("Error: clGetProgramBuildInfo() to get the build log returned %s.\n", TranslateOpenCLError(logErr));
                        }
                    }
                }
            }
        }

        // The source text is no longer needed on any path. It is released with
        // delete[] exactly as before (presumably ReadSourceFromFile allocates
        // it with new[] - confirm in utils.cpp); delete[] on NULL is a no-op.
        delete[] source;
        source = NULL;

        return err;
    }


    /*
    * Create the three OpenCL buffers used by the kernel on top of the host
    * arrays (all of them with CL_MEM_USE_HOST_PTR):
    *   ocl->srcA   - read-only,  backed by inputA
    *   ocl->srcB   - read-only,  backed by inputB
    *   ocl->dstMem - write-only, backed by outputC
    * Each buffer is sizeof(cl_uint) * arrayWidth * arrayHeight bytes large.
    *
    * Returns CL_SUCCESS, or the error code of the first clCreateBuffer call
    * that fails (the failure is logged and later buffers are not created).
    */
    int CreateBufferArguments(ocl_args_d_t *ocl, cl_int* inputA, cl_int* inputB, cl_int* outputC, cl_uint arrayWidth, cl_uint arrayHeight)
    {
        cl_int status = CL_SUCCESS;

        // All three buffers have the same size in bytes.
        const size_t bufferBytes = sizeof(cl_uint) * arrayWidth * arrayHeight;

        // The inputs are only read by the kernel, hence CL_MEM_READ_ONLY.
        ocl->srcA = clCreateBuffer(ocl->context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, bufferBytes, inputA, &status);
        if (status != CL_SUCCESS)
        {
            LogError("Error: clCreateBuffer for srcA returned %s\n", TranslateOpenCLError(status));
            return status;
        }

        ocl->srcB = clCreateBuffer(ocl->context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, bufferBytes, inputB, &status);
        if (status != CL_SUCCESS)
        {
            LogError("Error: clCreateBuffer for srcB returned %s\n", TranslateOpenCLError(status));
            return status;
        }

        // The output is only written by the kernel, hence CL_MEM_WRITE_ONLY.
        ocl->dstMem = clCreateBuffer(ocl->context, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, bufferBytes, outputC, &status);
        if (status != CL_SUCCESS)
        {
            LogError("Error: clCreateBuffer for dstMem returned %s\n", TranslateOpenCLError(status));
            return status;
        }

        return CL_SUCCESS;
    }


    /*
    * Bind the three buffers to the kernel: argument 0 = srcA, 1 = srcB,
    * 2 = dstMem.
    *
    * Returns CL_SUCCESS, or the error code of the first clSetKernelArg call
    * that fails (logged). The code is returned through the cl_uint return type.
    */
    cl_uint SetKernelArguments(ocl_args_d_t *ocl)
    {
        cl_int status = clSetKernelArg(ocl->kernel, 0, sizeof(cl_mem), &ocl->srcA);
        if (status != CL_SUCCESS)
        {
            LogError("error: Failed to set argument srcA, returned %s\n", TranslateOpenCLError(status));
            return status;
        }

        status = clSetKernelArg(ocl->kernel, 1, sizeof(cl_mem), &ocl->srcB);
        if (status != CL_SUCCESS)
        {
            LogError("Error: Failed to set argument srcB, returned %s\n", TranslateOpenCLError(status));
            return status;
        }

        status = clSetKernelArg(ocl->kernel, 2, sizeof(cl_mem), &ocl->dstMem);
        if (status != CL_SUCCESS)
        {
            LogError("Error: Failed to set argument dstMem, returned %s\n", TranslateOpenCLError(status));
            return status;
        }

        return status;
    }


    /*
    * Enqueue the kernel over a two-dimensional global range of
    * width x height work-items and wait until the queue has finished.
    *
    * Returns CL_SUCCESS, or the error code of clEnqueueNDRangeKernel or
    * clFinish (logged).
    */
    cl_uint ExecuteAddKernel(ocl_args_d_t *ocl, cl_uint width, cl_uint height)
    {
        // Global iteration space: dimension 0 spans width, dimension 1 height.
        size_t globalWorkSize[2];
        globalWorkSize[0] = width;
        globalWorkSize[1] = height;

        // No global offset, no explicit local size, no event dependencies.
        cl_int status = clEnqueueNDRangeKernel(ocl->commandQueue, ocl->kernel, 2, NULL, globalWorkSize, NULL, 0, NULL, NULL);
        if (status != CL_SUCCESS)
        {
            LogError("Error: Failed to run kernel, return %s\n", TranslateOpenCLError(status));
            return status;
        }

        // Enqueueing only queues the command; block until it has completed.
        status = clFinish(ocl->commandQueue);
        if (status != CL_SUCCESS)
        {
            LogError("Error: clFinish return %s\n", TranslateOpenCLError(status));
            return status;
        }

        return CL_SUCCESS;
    }


    /*
    * Map the result buffer (ocl->dstMem) into host memory and check that every
    * element equals inputA[k] + inputB[k].
    *
    * Returns false when the buffer cannot be mapped or when at least one
    * element differs (every mismatch is logged); true otherwise. Failures of
    * clFinish and of the final unmap are logged but do not change the result.
    */
    bool ReadAndVerify(ocl_args_d_t *ocl, cl_uint width, cl_uint height, cl_int *inputA, cl_int *inputB)
    {
        cl_int status = CL_SUCCESS;

        // Blocking map for reading: the returned pointer gives the host access
        // to the kernel output.
        cl_int *mapped = (cl_int *)clEnqueueMapBuffer(ocl->commandQueue, ocl->dstMem, true, CL_MAP_READ, 0, sizeof(cl_uint) * width * height, 0, NULL, NULL, &status);
        if (status != CL_SUCCESS)
        {
            LogError("Error: clEnqueueMapBuffer returned %s\n", TranslateOpenCLError(status));
            return false;
        }

        // Make sure everything queued so far has completed.
        status = clFinish(ocl->commandQueue);
        if (status != CL_SUCCESS)
        {
            LogError("Error: clFinish returned %s\n", TranslateOpenCLError(status));
        }

        // Compare every output element with the sum computed on the host.
        bool allCorrect = true;
        const unsigned int total = width * height;
        unsigned int k = 0;
        while (k < total)
        {
            if (inputA[k] + inputB[k] != mapped[k])
            {
                LogError("Verification failed at %d: (%d + %d = %d)\n", k, inputA[k], inputB[k], mapped[k]);
                allCorrect = false;
            }
            ++k;
        }

        // Unmap the output buffer again before it gets released.
        status = clEnqueueUnmapMemObject(ocl->commandQueue, ocl->dstMem, mapped, 0, NULL, NULL);
        if (status != CL_SUCCESS)
        {
            LogError("Error: clEnqueueUnmapMemObject returned %s\n", TranslateOpenCLError(status));
        }

        return allCorrect;
    }


    /* 
    * main execution routine 
    * Basically it consists of three parts: 
    * - generating the inputs 
    * - running OpenCL kernel 
    * - reading results of processing 
    */ 
    int _tmain(int argc, TCHAR* argv[]) 
    { 
     cl_int err; 
     ocl_args_d_t ocl; 
     cl_device_type deviceType = CL_DEVICE_TYPE_CPU; 

     LARGE_INTEGER perfFrequency; 
     LARGE_INTEGER performanceCountNDRangeStart; 
     LARGE_INTEGER performanceCountNDRangeStop; 

     cl_uint arrayWidth = 1024; 
     cl_uint arrayHeight = 1024; 

     //initialize Open CL objects (context, queue, etc.) 
     if (CL_SUCCESS != SetupOpenCL(&ocl, deviceType)) 
     { 
      return -1; 
     } 

     // allocate working buffers. 
     // the buffer should be aligned with 4K page and size should fit 64-byte cached line 
     cl_uint optimizedSize = ((sizeof(cl_int) * arrayWidth * arrayHeight - 1)/64 + 1) * 64; 
     cl_int* inputA = (cl_int*)_aligned_malloc(optimizedSize, 4096); 
     cl_int* inputB = (cl_int*)_aligned_malloc(optimizedSize, 4096); 
     cl_int* outputC = (cl_int*)_aligned_malloc(optimizedSize, 4096); 
     if (NULL == inputA || NULL == inputB || NULL == outputC) 
     { 
      LogError("Error: _aligned_malloc failed to allocate buffers.\n"); 
      return -1; 
     } 

     //random input 
     generateInput(inputA, arrayWidth, arrayHeight); 
     generateInput(inputB, arrayWidth, arrayHeight); 

     // Create OpenCL buffers from host memory 
     // These buffers will be used later by the OpenCL kernel 
     if (CL_SUCCESS != CreateBufferArguments(&ocl, inputA, inputB, outputC, arrayWidth, arrayHeight)) 
     { 
      return -1; 
     } 

     // Create and build the OpenCL program 
     if (CL_SUCCESS != CreateAndBuildProgram(&ocl)) 
     { 
      return -1; 
     } 

     // Program consists of kernels. 
     // Each kernel can be called (enqueued) from the host part of OpenCL application. 
     // To call the kernel, you need to create it from existing program. 
     ocl.kernel = clCreateKernel(ocl.program, "Add", &err); 
     if (CL_SUCCESS != err) 
     { 
      LogError("Error: clCreateKernel returned %s\n", TranslateOpenCLError(err)); 
      return -1; 
     } 

     // Passing arguments into OpenCL kernel. 
     if (CL_SUCCESS != SetKernelArguments(&ocl)) 
     { 
      return -1; 
     } 

     // Regularly you wish to use OpenCL in your application to achieve greater performance results 
     // that are hard to achieve in other ways. 
     // To understand those performance benefits you may want to measure time your application spent in OpenCL kernel execution. 
     // The recommended way to obtain this time is to measure interval between two moments: 
     // - just before clEnqueueNDRangeKernel is called, and 
     // - just after clFinish is called 
     // clFinish is necessary to measure entire time spending in the kernel, measuring just clEnqueueNDRangeKernel is not enough, 
     // because this call doesn't guarantees that kernel is finished. 
     // clEnqueueNDRangeKernel is just enqueue new command in OpenCL command queue and doesn't wait until it ends. 
     // clFinish waits until all commands in command queue are finished, that suits your need to measure time. 
     bool queueProfilingEnable = true; 
     if (queueProfilingEnable) 
      QueryPerformanceCounter(&performanceCountNDRangeStart); 
     // Execute (enqueue) the kernel 
     if (CL_SUCCESS != ExecuteAddKernel(&ocl, arrayWidth, arrayHeight)) 
     { 
      return -1; 
     } 
     if (queueProfilingEnable) 
      QueryPerformanceCounter(&performanceCountNDRangeStop); 

     // The last part of this function: getting processed results back. 
     // use map-unmap sequence to update original memory area with output buffer. 
     ReadAndVerify(&ocl, arrayWidth, arrayHeight, inputA, inputB); 

     // retrieve performance counter frequency 
     if (queueProfilingEnable) 
     { 
      QueryPerformanceFrequency(&perfFrequency); 
      LogInfo("NDRange performance counter time %f ms.\n", 
       1000.0f*(float)(performanceCountNDRangeStop.QuadPart - performanceCountNDRangeStart.QuadPart)/(float)perfFrequency.QuadPart); 
     } 

     _aligned_free(inputA); 
     _aligned_free(inputB); 
     _aligned_free(outputC); 

     return 0; 
    } 

Hier ist der Kernel-Code (Template.cl):

// Element-wise addition over a two-dimensional global range: each work-item
// computes its row-major index (row * row length + column) and writes
// pC[index] = pA[index] + pB[index].
__kernel void Add(__global int* pA, __global int* pB, __global int* pC)
{
    const int column    = get_global_id(0);   // position along dimension 0
    const int row       = get_global_id(1);   // position along dimension 1
    const int rowLength = get_global_size(0); // number of work-items per row

    const int index = row * rowLength + column;

    pC[index] = pA[index] + pB[index];
}
+0

Könnten Sie den Code zur besseren Lesbarkeit formatiert einfügen? Es ist nicht wirklich klar, woher der Fehler kommt. – Farside

+0

Sicher. Ich füge alles hinzu, was das Projekt mir vorgibt. Nun, zumindest die .cpp- und die .cl-Datei –

Antwort

1

Dies könnte ein Duplikat der folgenden Frage sein:

OpenCL code 'Error MSB3721' for Intel OpenCL SDK on Visual Studio 2010

Eine mögliche Lösung war, die '.cl'-Datei aus dem Projekt zu entfernen.

+0

Yep! Es funktioniert, aber das ist eine seltsame Lösung ... vielleicht ein Visual Studio Bug? –

+1

Nicht wirklich ein Fehler. Für alle Dateien im Projekt muss VS wissen, was mit ihnen zu tun ist, wenn das Projekt erstellt wird. Für .cl-Datei weiß VS nicht, was damit zu tun ist, daher der Fehler. Und da VS nichts zu tun braucht, besteht die Lösung darin, die Datei aus dem Projekt zu entfernen. Es kann auch möglich sein, VS anzuweisen, diese Datei während des Builds über einige Projekteinstellungen zu ignorieren, aber das habe ich nicht versucht. –

Verwandte Themen