diff --git a/.gitattributes b/.gitattributes deleted file mode 100644 index 412eeda..0000000 --- a/.gitattributes +++ /dev/null @@ -1,22 +0,0 @@ -# Auto detect text files and perform LF normalization -* text=auto - -# Custom for Visual Studio -*.cs diff=csharp -*.sln merge=union -*.csproj merge=union -*.vbproj merge=union -*.fsproj merge=union -*.dbproj merge=union - -# Standard to msysgit -*.doc diff=astextplain -*.DOC diff=astextplain -*.docx diff=astextplain -*.DOCX diff=astextplain -*.dot diff=astextplain -*.DOT diff=astextplain -*.pdf diff=astextplain -*.PDF diff=astextplain -*.rtf diff=astextplain -*.RTF diff=astextplain diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 5ebd21a..0000000 --- a/.gitignore +++ /dev/null @@ -1,163 +0,0 @@ -################# -## Eclipse -################# - -*.pydevproject -.project -.metadata -bin/ -tmp/ -*.tmp -*.bak -*.swp -*~.nib -local.properties -.classpath -.settings/ -.loadpath - -# External tool builders -.externalToolBuilders/ - -# Locally stored "Eclipse launch configurations" -*.launch - -# CDT-specific -.cproject - -# PDT-specific -.buildpath - - -################# -## Visual Studio -################# - -## Ignore Visual Studio temporary files, build results, and -## files generated by popular Visual Studio add-ons. - -# User-specific files -*.suo -*.user -*.sln.docstates - -# Build results -[Dd]ebug/ -[Rr]elease/ -*_i.c -*_p.c -*.ilk -*.meta -*.obj -*.pch -*.pdb -*.pgc -*.pgd -*.rsp -*.sbr -*.tlb -*.tli -*.tlh -*.tmp -*.vspscc -.builds -*.dotCover - -## TODO: If you have NuGet Package Restore enabled, uncomment this -#packages/ - -# Visual C++ cache files -ipch/ -*.aps -*.ncb -*.opensdf -*.sdf - -# Visual Studio profiler -*.psess -*.vsp - -# ReSharper is a .NET coding add-in -_ReSharper* - -# Installshield output folder -[Ee]xpress - -# DocProject is a documentation generator add-in -DocProject/buildhelp/ -DocProject/Help/*.HxT -DocProject/Help/*.HxC -DocProject/Help/*.hhc -DocProject/Help/*.hhk -DocProject/Help/*.hhp -DocProject/Help/Html2 -DocProject/Help/html - -# Click-Once directory -publish - -# Others -[Bb]in -[Oo]bj -sql -TestResults -*.Cache -ClientBin -stylecop.* -~$* -*.dbmdl -Generated_Code #added for RIA/Silverlight projects - -# Backup & report files from converting an old project file to a newer -# Visual Studio version. Backup files are not needed, because we have git ;-) -_UpgradeReport_Files/ -Backup*/ -UpgradeLog*.XML - - - -############ -## Windows -############ - -# Windows image file caches -Thumbs.db - -# Folder config file -Desktop.ini - - -############# -## Python -############# - -*.py[co] - -# Packages -*.egg -*.egg-info -dist -build -eggs -parts -bin -var -sdist -develop-eggs -.installed.cfg - -# Installer logs -pip-log.txt - -# Unit test / coverage reports -.coverage -.tox - -#Translations -*.mo - -#Mr Developer -.mr.developer.cfg - -# Mac crap -.DS_Store diff --git a/CMakeLists.txt b/CMakeLists.txt index ed70bf1..5a012f1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1 +1,2 @@ +cmake_minimum_required (VERSION 3.6) add_subdirectory(clcc) diff --git a/README b/README deleted file mode 100644 index 21aba4c..0000000 --- a/README +++ /dev/null @@ -1,33 +0,0 @@ -clcc - an NVIDIA OpenCL command line compiler - -I was having problems with the NVIDIA compiler crashing on my code. As I was poking around trying to figure out what was causing the crash, I figured out the shared library interface to the NVIDIA OpenCL clang/llvm compiler. - -I created a little program that uses this compiler (bundled with the drivers) to do command line compiling of code. - -I hope this will be useful to people who want to integrate a OpenCL C to PTX compiler into their build chain. You might be able to run ptxas on the resulting PTX file to get cubin, but I have not yet investigated if clCreateProgramWithBinary will accept cubin. - -The other useful feature is that you can control the compiler options to select a different target CUDA Compute Level (sm_XX) than the card currently installed. You can even run the compiler without needing a GPU installed at all - just extract needed nvcompiler.dll/nvcompiler32.dll/libnvidia-compiler.so file from the latest driver installer. I imagine this will be useful for automated/dedicated build machines that likely lack a GPU. - -The format of the command is this: -clcc ["build-options"] input.cl output.ptx -"build-options" - a quoted string containing the build options to pass to the NVIDIA compiler (equivalent to clBuildProgram's options string). -It accepts all the OpenCL 1.0/1.1 clBuildProgram options (http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clBuildProgram.html), the NVIDIA extended ones from cl_nv_compiler_options (http://developer.download.nvidia.com/compute/cuda/3_2_prod/toolkit/docs/OpenCL_Extensions/cl_nv_compiler_options.txt), and these hidden extras: --cl-nv-arch sm_XX - selects the target CUDA Compute Level architecture to compile for (sm_10 for 1.0, sm_11 for 1.1, sm_12 for 1.2, sm_13 for 1.3, sm_20 for 2.0, sm_21 for 2.1, sm_30 for 3.0 and sm_35 for 3.5) --cl-nv-cstd=CLX.X - selects the target OpenCL C language version (CL1.0 or CL1.1) -(there may be others) -input.cl - the source OpenCL C file -output.ptx - the destination PTX file - -Currently I only support one input per PTX file, but the API supports multiple inputs so I may add that feature. - -Try it out, test it, use it etc. and give me feedback, bugs, suggestions etc via GitHub (or on the NVIDIA forum thread). - -BUILDING: -This version worked on Windows when I wrote it (back in December 2010) with the then current NVIDIA drivers. If there are any problems with newer drivers or fixes needed please log an issue on GitHub. - -I have not been able to test the Linux version for a while but other users have reported it works fine. If there are any problems on Linux or fixes needed please log an issue on GitHub. - -The files include both Visual Studio 2008 for Windows build, and CMake script for Linux build. - -Feel free to discuss on the NVIDIA forums: -http://forums.nvidia.com/index.php?showtopic=188884 \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..63aec08 --- /dev/null +++ b/README.md @@ -0,0 +1,116 @@ +# clcc - an NVIDIA OpenCL command line compiler + +## History + +### Leith Bade: + +I was having problems with the NVIDIA compiler crashing on my code. As I was poking around trying to figure out what was causing the crash, I figured out the shared library interface to the NVIDIA OpenCL clang/llvm compiler. + +I created a little program that uses this compiler (bundled with the drivers) to do command line compiling of code. + +I hope this will be useful to people who want to integrate a OpenCL C to PTX compiler into their build chain. You might be able to run ptxas on the resulting PTX file to get cubin, but I have not yet investigated if clCreateProgramWithBinary will accept cubin. + +The other useful feature is that you can control the compiler options to select a different target CUDA Compute Level (sm_XX) than the card currently installed. You can even run the compiler without needing a GPU installed at all - just extract needed nvcompiler.dll/nvcompiler32.dll/libnvidia-compiler.so file from the latest driver installer. I imagine this will be useful for automated/dedicated build machines that likely lack a GPU. + +Feel free to discuss on the NVIDIA forums: +http://forums.nvidia.com/index.php?showtopic=188884 + +### David Castells-Rufas + +I was interested in compiling and testing OpenCL kernels on different platforms and especially receive feedback about the used resources. + +I noticed that NVIDIA OpenCL gives different information wether it is executed from the former clcc implementation and the OpenCL runtime compiler clBuildProgram logs. + +So I added a way to select wether Leith direct shared library access method is used or classic OpenCL runtime. + +I also modified the command line interface to be easier to add new flags. + +I also introduced a compilation flow using cmake. I tested it in Cygwin and Ubuntu Linux. + +Finally I moved from C to C++, to reuse OpenCL functions that use STL classes. + +## Execution + +The format of the command is this: + +clcc [<options>] [--flags "<build-options>"] -i input.cl -o output.ptx + +<options> can be + +--help shows the usage + +--opencl-runtime uses the OpenCL runtime to compile the code. Otherwise it uses the original direct access to NVIDIA library. + +<build-options> - a quoted string containing the build options to pass to the NVIDIA compiler + +It accepts all the OpenCL 1.0/1.1 clBuildProgram options (http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clBuildProgram.html), the NVIDIA extended ones from cl_nv_compiler_options (http://developer.download.nvidia.com/compute/cuda/3_2_prod/toolkit/docs/OpenCL_Extensions/cl_nv_compiler_options.txt), and these hidden extras: +-cl-nv-arch sm_XX - selects the target CUDA Compute Level architecture to compile for (sm_10 for 1.0, sm_11 for 1.1, sm_12 for 1.2, sm_13 for 1.3, sm_20 for 2.0, sm_21 for 2.1, sm_30 for 3.0 and sm_35 for 3.5) +-cl-nv-cstd=CLX.X - selects the target OpenCL C language version (CL1.0 or CL1.1) + +See the flags section below, for more info. + +input.cl - the source OpenCL C file + +output.ptx - the destination PTX file + +Currently I only support one input per PTX file, but the API supports multiple inputs so I may add that feature. + +Try it out, test it, use it etc. and give me feedback, bugs, suggestions etc via GitHub (or on the NVIDIA forum thread). + +### IMPORTANT NOTE + +NVIDIA compiler chaches the results of previous compilations, so that you can not receive any feedback +from -cl-nv-verbose if the compiler uses the cached version. To force the recompilation you must flush the cache. + +In my Linux system this cache is in /home/<user>/.nv/ComputeCache + + +## building + +### Visual Studio 2008 for Windows + +Open the clcc.sln solutions + +### Cygwin + +Run the cmake_make.sh script, it will create a build directory with a Makefile that can be used to compile + +### Netbeans 8.2 + +A prebuild step invoking cmake_make.sh should be specified. Alternative this step can be run from a Cygwin console. + +Set the directory used during compilation to "build" and use standard make, and make clean. + +### Linux + +Run the cmake_make.sh script, it will create a build directory with a Makefile that can be used to compile + + +## Flags + +There is little information about the flags accepted by the OpenCL compilers. Here I try to review some + + +### General Flags + +| Flag | Description | Comments | +|------|-------------|----------| +| -cl-opt-disable | +| -cl-single-precision-constant +| -cl-denorms-are-zero +| -cl-fp32-correctly-rounded-divide-sqrt +| -cl-mad-enable +| -cl-no-signed-zeros +| -cl-unsafe-math-optimizations +| -cl-finite-math-only +| -cl-fast-relaxed-math + +### NVIDIA flags + +| Flag | Description | Comments | +|------|-------------|----------| +| -cl-nv-arch | architecture | | +| -cl-nv-cstd=<version> | version is the OpenCL version | can be CL1.0, CL1.1, etc. | +| -cl-nv-maxrregcount <N> | Specify the maximum number of registers that GPU functions can use. | It fails in some systems| +| -cl-nv-opt-level <N> | 0 (no optimization). | | +| -cl-nv-verbose | | Does not show much information | \ No newline at end of file diff --git a/clcc/CMakeLists.txt b/clcc/CMakeLists.txt index fbfdc27..8a34fb3 100644 --- a/clcc/CMakeLists.txt +++ b/clcc/CMakeLists.txt @@ -1,2 +1,18 @@ -add_executable(clcc clcc.c compiler.c load_compiler_unx.c) -target_link_libraries(clcc ${CMAKE_DL_LIBS}) +cmake_minimum_required (VERSION 3.6) +IF (CYGWIN) + add_executable(clcc clcc.cpp compiler.cpp load_compiler_win.cpp OpenCLUtils.cpp) + link_directories(/cygdrive/c/Intel/OpenCL/sdk/lib/) + add_library(other STATIC IMPORTED) + set_property(TARGET other PROPERTY IMPORTED_LOCATION /cygdrive/c/Intel/OpenCL/sdk/lib/x64/OpenCL.lib) + SET (CMAKE_FIND_LIBRARY_SUFFIXES ".lib" ".LIB" ".dll" ".DLL") + find_library(OPENCL_LIBRARY OpenCL PATHS /cygdrive/c/Intel/OpenCL/sdk/lib/) + #target_link_libraries(clcc ${CMAKE_DL_LIBS} OpenCL.lib) + target_include_directories(clcc PUBLIC /cygdrive/c/Intel/OpenCL/sdk/include) + target_link_libraries(clcc PUBLIC ${CMAKE_DL_LIBS} other) + #target_link_libraries(clcc PUBLIC ) +ELSE (CYGWIN) + add_executable(clcc clcc.cpp compiler.cpp load_compiler_unx.cpp OpenCLUtils.cpp) + find_package(OpenCL REQUIRED) + target_link_libraries(clcc ${CMAKE_DL_LIBS} OpenCL::OpenCL) +ENDIF (CYGWIN) +install(TARGETS clcc DESTINATION bin) diff --git a/clcc/OpenCLUtils.cpp b/clcc/OpenCLUtils.cpp new file mode 100644 index 0000000..f2405b7 --- /dev/null +++ b/clcc/OpenCLUtils.cpp @@ -0,0 +1,377 @@ +#include "OpenCLUtils.h" + +#include + +OpenCLUtils::OpenCLUtils() +{ +} + + +OpenCLUtils::~OpenCLUtils() +{ +} + + +cl_platform_id selectPlatform(cl_uint selected_platform_index) +{ + using namespace std; + + cl_uint num_of_platforms = 0; + // get total number of available platforms: + cl_int err = clGetPlatformIDs(0, 0, &num_of_platforms); + SAMPLE_CHECK_ERRORS(err); + + // use vector for automatic memory management + vector platforms(num_of_platforms); + // get IDs for all platforms: + err = clGetPlatformIDs(num_of_platforms, &platforms[0], 0); + SAMPLE_CHECK_ERRORS(err); + + + bool by_index = true; + + + cout << "Platforms (" << num_of_platforms << "):\n"; + + // TODO In case of empty platform name select the default platform or 0th platform? + + for (cl_uint i = 0; i < num_of_platforms; ++i) + { + // Get the length for the i-th platform name + size_t platform_name_length = 0; + err = clGetPlatformInfo( + platforms[i], + CL_PLATFORM_NAME, + 0, + 0, + &platform_name_length + ); + SAMPLE_CHECK_ERRORS(err); + + // Get the name itself for the i-th platform + // use vector for automatic memory management + vector platform_name(platform_name_length); + err = clGetPlatformInfo( + platforms[i], + CL_PLATFORM_NAME, + platform_name_length, + &platform_name[0], + 0 + ); + SAMPLE_CHECK_ERRORS(err); + + cout << " [" << i << "] " << &platform_name[0]; + + // decide if this i-th platform is what we are looking for + // we select the first one matched skipping the next one if any + // + if ( + selected_platform_index == i || // we already selected the platform by index + //string(&platform_name[0]).find(required_platform_subname) != string::npos && + selected_platform_index == num_of_platforms // haven't selected yet + ) + { + cout << " [Selected]"; + selected_platform_index = i; + // do not stop here, just want to see all available platforms + } + + // TODO Something when more than one platform matches a given subname + + cout << endl; + } + + if (by_index && selected_platform_index >= num_of_platforms) + { + throw Error( + "Given index of platform (" + to_string(selected_platform_index) + ") " + "is out of range of available platforms" + ); + } + + /*if (!by_index && selected_platform_index >= num_of_platforms) + { + throw Error( + "There is no found platform with name containing \"" + + required_platform_subname + "\" as a substring\n" + ); + }*/ + + return platforms[selected_platform_index]; +} + +string opencl_error_to_str(cl_int error) +{ +#define CASE_CL_CONSTANT(NAME) case NAME: return #NAME; + + // Suppose that no combinations are possible. + // TODO: Test whether all error codes are listed here + switch (error) + { + CASE_CL_CONSTANT(CL_SUCCESS) + CASE_CL_CONSTANT(CL_DEVICE_NOT_FOUND) + CASE_CL_CONSTANT(CL_DEVICE_NOT_AVAILABLE) + CASE_CL_CONSTANT(CL_COMPILER_NOT_AVAILABLE) + CASE_CL_CONSTANT(CL_MEM_OBJECT_ALLOCATION_FAILURE) + CASE_CL_CONSTANT(CL_OUT_OF_RESOURCES) + CASE_CL_CONSTANT(CL_OUT_OF_HOST_MEMORY) + CASE_CL_CONSTANT(CL_PROFILING_INFO_NOT_AVAILABLE) + CASE_CL_CONSTANT(CL_MEM_COPY_OVERLAP) + CASE_CL_CONSTANT(CL_IMAGE_FORMAT_MISMATCH) + CASE_CL_CONSTANT(CL_IMAGE_FORMAT_NOT_SUPPORTED) + CASE_CL_CONSTANT(CL_BUILD_PROGRAM_FAILURE) + CASE_CL_CONSTANT(CL_MAP_FAILURE) + CASE_CL_CONSTANT(CL_MISALIGNED_SUB_BUFFER_OFFSET) + CASE_CL_CONSTANT(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST) + CASE_CL_CONSTANT(CL_INVALID_VALUE) + CASE_CL_CONSTANT(CL_INVALID_DEVICE_TYPE) + CASE_CL_CONSTANT(CL_INVALID_PLATFORM) + CASE_CL_CONSTANT(CL_INVALID_DEVICE) + CASE_CL_CONSTANT(CL_INVALID_CONTEXT) + CASE_CL_CONSTANT(CL_INVALID_QUEUE_PROPERTIES) + CASE_CL_CONSTANT(CL_INVALID_COMMAND_QUEUE) + CASE_CL_CONSTANT(CL_INVALID_HOST_PTR) + CASE_CL_CONSTANT(CL_INVALID_MEM_OBJECT) + CASE_CL_CONSTANT(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) + CASE_CL_CONSTANT(CL_INVALID_IMAGE_SIZE) + CASE_CL_CONSTANT(CL_INVALID_SAMPLER) + CASE_CL_CONSTANT(CL_INVALID_BINARY) + CASE_CL_CONSTANT(CL_INVALID_BUILD_OPTIONS) + CASE_CL_CONSTANT(CL_INVALID_PROGRAM) + CASE_CL_CONSTANT(CL_INVALID_PROGRAM_EXECUTABLE) + CASE_CL_CONSTANT(CL_INVALID_KERNEL_NAME) + CASE_CL_CONSTANT(CL_INVALID_KERNEL_DEFINITION) + CASE_CL_CONSTANT(CL_INVALID_KERNEL) + CASE_CL_CONSTANT(CL_INVALID_ARG_INDEX) + CASE_CL_CONSTANT(CL_INVALID_ARG_VALUE) + CASE_CL_CONSTANT(CL_INVALID_ARG_SIZE) + CASE_CL_CONSTANT(CL_INVALID_KERNEL_ARGS) + CASE_CL_CONSTANT(CL_INVALID_WORK_DIMENSION) + CASE_CL_CONSTANT(CL_INVALID_WORK_GROUP_SIZE) + CASE_CL_CONSTANT(CL_INVALID_WORK_ITEM_SIZE) + CASE_CL_CONSTANT(CL_INVALID_GLOBAL_OFFSET) + CASE_CL_CONSTANT(CL_INVALID_EVENT_WAIT_LIST) + CASE_CL_CONSTANT(CL_INVALID_EVENT) + CASE_CL_CONSTANT(CL_INVALID_OPERATION) + CASE_CL_CONSTANT(CL_INVALID_GL_OBJECT) + CASE_CL_CONSTANT(CL_INVALID_BUFFER_SIZE) + CASE_CL_CONSTANT(CL_INVALID_MIP_LEVEL) + CASE_CL_CONSTANT(CL_INVALID_GLOBAL_WORK_SIZE) + CASE_CL_CONSTANT(CL_INVALID_PROPERTY) +// CASE_CL_CONSTANT(CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR) + default: + return "UNKNOWN ERROR CODE " + error; + } + +#undef CASE_CL_CONSTANT +} + + + +cl_device_id selectDevice(cl_platform_id platform, cl_uint selected_device_index) +{ + using namespace std; + + // List devices of a given type only + cl_device_type device_type = CL_DEVICE_TYPE_ALL; // = parseDeviceType(device_type_name); + + cl_uint num_of_devices = 0; + cl_int err = clGetDeviceIDs( + platform, + device_type, + 0, + 0, + &num_of_devices + ); + + SAMPLE_CHECK_ERRORS(err); + + vector devices(num_of_devices); + + cl_device_id selectedDeviceId = 0; + + err = clGetDeviceIDs( + platform, + device_type, + num_of_devices, + &devices[0], + 0 + ); + SAMPLE_CHECK_ERRORS(err); + + cout << "Devices (" << num_of_devices; + + cout << "):\n"; + + for (cl_uint i = 0; i < num_of_devices; ++i) + { + // Get the length for the i-th device name + size_t device_name_length = 0; + err = clGetDeviceInfo( + devices[i], + CL_DEVICE_NAME, + 0, + 0, + &device_name_length + ); + SAMPLE_CHECK_ERRORS(err); + + // Get the name itself for the i-th device + // use vector for automatic memory management + vector device_name(device_name_length); + err = clGetDeviceInfo( + devices[i], + CL_DEVICE_NAME, + device_name_length, + &device_name[0], + 0 + ); + SAMPLE_CHECK_ERRORS(err); + + cout << " [" << i << "] " << &device_name[0]; + + if (i == selected_device_index) + { + cout << " [Selected]"; + selectedDeviceId = devices[i]; + } + + cout << '\n'; + } + + return selectedDeviceId; +} + +void pfn_notify(const char *errinfo, const void *private_info, size_t cb, void *user_data) +{ + fprintf(stderr, "[pfn_notify] OpenCL Error : %s\n", errinfo); + fflush(stderr); +} + + +string getPlatformName(cl_platform_id platform_id) +{ + // Get the length for the i-th platform name + size_t platform_name_length = 0; + cl_int err = clGetPlatformInfo( + platform_id, + CL_PLATFORM_NAME, + 0, + 0, + &platform_name_length + ); + SAMPLE_CHECK_ERRORS(err); + + // Get the name itself for the i-th platform + // use vector for automatic memory management + vector platform_name(platform_name_length); + err = clGetPlatformInfo( + platform_id, + CL_PLATFORM_NAME, + platform_name_length, + &platform_name[0], + 0 + ); + SAMPLE_CHECK_ERRORS(err); + + string str = &platform_name[0]; + + return str; +} + +string getDeviceName(cl_device_id device_id) +{ + size_t device_name_length = 0; + cl_int err = clGetDeviceInfo( + device_id, + CL_DEVICE_NAME, + 0, + 0, + &device_name_length + ); + SAMPLE_CHECK_ERRORS(err); + + // Get the name itself for the i-th device + // use vector for automatic memory management + vector device_name(device_name_length); + err = clGetDeviceInfo( + device_id, + CL_DEVICE_NAME, + device_name_length, + &device_name[0], + 0 + ); + SAMPLE_CHECK_ERRORS(err); + + string str = &device_name[0]; + + return str; + +} + + + + + +cl_context createContext(cl_platform_id platform, cl_device_id device/*const cl_context_properties* additional_context_props*/) +{ + using namespace std; + + if (!platform) + { + throw Error("Platform is not selected"); + } + + if (!device) + { + throw Error("Device is not selected"); + } + + size_t number_of_additional_props = 0; + /*if (additional_context_props) + { + // count all additional props including terminating 0 + while (additional_context_props[number_of_additional_props++]); + number_of_additional_props--; // now exclude terminating 0 + }*/ + + // allocate enough space for platform and all additional props if any + std::vector context_props( + 2 + // for CL_CONTEXT_PLATFORM and platform itself + number_of_additional_props + + 1 // for terminating zero + ); + + context_props[0] = CL_CONTEXT_PLATFORM; + context_props[1] = cl_context_properties(platform); + /* + std::copy( + additional_context_props, + additional_context_props + number_of_additional_props, + context_props.begin() + 2 // +2 -- skipping already initialized platform entries + );*/ + + context_props.back() = 0; + + cl_int err = 0; + cl_context context = clCreateContext(&context_props[0], 1, &device, pfn_notify, 0, &err); + SAMPLE_CHECK_ERRORS(err); + + return context; +} + +cl_command_queue createQueue(cl_device_id device, cl_context context, cl_command_queue_properties queue_properties) +{ + using namespace std; + + if (!device) + { + throw Error("Device is not selected"); + } + + cl_int err = 0; + cl_command_queue queue = clCreateCommandQueue(context, device, queue_properties, &err); + SAMPLE_CHECK_ERRORS(err); + + return queue; +} \ No newline at end of file diff --git a/clcc/OpenCLUtils.h b/clcc/OpenCLUtils.h new file mode 100644 index 0000000..d00a6bc --- /dev/null +++ b/clcc/OpenCLUtils.h @@ -0,0 +1,89 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. + */ + +/* + * File: OpenCLUtils.h + * Author: dcr + * + * Created on January 27, 2018, 10:35 AM + */ + +#ifndef OPENCLUTILS_H +#define OPENCLUTILS_H + + +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + + +// Report about an OpenCL problem. +// Macro is used instead of a function here +// to report source file name and line number. +//#define SAMPLE_CHECK_ERRORS(ERR) \ +// if(ERR != CL_SUCCESS) \ +// { \ +// throw Error( \ +// string("OpenCL error ") + \ +// string(opencl_error_to_str(ERR)) + \ +// string(" happened in file ") + \ +// string((const char*)(__FILE__)) + \ +// string(" at line ") + \ +// string((const char*)(__LINE__)) +\ +// string(".") \ +// ); \ +// } + +#define SAMPLE_CHECK_ERRORS(ERR) \ + if(ERR != CL_SUCCESS) \ + { \ + throw Error( \ + string("OpenCL error ") + \ + string(opencl_error_to_str(ERR)) \ + + string(" happened in file ") \ + + string((const char*)(__FILE__)) \ + + string(" at line ") + to_string(__LINE__) \ + ); \ + } + +string opencl_error_to_str(cl_int error); +cl_platform_id selectPlatform(cl_uint selected_platform_index); +cl_device_id selectDevice(cl_platform_id platform, cl_uint selected_device_index); +string getDeviceName(cl_device_id device_id); +string getPlatformName(cl_platform_id platform_id); +cl_context createContext(cl_platform_id platform, cl_device_id device/*const cl_context_properties* additional_context_props*/); +cl_command_queue createQueue(cl_device_id device, cl_context context, cl_command_queue_properties queue_properties); + + +class OpenCLUtils +{ +public: + OpenCLUtils(); + ~OpenCLUtils(); + +public: + +}; + +// Base class for all exception in samples +class Error : public std::runtime_error +{ +public: + Error(const string& msg) : + std::runtime_error(msg) + { + } +}; + +#endif /* OPENCLUTILS_H */ + diff --git a/clcc/clcc.c b/clcc/clcc.c deleted file mode 100644 index 10aabc8..0000000 --- a/clcc/clcc.c +++ /dev/null @@ -1,120 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include "compiler.h" - -int main(int argc, char **argv) -{ - // Some test arguments: - // const char source[] = "__kernel void test(__global int *input, __global int *output) { int i = get_global_id(0); output[i] = input[i]; }"; - // const char options[] = "-cl-nv-verbose -cl-nv-arch sm_12 -cl-nv-cstd=CL1.0"; - // const char *strings[] = { source }; - // const size_t lengths[] = { sizeof(source) }; - - char *source; - char *options; - const char *strings[1]; - size_t lengths[1]; - const unsigned int count = 1; - char *binary = NULL; - char *log = NULL; - int result; - FILE *sourceFile = NULL; - char *sourceFilename = NULL; - FILE *binaryFile = NULL; - char *binaryFilename = NULL; - struct stat sourceStat; - size_t size; - - if (argc < 3 || argc > 4) - { - printf("Usage: clcc [\"compiler-options\"] input.cl output.ptx\n"); - exit(EXIT_SUCCESS); - } - sourceFilename = argv[argc - 2]; - binaryFilename = argv[argc - 1]; - if (argc > 3) - { - options = argv[1]; - } - else - { - options = ""; - } - - sourceFile = fopen(sourceFilename, "rb"); - if (sourceFile == NULL) - { - perror(sourceFilename); - exit(EXIT_FAILURE); - } - - result = stat(sourceFilename, &sourceStat); - if (result != 0) - { - perror(sourceFilename); - exit(EXIT_FAILURE); - } - - source = (char *)malloc(sourceStat.st_size + 1); - if (source == NULL) - { - fprintf(stderr, "malloc: Unable to allocate memory for source file."); - exit(EXIT_FAILURE); - } - source[sourceStat.st_size] = 0; - - size = fread(source, sizeof(*source), sourceStat.st_size / sizeof(*source), sourceFile); - if (size != sourceStat.st_size / sizeof(*source)) - { - perror(sourceFilename); - exit(EXIT_FAILURE); - } - - strings[0] = source; - lengths[0] = strlen(source); - result = NvCliCompileProgram(strings, count, lengths, options, &log, &binary); - - if (result != 0) - { - //printf("\n%s", log); // TODO: replace with better one (like the drivers do) - - char* temp; - temp = strtok (log,"\n"); - while (temp != NULL) - { - if(temp[0] == ':') printf("%s", sourceFilename); - printf ("%s\n",temp); - temp = strtok (NULL, "\n"); - } - NvCliCompileLogFree(log); - exit(EXIT_FAILURE); - } - assert(binary != NULL); - - - binaryFile = fopen(binaryFilename, "w+b"); - if (binaryFile == NULL) - { - perror(binaryFilename); - exit(EXIT_FAILURE); - } - - size = fwrite(binary, sizeof(*binary), strlen(binary), binaryFile); - if (size != strlen(binary)) - { - perror(binaryFilename); - exit(EXIT_FAILURE); - } - - free(source); - fclose(sourceFile); - NvCliCompiledProgramFree(binary); - fclose(binaryFile); - - return EXIT_SUCCESS; -} diff --git a/clcc/clcc.cpp b/clcc/clcc.cpp new file mode 100644 index 0000000..4ca1d8f --- /dev/null +++ b/clcc/clcc.cpp @@ -0,0 +1,268 @@ +#include +#include +#include +#include +#include +#include + +#include "OpenCLUtils.h" + +#include "compiler.h" + +int doHelp = 0; +int doOpenCLRuntime = 0; + +char *options; +char *sourceFilename = NULL; +char *binaryFilename = NULL; + +void compileWithOpenCLRuntime(const char *strings[], size_t lengths[], const unsigned int count); +void compileWithNativeLibrary(const char *strings[], size_t lengths[], const unsigned int count); + +void parseOptions(int argc, char* argv[]) +{ + for (int i=0; i < argc; i++) + { + if (strcmp(argv[i], "--help") == 0) + { + doHelp = 1; + } + else if (strcmp(argv[i], "--opencl-runtime") == 0) + { + doOpenCLRuntime = 1; + } + else if (strcmp(argv[i], "--flags") == 0) + { + i++; + options = argv[i]; + } + else if (strcmp(argv[i], "-i") == 0) + { + i++; + sourceFilename = argv[i]; + } + else if (strcmp(argv[i], "-o") == 0) + { + i++; + binaryFilename = argv[i]; + } + } + + if (doHelp) + return; + + if (sourceFilename == NULL) + { + fprintf(stderr, "No input file specified\n"); + exit(-1); + } + if (binaryFilename == NULL) + { + fprintf(stderr, "No output file specified\n"); + exit(-1); + } + +} + +void usage() +{ + printf("Usage: clcc [options] --flags [\"compiler-options\"] -i input.cl -o output.ptx\n"); + + printf("\n"); + printf("Where options are..\n"); + printf("\n"); + printf("--help\n"); + printf("\n"); + printf("--opencl-runtime\n"); + printf("\n"); + + printf("\n"); + printf("Where flags can be...\n"); + printf("\n"); + printf("-cl-nv-verbose\n"); + printf("\n"); + + // ?? + printf("-cl-opt-disable\n"); + printf("\n"); + printf("-cl-single-precision-constant\n"); + printf("\n"); + printf("-cl-denorms-are-zero\n"); + printf("\n"); + printf("-cl-fp32-correctly-rounded-divide-sqrt\n"); + printf("\n"); + printf("-cl-mad-enable\n"); + printf("\n"); + printf("-cl-no-signed-zeros\n"); + printf("\n"); + printf("-cl-unsafe-math-optimizations\n"); + printf("\n"); + printf("-cl-finite-math-only\n"); + printf("\n"); + printf("-cl-fast-relaxed-math\n"); + printf("\n"); + // ? + + printf("-cl-nv-arch\n"); + printf("\n"); + + printf("-cl-nv-cstd=\n"); + printf(" version is the OpenCL version supported such as CL1.0 or CL1.1 "); + printf("\n"); + printf("-cl-nv-maxrregcount \n"); + printf(" Passed on to ptxas as --maxrregcount \n"); + printf(" N is a positive integer.\n"); + printf(" Specify the maximum number of registers that GPU functions can use.\n"); + printf("\n"); + printf("-cl-nv-opt-level \n"); + printf(" Passed on to ptxas as --opt-level \n"); + printf(" N is a positive integer, or 0 (no optimization).\n"); + printf("\n"); + printf("-cl-nv-verbose\n"); + printf(" Passed on to ptxas as --verbose\n"); + exit(EXIT_SUCCESS); +} + +int main(int argc, char **argv) +{ + char *source; + const char *strings[1]; + size_t lengths[1]; + const unsigned int count = 1; + + FILE *sourceFile = NULL; + struct stat sourceStat; + size_t size; + + parseOptions(argc, argv); + + if (doHelp) + usage(); + + sourceFile = fopen(sourceFilename, "rb"); + if (sourceFile == NULL) + { + perror(sourceFilename); + exit(EXIT_FAILURE); + } + + int result = stat(sourceFilename, &sourceStat); + if (result != 0) + { + perror(sourceFilename); + exit(EXIT_FAILURE); + } + + source = (char *)malloc(sourceStat.st_size + 1); + if (source == NULL) + { + fprintf(stderr, "malloc: Unable to allocate memory for source file."); + exit(EXIT_FAILURE); + } + source[sourceStat.st_size] = 0; + + size = fread(source, sizeof(*source), sourceStat.st_size / sizeof(*source), sourceFile); + if (size != sourceStat.st_size / sizeof(*source)) + { + perror(sourceFilename); + exit(EXIT_FAILURE); + } + + strings[0] = source; + lengths[0] = strlen(source); + + if (doOpenCLRuntime) + compileWithOpenCLRuntime(strings, lengths, count); + else + compileWithNativeLibrary(strings, lengths, count); + + free(source); + fclose(sourceFile); + + return EXIT_SUCCESS; +} + + +void compileWithOpenCLRuntime(const char *strings[], size_t lengths[], const unsigned int count) +{ + cl_uint spi = 0; + cl_uint sdi = 0; + + cl_platform_id pi = selectPlatform(spi); + cl_device_id di = selectDevice(pi, sdi); + cl_context ct = createContext(pi, di); + + cl_int ret; + cl_program program = clCreateProgramWithSource(ct, 1, strings, lengths, &ret); + SAMPLE_CHECK_ERRORS(ret); + + + ret = clBuildProgram(program, 1, &di, options, NULL, NULL); + + //if (ret == CL_BUILD_PROGRAM_FAILURE) + { + // Determine the size of the log + size_t log_size; + ret = clGetProgramBuildInfo(program, di, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); + SAMPLE_CHECK_ERRORS(ret); + + // Allocate memory for the log + char *log = (char *) malloc(log_size); + + // Get the log + ret = clGetProgramBuildInfo(program, di, CL_PROGRAM_BUILD_LOG, log_size, log, NULL); + SAMPLE_CHECK_ERRORS(ret); + + // Print the log + printf("Compilation Flags:\n"); + printf(options); + printf("\n"); + printf("Compilation Output Log >>>>>\n"); + printf(log); + printf("\n<<<<<<\n"); + } + + SAMPLE_CHECK_ERRORS(ret); + +} + +void compileWithNativeLibrary(const char *strings[], size_t lengths[], const unsigned int count) +{ + char *log = NULL; + char *binary = NULL; + FILE *binaryFile = NULL; + size_t size; + + int result = NvCliCompileProgram(strings, count, lengths, options, &log, &binary); + + //if (result != 0) + { + //printf("\n%s", log); // TODO: replace with better one (like the drivers do) + + printf(log); + NvCliCompileLogFree(log); + + if (result != 0) + exit(EXIT_FAILURE); + } + assert(binary != NULL); + + + binaryFile = fopen(binaryFilename, "w+b"); + if (binaryFile == NULL) + { + perror(binaryFilename); + exit(EXIT_FAILURE); + } + + size = fwrite(binary, sizeof(*binary), strlen(binary), binaryFile); + if (size != strlen(binary)) + { + perror(binaryFilename); + exit(EXIT_FAILURE); + } + + NvCliCompiledProgramFree(binary); + fclose(binaryFile); + +} \ No newline at end of file diff --git a/clcc/compiler.c b/clcc/compiler.cpp similarity index 100% rename from clcc/compiler.c rename to clcc/compiler.cpp diff --git a/clcc/load_compiler_unx.c b/clcc/load_compiler_unx.cpp similarity index 91% rename from clcc/load_compiler_unx.c rename to clcc/load_compiler_unx.cpp index 83427e4..d502f42 100644 --- a/clcc/load_compiler_unx.c +++ b/clcc/load_compiler_unx.cpp @@ -5,9 +5,9 @@ #include "compiler.h" void *libnvidiacompiler = NULL; -tNvCliCompileProgram pNvCliCompileProgram; -tNvCliCompileLogFree pNvCliCompileLogFree; -tNvCliCompiledProgramFree pNvCliCompiledProgramFree; +tNvCliCompileProgram* pNvCliCompileProgram; +tNvCliCompileLogFree* pNvCliCompileLogFree; +tNvCliCompiledProgramFree* pNvCliCompiledProgramFree; int loaded = 0; void unload_compiler(void) diff --git a/clcc/load_compiler_win.c b/clcc/load_compiler_win.cpp similarity index 93% rename from clcc/load_compiler_win.c rename to clcc/load_compiler_win.cpp index d6092a5..b0c441e 100644 --- a/clcc/load_compiler_win.c +++ b/clcc/load_compiler_win.cpp @@ -10,11 +10,11 @@ tNvCliCompileLogFree pNvCliCompileLogFree; tNvCliCompiledProgramFree pNvCliCompiledProgramFree; int loaded = 0; -int unload_compiler(void) +void unload_compiler(void) { if (!loaded) { - return 0; + return; // 0; } pNvCliCompileProgram = NULL; @@ -26,7 +26,7 @@ int unload_compiler(void) loaded = 0; - return 0; + // return 0; } void print_error() @@ -87,7 +87,8 @@ void load_compiler() exit(EXIT_FAILURE); } - _onexit(unload_compiler); + //_onexit(unload_compiler); + atexit(unload_compiler); loaded = 1; } \ No newline at end of file diff --git a/cmake_make.sh b/cmake_make.sh new file mode 100644 index 0000000..e85d45c --- /dev/null +++ b/cmake_make.sh @@ -0,0 +1,21 @@ +#!/bin/bash +if [ $# -ge 1 ] +then + if [ $1="clean" ] + then + if [ -e ./build/Makefile ] + then + cd ./build/; make clean; cd - 1>/dev/null + fi + rm -rf build/ + else + echo "Command not found" + exit 1 + fi +else + if [ ! -d ./build/ ] + then + mkdir ./build/ + fi + cd ./build/; cmake ..; make; cd - 1>/dev/null +fi diff --git a/nbproject/configurations.xml b/nbproject/configurations.xml new file mode 100644 index 0000000..97114d3 --- /dev/null +++ b/nbproject/configurations.xml @@ -0,0 +1,122 @@ + + + + + + + + + CMakeCCompilerId.c + + + CMakeCXXCompilerId.cpp + + + feature_tests.c + feature_tests.cxx + + + + OpenCLUtils.h + + + + /usr/share/cmake-3.5/Modules/CMakeCCompilerABI.c + /usr/share/cmake-3.5/Modules/CMakeCXXCompilerABI.cpp + /usr/share/cmake-3.5/Modules/CMakeCompilerABI.h + + + Makefile + nbproject/private/launcher.properties + + + ^(nbproject)$ + + . + + Makefile + + + + default + false + false + + + + + + + + + + build + ${MAKE} + ${MAKE} clean + + + + . + ./cmake_make.sh + + + + + + + + + + + + + + build/CMakeFiles/CMakeTmp + + + + + + + build/CMakeFiles/CMakeTmp + + + + + + + + + build/CMakeFiles/CMakeTmp + + + + + build/CMakeFiles/CMakeTmp + + + + + + diff --git a/nbproject/project.xml b/nbproject/project.xml new file mode 100644 index 0000000..081e027 --- /dev/null +++ b/nbproject/project.xml @@ -0,0 +1,26 @@ + + + org.netbeans.modules.cnd.makeproject + + + clcc + c + cpp,cxx + h + UTF-8 + + + . + + + + Default + 0 + + + + false + + + +