From 0b50911559bbed03b43293891a2f1ecb009f4926 Mon Sep 17 00:00:00 2001 From: Philip Heinisch Date: Fri, 29 Mar 2019 14:07:15 +0100 Subject: [PATCH 1/7] Added automatic PCIe topology detection --- include/ocl_dev_mgr.hpp | 2 + src/hdf5_io.cpp | 48 +--- src/main.cpp | 247 ++++++++++---------- src/ocl_dev_mgr.cpp | 492 ++++++++++++++++++++++------------------ 4 files changed, 394 insertions(+), 395 deletions(-) diff --git a/include/ocl_dev_mgr.hpp b/include/ocl_dev_mgr.hpp index 6ecb214..130cf70 100644 --- a/include/ocl_dev_mgr.hpp +++ b/include/ocl_dev_mgr.hpp @@ -22,6 +22,7 @@ class ocl_dev_mgr { std::string name; cl::Platform platform; std::string platform_name; + std::string vendor; cl_device_type type; std::string ocl_version; cl_ulong max_mem; @@ -35,6 +36,7 @@ class ocl_dev_mgr { cl_uint float_perf; }; + std::string getDevicePCIeID(cl_uint avail_device_idx); cl_ulong init_device(cl_uint avail_device_idx); cl::CommandQueue& get_queue(cl_uint context_idx, cl_uint queue_idx); cl::Context& get_context(cl_uint context_idx); diff --git a/src/hdf5_io.cpp b/src/hdf5_io.cpp index a080bf8..985c288 100644 --- a/src/hdf5_io.cpp +++ b/src/hdf5_io.cpp @@ -381,18 +381,11 @@ bool h5_read_string(char const* filename, char const* varname, std::string& outp { if (!fileExists(filename)) { std::cerr << ERROR_INFO << "File '" << filename << "' not found." << std::endl; - //TODO: Exception? Only error code? + //TODO: File not found - no idea what error code to use return false; } hid_t h5_file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT); - if (H5LTpath_valid(h5_file_id, varname, true) <= 0) { - std::cerr << ERROR_INFO << "Variable '" << varname << "' not found in file '" << filename << "'." << std::endl; - //TODO: Exception? Only error code? - H5Fclose(h5_file_id); - return false; - } - hid_t dataset = H5Dopen(h5_file_id, varname, H5P_DEFAULT); hid_t datatype = H5Dget_type(dataset); @@ -407,13 +400,7 @@ bool h5_read_string(char const* filename, char const* varname, std::string& outp hsize_t size = accumulate(begin(dims), end(dims), 1, std::multiplies()); std::vector buffer(size * sizeof(char*)); - herr_t err = H5Dread(dataset, datatype, dataspace, dataspace, H5P_DEFAULT, &(buffer[0])); - if (err < 0) { - std::cerr << ERROR_INFO << "Reading variable '" << varname << "' in file '" << filename << "' not possible." << std::endl; - //TODO: Exception? Only error code? - H5Fclose(h5_file_id); - return false; - } + H5Dread(dataset, datatype, dataspace, dataspace, H5P_DEFAULT, &(buffer[0])); output = std::string(buffer.at(0)); H5Dvlen_reclaim(datatype, dataspace, H5P_DEFAULT, &(buffer[0])); @@ -423,13 +410,7 @@ bool h5_read_string(char const* filename, char const* varname, std::string& outp hssize_t npoints = H5Sget_simple_extent_npoints(dataspace); std::vector buffer(datatype_size * npoints, '\0'); - herr_t err = H5Dread(dataset, datatype, dataspace, dataspace, H5P_DEFAULT, &(buffer[0])); - if (err < 0) { - std::cerr << ERROR_INFO << "Reading variable '" << varname << "' in file '" << filename << "' not possible." << std::endl; - //TODO: Exception? Only error code? - H5Fclose(h5_file_id); - return false; - } + H5Dread(dataset, datatype, dataspace, dataspace, H5P_DEFAULT, &(buffer[0])); output = std::string(begin(buffer), end(buffer)); } @@ -470,13 +451,6 @@ bool h5_read_strings(char const* filename, char const* varname, std::vector()); std::vector buffer(size * sizeof(char*)); - herr_t err = H5Dread(dataset, datatype, dataspace, dataspace, H5P_DEFAULT, &(buffer[0])); - if (err < 0) { - std::cerr << ERROR_INFO << "Reading variable '" << varname << "' in file '" << filename << "' not possible." << std::endl; - //TODO: Exception? Only error code? - H5Fclose(h5_file_id); - return false; - } + H5Dread(dataset, datatype, dataspace, dataspace, H5P_DEFAULT, &(buffer[0])); for (char const* line : buffer) { if (line == nullptr) { @@ -513,13 +481,7 @@ bool h5_read_strings(char const* filename, char const* varname, std::vector buffer(line_length * num_lines, '\0'); - herr_t err = H5LTread_dataset_string(h5_file_id, varname, &(buffer[0])); - if (err < 0) { - std::cerr << ERROR_INFO << "Reading variable '" << varname << "' in file '" << filename << "' not possible." << std::endl; - //TODO: Exception? Only error code? - H5Fclose(h5_file_id); - return false; - } + H5LTread_dataset_string(h5_file_id, varname, &(buffer[0])); size_t str_start = 0; for (hssize_t lines_found = 0; lines_found < num_lines; ++lines_found) { diff --git a/src/main.cpp b/src/main.cpp index 4d4cc72..b494ff3 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -430,7 +430,7 @@ bool nvidia_log_power = false; bool nvidia_log_temp = false; cl_uint nvidia_power_rate = 0; cl_uint nvidia_temp_rate = 0; - +nvmlDevice_t device; std::vector nvidia_temp; std::vector nvidia_temp_time; @@ -448,9 +448,6 @@ void nvidia_log_power_func() nvidia_power.clear(); nvidia_power_time.clear(); - nvmlDevice_t device; - nvmlDeviceGetHandleByIndex(0, &device); - while (nvidia_log_power == true) { std::this_thread::sleep_for(std::chrono::milliseconds(nvidia_power_rate)); @@ -477,9 +474,6 @@ void nvidia_log_temp_func() nvidia_temp.clear(); nvidia_temp_time.clear(); - nvmlDevice_t device; - nvmlDeviceGetHandleByIndex(0, &device); - while (nvidia_log_temp == true) { std::this_thread::sleep_for(std::chrono::milliseconds(nvidia_temp_rate)); @@ -587,6 +581,21 @@ std::string getOS() } +// command line arguments +char const* getCmdOption(char** begin, char** end, std::string const& option) +{ + char** itr = find(begin, end, option); + if (itr != end && ++itr != end) { + return *itr; + } + return 0; +} + +bool cmdOptionExists(char** begin, char** end, const std::string& option) +{ + return find(begin, end, option) != end; +} + void print_help() { @@ -625,148 +634,95 @@ int main(int argc, char *argv[]) { Timer timer; //used to track performance + cl_uint deviceIndex = 0; // set default OpenCL Device + ocl_dev_mgr& dev_mgr = ocl_dev_mgr::getInstance(); cl_uint devices_availble = dev_mgr.get_avail_dev_num(); - cout << "Available OpenCL devices: " << devices_availble << endl; - // default options - cl_uint deviceIndex = 0; + // parse command line arguments bool benchmark_mode = false; - char const* filename = nullptr; + if (cmdOptionExists(argv, argv + argc, "-b")) { + benchmark_mode = true; + cout << "Benchmark mode" << endl << endl; + } + else { + cout << "Available OpenCL devices: " << devices_availble << endl; + } - // parse command line arguments starting at index 1 (because toolkitICL is the 0th argument) - for (int option_idx = 1; option_idx < argc; ++option_idx) - { - if (argv[option_idx] == string("-h")) { - print_help(); - return 0; - } - else if (argv[option_idx] == string("-b")) { - benchmark_mode = true; - cout << "Benchmark mode" << endl << endl; - } - else if (argv[option_idx] == string("-d")) { - ++option_idx; - try { - deviceIndex = stoi(argv[option_idx]); - } - catch (const std::exception& e) { - cerr << "Error: Could not convert '" << argv[option_idx] << "' to an integer." << endl; - throw(e); - } - } - else if (argv[option_idx] == string("-c")) { - ++option_idx; - filename = argv[option_idx]; - } -#if defined(USENVML) - else if (argv[option_idx] == string("-nvidia_power")) { - ++option_idx; - try { - nvidia_power_rate = stoi(argv[option_idx]); - } - catch (const std::exception& e) { - cerr << "Error: Could not convert '" << argv[option_idx] << "' to an integer." << endl; - throw(e); - } - nvidia_log_power = true; - } - else if (argv[option_idx] == string("-nvidia_temp") || argv[option_idx] == string("-nvidia_temperature")) { - ++option_idx; - try { - nvidia_temp_rate = stoi(argv[option_idx]); - } - catch (const std::exception& e) { - cerr << "Error: Could not convert '" << argv[option_idx] << "' to an integer." << endl; - throw(e); - } - nvidia_log_temp = true; - } -#endif // defined(USENVML) -#if defined(USEIPG) || defined(USEIRAPL) - else if (argv[option_idx] == string("-intel_power")) { - ++option_idx; - try { - intel_power_rate = stoi(argv[option_idx]); - } - catch (const std::exception& e) { - cerr << "Error: Could not convert '" << argv[option_idx] << "' to an integer." << endl; - throw(e); - } - intel_log_power = true; - } - else if (argv[option_idx] == string("-intel_temp") || argv[option_idx] == string("-intel_temperature")) { - ++option_idx; - try { - intel_temp_rate = stoi(argv[option_idx]); - } - catch (const std::exception& e) { - cerr << "Error: Could not convert '" << argv[option_idx] << "' to an integer." << endl; - throw(e); - } - intel_log_temp = true; - } -#endif // defined(USEIPG) || defined(USEIRAPL) -#if defined(USEAMDP) - else if (argv[option_idx] == string("-amd_cpu_power")) { - ++option_idx; - try { - amd_power_rate = stoi(argv[option_idx]); - } - catch (const std::exception& e) { - cerr << "Error: Could not convert '" << argv[option_idx] << "' to an integer." << endl; - throw(e); - } - amd_log_power = true; - } - else if (argv[option_idx] == string("-amd_cpu_temp") || argv[option_idx] == string("-amd_cpu_temperature")) { - ++option_idx; - try { - amd_temp_rate = stoi(argv[option_idx]); - } - catch (const std::exception& e) { - cerr << "Error: Could not convert '" << argv[option_idx] << "' to an integer." << endl; - throw(e); - } - amd_log_temp = true; - } -#endif // defined(USEAMDP) - else { - cerr << "Error: Unknown command line option '" << argv[option_idx] << "'." << endl; - print_help(); - return -1; - } + if (cmdOptionExists(argv, argv + argc, "-d")) { + char const* dev_id = getCmdOption(argv, argv + argc, "-d"); + deviceIndex = atoi(dev_id); } - // check necessary/incompatible command line arguments - if (filename == nullptr) { - cerr << "Error: A configuration file must be given as command line argument." << endl; + if (cmdOptionExists(argv, argv + argc, "-h") || !cmdOptionExists(argv, argv + argc, "-c")) { print_help(); - return -1; + return 0; } + char const* filename = getCmdOption(argv, argv + argc, "-c"); +#if defined(USENVML) + if (cmdOptionExists(argv, argv + argc, "-nvidia_power")) { + char const* tmp = getCmdOption(argv, argv + argc, "-nvidia_power"); + nvidia_power_rate = atoi(tmp); + nvidia_log_power = true; + } + + if (cmdOptionExists(argv, argv + argc, "-nvidia_temp")) { + char const* tmp = getCmdOption(argv, argv + argc, "-nvidia_temp"); + nvidia_temp_rate = atoi(tmp); + nvidia_log_temp = true; + } +#endif +#if defined(USEIPG) || defined(USEIRAPL) + if (cmdOptionExists(argv, argv + argc, "-intel_power")) { + char const* tmp = getCmdOption(argv, argv + argc, "-intel_power"); + intel_power_rate = atoi(tmp); + intel_log_power = true; + } + if (cmdOptionExists(argv, argv + argc, "-intel_temp")) { + char const* tmp = getCmdOption(argv, argv + argc, "-intel_temp"); + intel_temp_rate = atoi(tmp); + intel_log_temp = true; + } +#endif #if defined(USEAMDP) - if (amd_log_temp && amd_log_power) { - cerr << endl << "Error: Concurrent logging on AMD systems is not suported, yet!" << endl; - return -1; + if (cmdOptionExists(argv, argv + argc, "-amd_cpu_power")) { + char const* tmp = getCmdOption(argv, argv + argc, "-amd_cpu_power"); + amd_power_rate = atoi(tmp); + amd_log_power = true; + } + if (cmdOptionExists(argv, argv + argc, "-amd_cpu_temp")) { + char const* tmp = getCmdOption(argv, argv + argc, "-amd_cpu_temp"); + amd_temp_rate = atoi(tmp); + amd_log_temp = true; +} + + if ((amd_log_temp == true) && (amd_log_power == true)) { + cout< kernel_source; h5_read_strings(filename, "kernel_source", kernel_source); ofstream tmp_clfile; @@ -809,7 +765,9 @@ int main(int argc, char *argv[]) { std::vector found_kernels; dev_mgr.get_kernel_names(0, "ocl_Kernel", found_kernels); - cout << "Found Kernels: " << found_kernels.size() << endl; + if (benchmark_mode == false) { + cout << "Found Kernels: " << found_kernels.size() << endl; + } if (found_kernels.size() == 0) { cerr << ERROR_INFO << "No valid kernels found." << endl; return -1; @@ -830,8 +788,7 @@ int main(int argc, char *argv[]) { h5_get_content(filename, "/data/", data_names, data_types, data_sizes); cout << "Creating output HDF5 file..." << endl; - string out_name = filename; - out_name = "out_" + out_name.substr(out_name.find_last_of("/\\") + 1); + string out_name = "out_" + string(filename); if (fileExists(out_name)) { remove(out_name.c_str()); @@ -943,7 +900,9 @@ int main(int argc, char *argv[]) { push_time = timer.getTimeMicroseconds() - push_time; - cout << "Setting range..." << endl; + if (benchmark_mode == false) { + cout << "Setting range..." << endl; + } cl::NDRange range_start; cl::NDRange global_range; @@ -1023,6 +982,38 @@ int main(int argc, char *argv[]) { nvidia_log_power = false; } } + if (nvidia_log_power || nvidia_log_temp) + { + nvmlPciInfo_t nv_pciinfo; + cl_uint nvml_devnum; + cl_uint nvml_devid = 0; + + nvmlDeviceGetCount(&nvml_devnum); + + for (cl_uint i = 0; i < nvml_devnum; i++) + { + nvmlDeviceGetHandleByIndex(i, &device); + nvmlDeviceGetPciInfo(device, &nv_pciinfo); + + std::ostringstream tmp_devid; + tmp_devid << nv_pciinfo.domain << ":" << nv_pciinfo.bus << ":" << nv_pciinfo.device; + + std::size_t found = dev_mgr.getDevicePCIeID(deviceIndex).find(tmp_devid.str()); + if (found != std::string::npos) { + nvml_devid = i; + if (benchmark_mode == false) { + cout << "NVidia OpenCL device " << tmp_devid.str() << " found in NVML device list." << endl; + } + } + else { + cout << "NVidia OpenCL device " << tmp_devid.str() << " not found in NVML device list! Aborting!" << endl; + nvmlShutdown(); + exit(EXIT_FAILURE); + } + } + nvmlDeviceGetHandleByIndex(nvml_devid, &device); + } + std::thread nvidia_log_power_thread(nvidia_log_power_func); std::thread nvidia_log_temp_thread(nvidia_log_temp_func); #endif @@ -1032,7 +1023,7 @@ int main(int argc, char *argv[]) { { cout << "Using Intel Power Gadget interface..." << endl; h5_create_dir(out_name, "/housekeeping"); - h5_create_dir(out_name, "/housekeeping/intel"); + h5_create_dir(out_name, "/housekeeping/intel"); rapl = new Rapl(); } diff --git a/src/ocl_dev_mgr.cpp b/src/ocl_dev_mgr.cpp index 02d5ddc..486ac63 100644 --- a/src/ocl_dev_mgr.cpp +++ b/src/ocl_dev_mgr.cpp @@ -4,365 +4,409 @@ #include #include #include - +#include #include "util.hpp" #include "ocl_dev_mgr.hpp" inline void compile(cl::Program& cl_prog, char const* options) { - std::string compile_options = std::string(" ") + std::string(options); - - try { - cl_prog.build(compile_options.c_str()); - } - catch (cl::BuildError error) { - std::string log = error.getBuildLog()[0].second; - std::cerr << ERROR_INFO << "Build error:\n" << log << std::endl; - } - catch (cl::Error err) { - std::cerr << ERROR_INFO << "Exception:" << err.what() << std::endl; - } + std::string compile_options = std::string(" ") + std::string(options); + + try { + cl_prog.build(compile_options.c_str()); + } + catch (cl::BuildError error) { + std::string log = error.getBuildLog()[0].second; + std::cerr << ERROR_INFO << "Build error:\n" << log << std::endl; + } + catch (cl::Error err) { + std::cerr << ERROR_INFO << "Exception:" << err.what() << std::endl; + } } inline std::string loadProgram(std::string const& input_filename) { - std::ifstream input(input_filename.c_str()); - if (!input.is_open()) { - std::cerr << ERROR_INFO << "Cannot open file '" << input_filename << "'." << std::endl; - exit(1); - } + std::ifstream input(input_filename.c_str()); + if (!input.is_open()) { + std::cerr << ERROR_INFO << "Cannot open file '" << input_filename << "'." << std::endl; + exit(1); + } - return std::string(std::istreambuf_iterator(input), (std::istreambuf_iterator())); + return std::string(std::istreambuf_iterator(input), (std::istreambuf_iterator())); } ocl_dev_mgr::ocl_dev_mgr() { - initialize(); + initialize(); } cl::Kernel* ocl_dev_mgr::getKernelbyName(cl_uint context_idx, std::string const& prog_name, std::string const& kernel_name) { - auto it_p = find(con_list.at(context_idx).prog_names.begin(), con_list.at(context_idx).prog_names.end(), prog_name); - if (it_p == con_list.at(context_idx).prog_names.end()) { - return nullptr; - } - - uint32_t idx = distance(con_list.at(context_idx).prog_names.begin(), it_p); - - if (con_list.at(context_idx).kernels.at(idx).size() > 1) { - for (cl_uint i = 0; i < con_list.at(context_idx).kernels.at(idx).size(); i++) { - if (kernel_name == con_list.at(context_idx).kernel_names.at(idx).at(i)) { - return &(con_list.at(context_idx).kernels.at(idx).at(i)); - } - } - } - - return &(con_list.at(context_idx).kernels.at(idx).at(0)); + auto it_p = find(con_list.at(context_idx).prog_names.begin(), con_list.at(context_idx).prog_names.end(), prog_name); + if (it_p == con_list.at(context_idx).prog_names.end()) { + return nullptr; + } + + uint32_t idx = distance(con_list.at(context_idx).prog_names.begin(), it_p); + + if (con_list.at(context_idx).kernels.at(idx).size() > 1) { + for (cl_uint i = 0; i < con_list.at(context_idx).kernels.at(idx).size(); i++) { + if (kernel_name == con_list.at(context_idx).kernel_names.at(idx).at(i)) { + return &(con_list.at(context_idx).kernels.at(idx).at(i)); + } + } + } + + return &(con_list.at(context_idx).kernels.at(idx).at(0)); } cl::Kernel* ocl_dev_mgr::getKernelbyID(cl_uint context_idx, std::string const& prog_name, cl_ulong kernel_id) { - auto it_p = find(con_list.at(context_idx).prog_names.begin(), con_list.at(context_idx).prog_names.end(), prog_name); - if (it_p == con_list.at(context_idx).prog_names.end()) { - return nullptr; - } + auto it_p = find(con_list.at(context_idx).prog_names.begin(), con_list.at(context_idx).prog_names.end(), prog_name); + if (it_p == con_list.at(context_idx).prog_names.end()) { + return nullptr; + } - uint32_t idx = distance(con_list.at(context_idx).prog_names.begin(), it_p); + uint32_t idx = distance(con_list.at(context_idx).prog_names.begin(), it_p); - return &(con_list.at(context_idx).kernels.at(idx).at(kernel_id)); + return &(con_list.at(context_idx).kernels.at(idx).at(kernel_id)); } std::string ocl_dev_mgr::getDeviceType(cl_uint avail_device_idx) { - if (available_devices.at(avail_device_idx).type == CL_DEVICE_TYPE_CPU) { - return(type_cpu_str); - } - else if (available_devices.at(avail_device_idx).type == CL_DEVICE_TYPE_GPU) { - return(type_gpu_str); - } - else if (available_devices.at(avail_device_idx).type == CL_DEVICE_TYPE_ACCELERATOR) { - return(type_acc_str); - } - else { - return(type_other_str); - } + if (available_devices.at(avail_device_idx).type == CL_DEVICE_TYPE_CPU) { + return(type_cpu_str); + } + else if (available_devices.at(avail_device_idx).type == CL_DEVICE_TYPE_GPU) { + return(type_gpu_str); + } + else if (available_devices.at(avail_device_idx).type == CL_DEVICE_TYPE_ACCELERATOR) { + return(type_acc_str); + } + else { + return(type_other_str); + } } +std::string ocl_dev_mgr::getDevicePCIeID(cl_uint avail_device_idx) +{ +#define CL_DEVICE_PCI_BUS_ID_NV 0x4008 +#define CL_DEVICE_PCI_SLOT_ID_NV 0x4009 +#define CL_DEVICE_TOPOLOGY_AMD 0x4037 +typedef union +{ + struct { cl_uint type; cl_uint data[5]; } raw; + struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie; +} cl_device_topology_amd; + + cl_device_topology_amd amd_topo; + cl_int bus_id; + cl_int slot_id; + std::ostringstream tmp_stream; + + std::size_t found = 0; + found = available_devices.at(avail_device_idx).vendor.find("NVIDIA"); + if (found != std::string::npos) { + available_devices.at(avail_device_idx).device.getInfo(CL_DEVICE_PCI_BUS_ID_NV,&bus_id); + available_devices.at(avail_device_idx).device.getInfo(CL_DEVICE_PCI_SLOT_ID_NV, &slot_id); + + cl_uint domain, bus, dev, func; + domain = bus_id >> 8; + bus = bus_id & 0xff; + dev = slot_id >> 3; + func = slot_id & 0x7; + tmp_stream << domain << ":" << bus << ":" << dev; + } + else + { + found = available_devices.at(avail_device_idx).vendor.find("Advanced Micro Devices"); + if (found != std::string::npos) { + available_devices.at(avail_device_idx).device.getInfo(CL_DEVICE_TOPOLOGY_AMD, &amd_topo); + tmp_stream << "0:" << (unsigned int)amd_topo.pcie.bus << ":" << (unsigned int)amd_topo.pcie.device; //Domain is not returned? + } + } + + + return tmp_stream.str(); +} + +cl_int bus_id; +cl_int slot_id; cl_ulong ocl_dev_mgr::getDeviceList(std::vector& devices) { - // Get list of platforms - std::vector platforms; - cl::Platform::get(&platforms); - - // Enumerate devices - for (cl::Platform const& platform : platforms) - { - std::vector plat_devices; - platform.getDevices(CL_DEVICE_TYPE_ALL, &plat_devices); - devices.insert(devices.end(), plat_devices.begin(), plat_devices.end()); - } - - return devices.size(); + // Get list of platforms + std::vector platforms; + cl::Platform::get(&platforms); + + // Enumerate devices + for (cl::Platform const& platform : platforms) + { + std::vector plat_devices; + platform.getDevices(CL_DEVICE_TYPE_ALL, &plat_devices); + devices.insert(devices.end(), plat_devices.begin(), plat_devices.end()); + } + + return devices.size(); } cl_ulong ocl_dev_mgr::init_device(cl_uint avail_device_idx) { - ocl_context tmp_context; + ocl_context tmp_context; - tmp_context.devices.push_back(available_devices.at(avail_device_idx)); + tmp_context.devices.push_back(available_devices.at(avail_device_idx)); - std::vector tmp_devices; - tmp_devices.push_back(available_devices.at(avail_device_idx).device); + std::vector tmp_devices; + tmp_devices.push_back(available_devices.at(avail_device_idx).device); - cl::Context context(tmp_devices, NULL); - tmp_context.context = context; + cl::Context context(tmp_devices, NULL); + tmp_context.context = context; - tmp_context.queues.push_back(cl::CommandQueue(tmp_context.context, CL_QUEUE_PROFILING_ENABLE)); - //push second queue for async copy - tmp_context.queues.push_back(cl::CommandQueue(tmp_context.context, CL_QUEUE_PROFILING_ENABLE)); + tmp_context.queues.push_back(cl::CommandQueue(tmp_context.context, CL_QUEUE_PROFILING_ENABLE)); + //push second queue for async copy + tmp_context.queues.push_back(cl::CommandQueue(tmp_context.context, CL_QUEUE_PROFILING_ENABLE)); - con_list.push_back(tmp_context); + con_list.push_back(tmp_context); - return con_list.size(); + return con_list.size(); } cl::CommandQueue& ocl_dev_mgr::get_queue(cl_uint context_idx, cl_uint queue_idx) { - return con_list.at(context_idx).queues.at(queue_idx); + return con_list.at(context_idx).queues.at(queue_idx); } cl::Context& ocl_dev_mgr::get_context(cl_uint context_idx) { - return con_list.at(context_idx).context; + return con_list.at(context_idx).context; } cl_ulong ocl_dev_mgr::get_avail_dev_num() { - return num_available_devices; + return num_available_devices; } cl_ulong ocl_dev_mgr::get_context_num() { - return con_list.size(); + return con_list.size(); } bool ocl_dev_mgr::add_program_url(cl_uint context_idx, std::string prog_name, std::string const& url) { - if (!fileExists(url)) { - return false; - } + if (!fileExists(url)) { + return false; + } - return add_program_str(context_idx, prog_name, loadProgram(url)); + return add_program_str(context_idx, prog_name, loadProgram(url)); } bool ocl_dev_mgr::add_program_str(cl_uint context_idx, std::string prog_name, std::string kernel) { - con_list.at(context_idx).programs.push_back(cl::Program(con_list.at(context_idx).context, kernel)); - con_list.at(context_idx).prog_names.push_back(prog_name); - con_list.at(context_idx).kernels.resize(con_list.at(context_idx).kernels.size() + 1); - con_list.at(context_idx).kernel_names.resize(con_list.at(context_idx).kernel_names.size() + 1); - return true; + con_list.at(context_idx).programs.push_back(cl::Program(con_list.at(context_idx).context, kernel)); + con_list.at(context_idx).prog_names.push_back(prog_name); + con_list.at(context_idx).kernels.resize(con_list.at(context_idx).kernels.size() + 1); + con_list.at(context_idx).kernel_names.resize(con_list.at(context_idx).kernel_names.size() + 1); + return true; } cl::Program& ocl_dev_mgr::get_program(cl_uint context_idx, std::string const& prog_name) { - auto it_p = find(con_list.at(context_idx).prog_names.begin(), con_list.at(context_idx).prog_names.end(), prog_name); - if (it_p != con_list.at(context_idx).prog_names.end()) { - return con_list.at(context_idx).programs.at(distance(con_list.at(context_idx).prog_names.begin(), it_p)); - } - else { - std::cerr << ERROR_INFO << "Program '" << prog_name << "' not found." << std::endl; - //TODO: Exception? - return con_list.at(context_idx).programs.at(0); - } + auto it_p = find(con_list.at(context_idx).prog_names.begin(), con_list.at(context_idx).prog_names.end(), prog_name); + if (it_p != con_list.at(context_idx).prog_names.end()) { + return con_list.at(context_idx).programs.at(distance(con_list.at(context_idx).prog_names.begin(), it_p)); + } + else { + std::cerr << ERROR_INFO << "Program '" << prog_name << "' not found." << std::endl; + //TODO: Exception? + return con_list.at(context_idx).programs.at(0); + } } ocl_dev_mgr::ocl_device_info& ocl_dev_mgr::get_avail_dev_info(cl_uint avail_device_idx) { - return available_devices.at(avail_device_idx); + return available_devices.at(avail_device_idx); } ocl_dev_mgr::ocl_device_info& ocl_dev_mgr::get_context_dev_info(cl_uint context_idx, cl_uint device_idx) { - return con_list.at(context_idx).devices.at(device_idx); + return con_list.at(context_idx).devices.at(device_idx); } // return execution time in µs cl_ulong ocl_dev_mgr::execute_kernel(cl::Kernel& kernel, cl::CommandQueue& queue, - cl::NDRange global_range, cl::NDRange local_range, - std::vector& dev_Buffers) + cl::NDRange global_range, cl::NDRange local_range, + std::vector& dev_Buffers) { - cl::Event event; - cl_ulong time_start, time_end; - - try { - for (cl_uint i = 0; i < dev_Buffers.size(); i++) { - kernel.setArg(i, *dev_Buffers[i]); - } - - queue.enqueueNDRangeKernel(kernel, cl::NullRange, global_range, local_range, NULL, &event); - event.wait(); - event.getProfilingInfo(CL_PROFILING_COMMAND_END, &time_end); - event.getProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, &time_start); - } - catch (cl::BuildError error) { - std::string log = error.getBuildLog()[0].second; - std::cerr << ERROR_INFO << "Build error:\n" << log << std::endl; - } - catch (cl::Error err) { - std::cerr << ERROR_INFO << "Exception:" << err.what() << std::endl; - } - - return (time_end - time_start) / 1000; + cl::Event event; + cl_ulong time_start, time_end; + + try { + for (cl_uint i = 0; i < dev_Buffers.size(); i++) { + kernel.setArg(i, *dev_Buffers[i]); + } + + queue.enqueueNDRangeKernel(kernel, cl::NullRange, global_range, local_range, NULL, &event); + event.wait(); + event.getProfilingInfo(CL_PROFILING_COMMAND_END, &time_end); + event.getProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, &time_start); + } + catch (cl::BuildError error) { + std::string log = error.getBuildLog()[0].second; + std::cerr << ERROR_INFO << "Build error:\n" << log << std::endl; + } + catch (cl::Error err) { + std::cerr << ERROR_INFO << "Exception:" << err.what() << std::endl; + } + + return (time_end - time_start) / 1000; } // return execution time in µs cl_ulong ocl_dev_mgr::execute_kernelNA(cl::Kernel& kernel, cl::CommandQueue& queue, - cl::NDRange range_start, cl::NDRange global_range, cl::NDRange local_range) + cl::NDRange range_start, cl::NDRange global_range, cl::NDRange local_range) { - cl::Event event; - cl_ulong time_start, time_end; - - try { - queue.enqueueNDRangeKernel(kernel, range_start, global_range, local_range, NULL, &event); - event.wait(); - event.getProfilingInfo(CL_PROFILING_COMMAND_END, &time_end); - event.getProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, &time_start); - } - catch (cl::BuildError error) { - std::string log = error.getBuildLog()[0].second; - std::cerr << ERROR_INFO << "Build error:\n" << log << std::endl; - } - catch (cl::Error err) { - std::cerr << ERROR_INFO << "Exception:" << err.what() << std::endl; - } - - return (time_end - time_start) / 1000; + cl::Event event; + cl_ulong time_start, time_end; + + try { + queue.enqueueNDRangeKernel(kernel, range_start, global_range, local_range, NULL, &event); + event.wait(); + event.getProfilingInfo(CL_PROFILING_COMMAND_END, &time_end); + event.getProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, &time_start); + } + catch (cl::BuildError error) { + std::string log = error.getBuildLog()[0].second; + std::cerr << ERROR_INFO << "Build error:\n" << log << std::endl; + } + catch (cl::Error err) { + std::cerr << ERROR_INFO << "Exception:" << err.what() << std::endl; + } + + return (time_end - time_start) / 1000; } // don't return execution time in µs void ocl_dev_mgr::execute_kernel_async(cl::Kernel& kernel, cl::CommandQueue& queue, - cl::NDRange global_range, cl::NDRange local_range, - std::vector& dev_Buffers) + cl::NDRange global_range, cl::NDRange local_range, + std::vector& dev_Buffers) { - try { - for (cl_uint i = 0; i < dev_Buffers.size(); i++) { - kernel.setArg(i, *dev_Buffers[i]); - } - - queue.enqueueNDRangeKernel(kernel, cl::NullRange, global_range, local_range, NULL, NULL); - } - catch (cl::BuildError error) { - std::string log = error.getBuildLog()[0].second; - std::cerr << ERROR_INFO << "Build error:\n" << log << std::endl; - } - catch (cl::Error err) { - std::cerr << ERROR_INFO << "Exception:" << err.what() << std::endl; - } + try { + for (cl_uint i = 0; i < dev_Buffers.size(); i++) { + kernel.setArg(i, *dev_Buffers[i]); + } + + queue.enqueueNDRangeKernel(kernel, cl::NullRange, global_range, local_range, NULL, NULL); + } + catch (cl::BuildError error) { + std::string log = error.getBuildLog()[0].second; + std::cerr << ERROR_INFO << "Build error:\n" << log << std::endl; + } + catch (cl::Error err) { + std::cerr << ERROR_INFO << "Exception:" << err.what() << std::endl; + } } // Compile kernels and return the number of compiled kernels. cl_ulong ocl_dev_mgr::compile_kernel(cl_uint context_idx, std::string const& prog_name, std::string const& options) { - std::string compile_options = std::string(" ") + options; + std::string compile_options = std::string(" ") + options; - auto it_p = find(con_list.at(context_idx).prog_names.begin(), con_list.at(context_idx).prog_names.end(), prog_name); - if (it_p == con_list.at(context_idx).prog_names.end()) { - std::cerr << ERROR_INFO << "Program '" << prog_name << "' not found." << std::endl; - //TODO: Exception? - return 0; - } + auto it_p = find(con_list.at(context_idx).prog_names.begin(), con_list.at(context_idx).prog_names.end(), prog_name); + if (it_p == con_list.at(context_idx).prog_names.end()) { + std::cerr << ERROR_INFO << "Program '" << prog_name << "' not found." << std::endl; + //TODO: Exception? + return 0; + } - int32_t idx = distance(con_list.at(context_idx).prog_names.begin(), it_p); + int32_t idx = distance(con_list.at(context_idx).prog_names.begin(), it_p); - try { - con_list.at(context_idx).programs.at(idx).build(compile_options.c_str()); - } - catch (cl::BuildError error) { - std::string log = error.getBuildLog()[0].second; - std::cerr << ERROR_INFO << "Build error:\n" << log << std::endl; - } - catch (cl::Error err) { - std::cerr << ERROR_INFO << "Exception:" << err.what() << std::endl; - } + try { + con_list.at(context_idx).programs.at(idx).build(compile_options.c_str()); + } + catch (cl::BuildError error) { + std::string log = error.getBuildLog()[0].second; + std::cerr << ERROR_INFO << "Build error:\n" << log << std::endl; + } + catch (cl::Error err) { + std::cerr << ERROR_INFO << "Exception:" << err.what() << std::endl; + } - con_list.at(context_idx).programs.at(idx).createKernels(&(con_list.at(context_idx).kernels.at(idx))); + con_list.at(context_idx).programs.at(idx).createKernels(&(con_list.at(context_idx).kernels.at(idx))); - con_list.at(context_idx).kernel_names.at(idx).clear(); //make sure to clear kernel_names list + con_list.at(context_idx).kernel_names.at(idx).clear(); //make sure to clear kernel_names list - for (uint32_t i = 0; i < con_list.at(context_idx).kernels.at(idx).size(); i++) { - con_list.at(context_idx).kernel_names.at(idx).push_back(con_list.at(context_idx).kernels.at(idx).at(i).getInfo()); - } + for (uint32_t i = 0; i < con_list.at(context_idx).kernels.at(idx).size(); i++) { + con_list.at(context_idx).kernel_names.at(idx).push_back(con_list.at(context_idx).kernels.at(idx).at(i).getInfo()); + } - return con_list.at(context_idx).kernels.at(idx).size(); + return con_list.at(context_idx).kernels.at(idx).size(); } cl_ulong ocl_dev_mgr::get_kernel_names(cl_uint context_idx, std::string const& prog_name, std::vector& found_kernels) { - auto it_p = find(con_list.at(context_idx).prog_names.begin(), con_list.at(context_idx).prog_names.end(), prog_name); - if (it_p == con_list.at(context_idx).prog_names.end()) { - std::cerr << ERROR_INFO << "Program '" << prog_name << "' not found." << std::endl; - //TODO: Exception? - return 0; - } + auto it_p = find(con_list.at(context_idx).prog_names.begin(), con_list.at(context_idx).prog_names.end(), prog_name); + if (it_p == con_list.at(context_idx).prog_names.end()) { + std::cerr << ERROR_INFO << "Program '" << prog_name << "' not found." << std::endl; + //TODO: Exception? + return 0; + } - int32_t idx = distance(con_list.at(context_idx).prog_names.begin(), it_p); + int32_t idx = distance(con_list.at(context_idx).prog_names.begin(), it_p); - for (uint32_t kernel_id = 0; kernel_id < con_list.at(context_idx).kernel_names.at(idx).size(); kernel_id++) { - found_kernels.push_back(con_list.at(context_idx).kernel_names.at(idx).at(kernel_id)); - } + for (uint32_t kernel_id = 0; kernel_id < con_list.at(context_idx).kernel_names.at(idx).size(); kernel_id++) { + found_kernels.push_back(con_list.at(context_idx).kernel_names.at(idx).at(kernel_id)); + } - return con_list.at(context_idx).kernel_names.at(idx).size(); + return con_list.at(context_idx).kernel_names.at(idx).size(); } void ocl_dev_mgr::initialize() { - std::vector tmp_devices; - getDeviceList(tmp_devices); - num_available_devices = tmp_devices.size(); - - available_devices = std::vector(num_available_devices); - - for (size_t i = 0; i < tmp_devices.size(); i++) { - - available_devices.at(i).device = tmp_devices.at(i); - std::vector tmp_size; - - available_devices.at(i).device.getInfo(CL_DEVICE_GLOBAL_MEM_SIZE, &available_devices.at(i).max_mem); - available_devices.at(i).device.getInfo(CL_DEVICE_MAX_MEM_ALLOC_SIZE, &available_devices.at(i).max_mem_alloc); - available_devices.at(i).device.getInfo(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, &available_devices.at(i).lw_dim); - available_devices.at(i).device.getInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE, &available_devices.at(i).wg_size); - available_devices.at(i).device.getInfo(CL_DEVICE_MAX_WORK_ITEM_SIZES, &tmp_size); - available_devices.at(i).lw_size = tmp_size.at(0); - available_devices.at(i).device.getInfo(CL_DEVICE_NAME, &available_devices.at(i).name); - available_devices.at(i).device.getInfo(CL_DEVICE_VERSION, &available_devices.at(i).ocl_version); - available_devices.at(i).device.getInfo(CL_DEVICE_TYPE, &available_devices.at(i).type); - available_devices.at(i).device.getInfo(CL_DEVICE_MAX_COMPUTE_UNITS, &available_devices.at(i).compute_units); - - available_devices.at(i).device.getInfo(CL_DEVICE_PLATFORM, &available_devices.at(i).platform); - available_devices.at(i).platform.getInfo(CL_PLATFORM_NAME, &available_devices.at(i).platform_name); - } + std::vector tmp_devices; + getDeviceList(tmp_devices); + num_available_devices = tmp_devices.size(); + + available_devices = std::vector(num_available_devices); + + for (size_t i = 0; i < tmp_devices.size(); i++) { + + available_devices.at(i).device = tmp_devices.at(i); + std::vector tmp_size; + + available_devices.at(i).device.getInfo(CL_DEVICE_GLOBAL_MEM_SIZE, &available_devices.at(i).max_mem); + available_devices.at(i).device.getInfo(CL_DEVICE_MAX_MEM_ALLOC_SIZE, &available_devices.at(i).max_mem_alloc); + available_devices.at(i).device.getInfo(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, &available_devices.at(i).lw_dim); + available_devices.at(i).device.getInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE, &available_devices.at(i).wg_size); + available_devices.at(i).device.getInfo(CL_DEVICE_MAX_WORK_ITEM_SIZES, &tmp_size); + available_devices.at(i).lw_size = tmp_size.at(0); + available_devices.at(i).device.getInfo(CL_DEVICE_NAME, &available_devices.at(i).name); + available_devices.at(i).device.getInfo(CL_DEVICE_VERSION, &available_devices.at(i).ocl_version); + available_devices.at(i).device.getInfo(CL_DEVICE_TYPE, &available_devices.at(i).type); + available_devices.at(i).device.getInfo(CL_DEVICE_MAX_COMPUTE_UNITS, &available_devices.at(i).compute_units); + available_devices.at(i).device.getInfo(CL_DEVICE_PLATFORM, &available_devices.at(i).platform); + available_devices.at(i).device.getInfo(CL_DEVICE_VENDOR, &available_devices.at(i).vendor); + available_devices.at(i).platform.getInfo(CL_PLATFORM_NAME, &available_devices.at(i).platform_name); + } } void ocl_dev_mgr::deinitalize() { - //Deinitialization should be performed automatically, but there seems to be segfaults - //under certain conditions using Windows, hence the vetor is cleared manually - con_list.clear(); + //Deinitialization should be performed automatically, but there seems to be segfaults + //under certain conditions using Windows, hence the vetor is cleared manually + con_list.clear(); } From 8c2c122f919882000497febfa4cceacc182cebe6 Mon Sep 17 00:00:00 2001 From: Philip Heinisch Date: Fri, 29 Mar 2019 14:19:12 +0100 Subject: [PATCH 2/7] Small NVML detection fix. --- src/main.cpp | 2010 +++++++++++++++++++++++++------------------------- 1 file changed, 1007 insertions(+), 1003 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index b494ff3..b3ce65e 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -27,20 +27,20 @@ int gettimeofday(struct timeval * tp, struct timezone * tzp) { - static const uint64_t EPOCH = ((uint64_t)116444736000000000ULL); + static const uint64_t EPOCH = ((uint64_t)116444736000000000ULL); - SYSTEMTIME system_time; - FILETIME file_time; - uint64_t time; + SYSTEMTIME system_time; + FILETIME file_time; + uint64_t time; - GetSystemTime(&system_time); - SystemTimeToFileTime(&system_time, &file_time); - time = ((uint64_t)file_time.dwLowDateTime); - time += ((uint64_t)file_time.dwHighDateTime) << 32; + GetSystemTime(&system_time); + SystemTimeToFileTime(&system_time, &file_time); + time = ((uint64_t)file_time.dwLowDateTime); + time += ((uint64_t)file_time.dwHighDateTime) << 32; - tp->tv_sec = (long)((time - EPOCH) / 10000000L); - tp->tv_usec = (long)(system_time.wMilliseconds * 1000); - return 0; + tp->tv_sec = (long)((time - EPOCH) / 10000000L); + tp->tv_usec = (long)(system_time.wMilliseconds * 1000); + return 0; } #else @@ -49,8 +49,8 @@ int gettimeofday(struct timeval * tp, struct timezone * tzp) inline double timeval2storage(const timeval& timepoint) { - // convert microseconds to seconds using a resolution of milliseconds - return timepoint.tv_sec + 1.e-3 * (timepoint.tv_usec / 1000); + // convert microseconds to seconds using a resolution of milliseconds + return timepoint.tv_sec + 1.e-3 * (timepoint.tv_usec / 1000); } @@ -75,96 +75,96 @@ std::vector AMDP_names; std::vector AMDT_names; bool initAMDPP(uint32_t sample_rate) { - AMDTResult hResult = AMDT_STATUS_OK; - - // Initialize online mode - hResult = AMDTPwrProfileInitialize(AMDT_PWR_MODE_TIMELINE_ONLINE); - // check AMDT_STATUS_OK == hResult - AMDTUInt32 nbrCounters = 0; - AMDTPwrCounterDesc* pCounters = nullptr; - - hResult = AMDTPwrGetSupportedCounters(&nbrCounters, &pCounters); - // check AMDT_STATUS_OK == hResult - - // cout << endl << nbrCounters << endl; - for (AMDTUInt32 idx = 0; idx < nbrCounters; idx++) - { - //get only power - for now - if ((pCounters[idx].m_category == AMDT_PWR_CATEGORY_CORRELATED_POWER) && (amd_power_rate > 0)) + AMDTResult hResult = AMDT_STATUS_OK; + + // Initialize online mode + hResult = AMDTPwrProfileInitialize(AMDT_PWR_MODE_TIMELINE_ONLINE); + // check AMDT_STATUS_OK == hResult + AMDTUInt32 nbrCounters = 0; + AMDTPwrCounterDesc* pCounters = nullptr; + + hResult = AMDTPwrGetSupportedCounters(&nbrCounters, &pCounters); + // check AMDT_STATUS_OK == hResult + + // cout << endl << nbrCounters << endl; + for (AMDTUInt32 idx = 0; idx < nbrCounters; idx++) { - hResult = AMDTPwrEnableCounter(pCounters[idx].m_counterID); - } - if ((pCounters[idx].m_category == AMDT_PWR_CATEGORY_TEMPERATURE) && (amd_temp_rate > 0)) { - hResult = AMDTPwrEnableCounter(pCounters[idx].m_counterID); + //get only power - for now + if ((pCounters[idx].m_category == AMDT_PWR_CATEGORY_CORRELATED_POWER) && (amd_power_rate > 0)) + { + hResult = AMDTPwrEnableCounter(pCounters[idx].m_counterID); + } + if ((pCounters[idx].m_category == AMDT_PWR_CATEGORY_TEMPERATURE) && (amd_temp_rate > 0)) { + hResult = AMDTPwrEnableCounter(pCounters[idx].m_counterID); + } + } + AMDTPwrSetTimerSamplingPeriod(100); - } - AMDTPwrSetTimerSamplingPeriod(100); + //dry profiling run, to see which counter are available for real sampling + + AMDTPwrStartProfiling(); + std::this_thread::sleep_for(std::chrono::milliseconds(500)); + + AMDTPwrSample* pSampleData = nullptr; + AMDTUInt32 nbrSamples = 0; + + hResult = AMDTPwrReadAllEnabledCounters(&nbrSamples, &pSampleData); + + std::vector usable_power_counters; + std::vector usable_temp_counters; + + if ((nullptr != pSampleData) && (nbrSamples > 0)) + { + for (size_t j = 0; j < nbrSamples; j++) + { - //dry profiling run, to see which counter are available for real sampling + for (size_t i = 0; i < pSampleData[0].m_numOfCounter; i++) //hardcoded to use the first sample returned + { + AMDTPwrCounterDesc counterDesc; + AMDTPwrGetCounterDesc(pSampleData[0].m_counterValues->m_counterID, &counterDesc); - AMDTPwrStartProfiling(); - std::this_thread::sleep_for(std::chrono::milliseconds(500)); + if ((counterDesc.m_category == AMDT_PWR_CATEGORY_CORRELATED_POWER)) + { - AMDTPwrSample* pSampleData = nullptr; - AMDTUInt32 nbrSamples = 0; + if (std::find(usable_power_counters.begin(), usable_power_counters.end(), pSampleData[0].m_counterValues->m_counterID) != usable_power_counters.end() == false) { + AMDP_names.push_back(counterDesc.m_name); + usable_power_counters.push_back(pSampleData[0].m_counterValues->m_counterID); + // cout << counterDesc.m_name << " P " << pSampleData[0].m_counterValues->m_counterID << pSampleData[0].m_counterValues->m_counterID << endl; + } + } + if ((counterDesc.m_category == AMDT_PWR_CATEGORY_TEMPERATURE)) + { - hResult = AMDTPwrReadAllEnabledCounters(&nbrSamples, &pSampleData); + if (std::find(usable_temp_counters.begin(), usable_temp_counters.end(), pSampleData[0].m_counterValues->m_counterID) != usable_temp_counters.end() == false) { + AMDT_names.push_back(counterDesc.m_name); + usable_temp_counters.push_back(pSampleData[0].m_counterValues->m_counterID); + // cout << counterDesc.m_name << " T " << pSampleData[0].m_counterValues->m_counterID << endl; + } + } + pSampleData[i].m_counterValues++; + } - std::vector usable_power_counters; - std::vector usable_temp_counters; + } - if ((nullptr != pSampleData) && (nbrSamples > 0)) - { - for (size_t j = 0; j < nbrSamples; j++) - { - for (size_t i = 0; i < pSampleData[0].m_numOfCounter; i++) //hardcoded to use the first sample returned + } + //restart AMD profiling + AMDTPwrStopProfiling(); + AMDTPwrProfileClose(); + hResult = AMDTPwrProfileInitialize(AMDT_PWR_MODE_TIMELINE_ONLINE); + AMDTPwrSetTimerSamplingPeriod(sample_rate); + //reenable usable counters + for (AMDTUInt32 idx = 0; idx < usable_power_counters.size(); idx++) + { + AMDTPwrEnableCounter(usable_power_counters.at(idx)); + } + for (AMDTUInt32 idx = 0; idx < usable_temp_counters.size(); idx++) { - AMDTPwrCounterDesc counterDesc; - AMDTPwrGetCounterDesc(pSampleData[0].m_counterValues->m_counterID, &counterDesc); - - if ((counterDesc.m_category == AMDT_PWR_CATEGORY_CORRELATED_POWER) ) - { - - if (std::find(usable_power_counters.begin(), usable_power_counters.end(), pSampleData[0].m_counterValues->m_counterID) != usable_power_counters.end() == false) { - AMDP_names.push_back(counterDesc.m_name); - usable_power_counters.push_back(pSampleData[0].m_counterValues->m_counterID); - // cout << counterDesc.m_name << " P " << pSampleData[0].m_counterValues->m_counterID << pSampleData[0].m_counterValues->m_counterID << endl; - } - } - if ((counterDesc.m_category == AMDT_PWR_CATEGORY_TEMPERATURE) ) - { - - if (std::find(usable_temp_counters.begin(), usable_temp_counters.end(), pSampleData[0].m_counterValues->m_counterID) != usable_temp_counters.end() == false) { - AMDT_names.push_back(counterDesc.m_name); - usable_temp_counters.push_back(pSampleData[0].m_counterValues->m_counterID); - // cout << counterDesc.m_name << " T " << pSampleData[0].m_counterValues->m_counterID << endl; - } - } - pSampleData[i].m_counterValues++; - } - - } - - - } - //restart AMD profiling - AMDTPwrStopProfiling(); - AMDTPwrProfileClose(); - hResult = AMDTPwrProfileInitialize(AMDT_PWR_MODE_TIMELINE_ONLINE); - AMDTPwrSetTimerSamplingPeriod(sample_rate); - //reenable usable counters - for (AMDTUInt32 idx = 0; idx < usable_power_counters.size(); idx++) - { - AMDTPwrEnableCounter(usable_power_counters.at(idx)); - } - for (AMDTUInt32 idx = 0; idx < usable_temp_counters.size(); idx++) - { - AMDTPwrEnableCounter(usable_temp_counters.at(idx)); - } - AMDTPwrStartProfiling(); - return true; + AMDTPwrEnableCounter(usable_temp_counters.at(idx)); + } + AMDTPwrStartProfiling(); + return true; } std::vector amd_temp_time; @@ -175,39 +175,39 @@ std::vector amd_temp[10]; //Socket 0 void amd_log_power_func() { - while ((amd_log_power == true)|| (amd_log_temp == true)) - { - timeval rawtime; - - std::this_thread::sleep_for(std::chrono::milliseconds(amd_power_rate)); + while ((amd_log_power == true) || (amd_log_temp == true)) + { + timeval rawtime; - AMDTResult hResult = AMDT_STATUS_OK; - AMDTPwrSample* pSampleData = nullptr; - AMDTUInt32 nbrSamples = 0; + std::this_thread::sleep_for(std::chrono::milliseconds(amd_power_rate)); - gettimeofday(&rawtime, NULL); - hResult = AMDTPwrReadAllEnabledCounters(&nbrSamples, &pSampleData); + AMDTResult hResult = AMDT_STATUS_OK; + AMDTPwrSample* pSampleData = nullptr; + AMDTUInt32 nbrSamples = 0; - if ((nullptr != pSampleData) && (nbrSamples > 0)) - { - for (size_t i = 0; i < pSampleData[0].m_numOfCounter; i++) //hardcoded to use the first sample returned - { + gettimeofday(&rawtime, NULL); + hResult = AMDTPwrReadAllEnabledCounters(&nbrSamples, &pSampleData); - AMDTPwrCounterDesc counterDesc; - AMDTPwrGetCounterDesc(pSampleData[0].m_counterValues->m_counterID, &counterDesc); - if ((counterDesc.m_category == AMDT_PWR_CATEGORY_CORRELATED_POWER)) - { - amd_power[i].push_back(pSampleData[0].m_counterValues->m_data); - } - if ((counterDesc.m_category == AMDT_PWR_CATEGORY_TEMPERATURE)) + if ((nullptr != pSampleData) && (nbrSamples > 0)) { - amd_temp[i].push_back(pSampleData[0].m_counterValues->m_data); + for (size_t i = 0; i < pSampleData[0].m_numOfCounter; i++) //hardcoded to use the first sample returned + { + + AMDTPwrCounterDesc counterDesc; + AMDTPwrGetCounterDesc(pSampleData[0].m_counterValues->m_counterID, &counterDesc); + if ((counterDesc.m_category == AMDT_PWR_CATEGORY_CORRELATED_POWER)) + { + amd_power[i].push_back(pSampleData[0].m_counterValues->m_data); + } + if ((counterDesc.m_category == AMDT_PWR_CATEGORY_TEMPERATURE)) + { + amd_temp[i].push_back(pSampleData[0].m_counterValues->m_data); + } + pSampleData[i].m_counterValues++; + } + amd_power_time.push_back(timeval2storage(rawtime)); } - pSampleData[i].m_counterValues++; - } - amd_power_time.push_back(timeval2storage(rawtime)); } - } } @@ -261,7 +261,7 @@ std::vector intel_power_time; std::vector intel_temp_time; std::vector intel_power0[5]; //Socket 0 std::vector intel_power1[5]; //Socket 1 -std::vector MSR_names { "package", "cores", "DRAM", "GT" }; +std::vector MSR_names{ "package", "cores", "DRAM", "GT" }; std::vector intel_temp0; std::vector intel_temp1; @@ -269,77 +269,77 @@ Rapl *rapl; void intel_log_power_func() { - timeval rawtime; - - if (intel_power_rate > 0) - { - uint64_t pkg; - uint64_t pp0; - uint64_t pp1; - uint64_t dram; - - intel_power0[0].clear(); - intel_power0[1].clear(); - intel_power0[2].clear(); - intel_power0[3].clear(); - intel_power1[0].clear(); - intel_power1[1].clear(); - intel_power1[2].clear(); - intel_power1[3].clear(); - intel_power_time.clear(); - - while (intel_log_power == true) + timeval rawtime; + + if (intel_power_rate > 0) { - rapl->sample(); - std::this_thread::sleep_for(std::chrono::milliseconds(intel_power_rate / 2)); - gettimeofday(&rawtime, NULL); - std::this_thread::sleep_for(std::chrono::milliseconds(intel_power_rate / 2)); - intel_power_time.push_back(timeval2storage(rawtime)); - - rapl->get_socket0_data(pkg, pp0, pp1, dram); - intel_power0[0].push_back(pkg); - intel_power0[1].push_back(pp0); - intel_power0[2].push_back(dram); - intel_power0[3].push_back(pp1); - - if (rapl->detect_socket1() == true) - { - rapl->get_socket1_data(pkg, pp0, pp1, dram); - intel_power1[0].push_back(pkg); - intel_power1[1].push_back(pp0); - intel_power1[2].push_back(dram); - intel_power1[3].push_back(pp1); - } - } - } + uint64_t pkg; + uint64_t pp0; + uint64_t pp1; + uint64_t dram; + + intel_power0[0].clear(); + intel_power0[1].clear(); + intel_power0[2].clear(); + intel_power0[3].clear(); + intel_power1[0].clear(); + intel_power1[1].clear(); + intel_power1[2].clear(); + intel_power1[3].clear(); + intel_power_time.clear(); + + while (intel_log_power == true) + { + rapl->sample(); + std::this_thread::sleep_for(std::chrono::milliseconds(intel_power_rate / 2)); + gettimeofday(&rawtime, NULL); + std::this_thread::sleep_for(std::chrono::milliseconds(intel_power_rate / 2)); + intel_power_time.push_back(timeval2storage(rawtime)); + + rapl->get_socket0_data(pkg, pp0, pp1, dram); + intel_power0[0].push_back(pkg); + intel_power0[1].push_back(pp0); + intel_power0[2].push_back(dram); + intel_power0[3].push_back(pp1); + + if (rapl->detect_socket1() == true) + { + rapl->get_socket1_data(pkg, pp0, pp1, dram); + intel_power1[0].push_back(pkg); + intel_power1[1].push_back(pp0); + intel_power1[2].push_back(dram); + intel_power1[3].push_back(pp1); + } + } + } } void intel_log_temp_func() { - uint32_t temp0 = 0, temp1 = 0; - timeval rawtime; - - if (intel_temp_rate > 0) - { - intel_temp0.clear(); - intel_temp1.clear(); - intel_temp_time.clear(); - - while (intel_log_temp == true) { - std::this_thread::sleep_for(std::chrono::milliseconds(intel_temp_rate)); - - gettimeofday(&rawtime, NULL); - temp0 = rapl->get_temp0(); - if (rapl->detect_socket1() == true) - { - temp1 = rapl->get_temp1(); - intel_temp1.push_back(temp1); - } - - intel_temp_time.push_back(timeval2storage(rawtime)); - intel_temp0.push_back(temp0); - } - } + uint32_t temp0 = 0, temp1 = 0; + timeval rawtime; + + if (intel_temp_rate > 0) + { + intel_temp0.clear(); + intel_temp1.clear(); + intel_temp_time.clear(); + + while (intel_log_temp == true) { + std::this_thread::sleep_for(std::chrono::milliseconds(intel_temp_rate)); + + gettimeofday(&rawtime, NULL); + temp0 = rapl->get_temp0(); + if (rapl->detect_socket1() == true) + { + temp1 = rapl->get_temp1(); + intel_temp1.push_back(temp1); + } + + intel_temp_time.push_back(timeval2storage(rawtime)); + intel_temp0.push_back(temp0); + } + } } #endif // USEIRAPL @@ -353,8 +353,8 @@ Rapl *rapl; std::string utf16ToUtf8(const std::wstring& utf16Str) { - std::wstring_convert> conv; - return conv.to_bytes(utf16Str); + std::wstring_convert> conv; + return conv.to_bytes(utf16Str); } std::vector intel_power_time; @@ -371,51 +371,51 @@ std::vector intel_temp; void intel_log_temp_func() { - // int temp; - timeval rawtime; - - if (intel_temp_rate > 0) - { - intel_temp.clear(); - intel_temp_time.clear(); + // int temp; + timeval rawtime; - while (intel_log_temp == true) + if (intel_temp_rate > 0) { - std::this_thread::sleep_for(std::chrono::milliseconds(intel_temp_rate)); - intel_temp.push_back(rapl->get_temp0()); - gettimeofday(&rawtime, NULL); - intel_temp_time.push_back(timeval2storage(rawtime)); + intel_temp.clear(); + intel_temp_time.clear(); + + while (intel_log_temp == true) + { + std::this_thread::sleep_for(std::chrono::milliseconds(intel_temp_rate)); + intel_temp.push_back(rapl->get_temp0()); + gettimeofday(&rawtime, NULL); + intel_temp_time.push_back(timeval2storage(rawtime)); + } } - } } void intel_log_power_func() { - double data[3]; - int nData; - timeval rawtime; - - if (intel_power_rate > 0) - { - for (unsigned int i = 0; i < MSR.size(); i++) { - intel_power[i].clear(); - } - intel_power_time.clear(); - - while (intel_log_power == true) + double data[3]; + int nData; + timeval rawtime; + + if (intel_power_rate > 0) { - std::this_thread::sleep_for(std::chrono::milliseconds(intel_power_rate)); - rapl->sample(); - gettimeofday(&rawtime, NULL); - intel_power_time.push_back(timeval2storage(rawtime)); + for (unsigned int i = 0; i < MSR.size(); i++) { + intel_power[i].clear(); + } + intel_power_time.clear(); - for (unsigned int i = 0; i < MSR.size(); i++) { - rapl->GetPowerData(0, MSR.at(i), data, &nData); - intel_power[i].push_back((float)data[0]); - } + while (intel_log_power == true) + { + std::this_thread::sleep_for(std::chrono::milliseconds(intel_power_rate)); + rapl->sample(); + gettimeofday(&rawtime, NULL); + intel_power_time.push_back(timeval2storage(rawtime)); + + for (unsigned int i = 0; i < MSR.size(); i++) { + rapl->GetPowerData(0, MSR.at(i), data, &nData); + intel_power[i].push_back((float)data[0]); + } + } } - } } @@ -439,53 +439,53 @@ std::vector nvidia_power_time; void nvidia_log_power_func() { - if (nvidia_power_rate > 0) - { - unsigned int temp; - nvmlReturn_t result; - timeval rawtime; + if (nvidia_power_rate > 0) + { + unsigned int temp; + nvmlReturn_t result; + timeval rawtime; - nvidia_power.clear(); - nvidia_power_time.clear(); + nvidia_power.clear(); + nvidia_power_time.clear(); - while (nvidia_log_power == true) - { - std::this_thread::sleep_for(std::chrono::milliseconds(nvidia_power_rate)); + while (nvidia_log_power == true) + { + std::this_thread::sleep_for(std::chrono::milliseconds(nvidia_power_rate)); - nvmlDeviceGetPowerUsage(device, &temp); - gettimeofday(&rawtime, NULL); - nvidia_power_time.push_back(timeval2storage(rawtime)); - // convert milliwatt to watt - nvidia_power.push_back(1.e-3f * (float)(temp)); - } + nvmlDeviceGetPowerUsage(device, &temp); + gettimeofday(&rawtime, NULL); + nvidia_power_time.push_back(timeval2storage(rawtime)); + // convert milliwatt to watt + nvidia_power.push_back(1.e-3f * (float)(temp)); + } - nvmlShutdown(); - } + nvmlShutdown(); + } } void nvidia_log_temp_func() { - if (nvidia_temp_rate > 0) - { - unsigned int temp; - nvmlReturn_t result; - timeval rawtime; + if (nvidia_temp_rate > 0) + { + unsigned int temp; + nvmlReturn_t result; + timeval rawtime; - nvidia_temp.clear(); - nvidia_temp_time.clear(); + nvidia_temp.clear(); + nvidia_temp_time.clear(); - while (nvidia_log_temp == true) - { - std::this_thread::sleep_for(std::chrono::milliseconds(nvidia_temp_rate)); - result = nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &temp); - gettimeofday(&rawtime, NULL); - nvidia_temp_time.push_back(timeval2storage(rawtime)); - nvidia_temp.push_back(temp); - } + while (nvidia_log_temp == true) + { + std::this_thread::sleep_for(std::chrono::milliseconds(nvidia_temp_rate)); + result = nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &temp); + gettimeofday(&rawtime, NULL); + nvidia_temp_time.push_back(timeval2storage(rawtime)); + nvidia_temp.push_back(temp); + } - nvmlShutdown(); + nvmlShutdown(); - } + } } #endif // USENVML @@ -498,20 +498,20 @@ typedef LONG NTSTATUS, *PNTSTATUS; typedef NTSTATUS(WINAPI* RtlGetVersionPtr)(PRTL_OSVERSIONINFOEXW); RTL_OSVERSIONINFOEXW GetRealOSVersion() { - HMODULE hMod = ::GetModuleHandleW(L"ntdll.dll"); - if (hMod) { - RtlGetVersionPtr fxPtr = (RtlGetVersionPtr)::GetProcAddress(hMod, "RtlGetVersion"); - if (fxPtr != nullptr) { - RTL_OSVERSIONINFOEXW rovi = { 0 }; - rovi.dwOSVersionInfoSize = sizeof(rovi); - if (STATUS_SUCCESS == fxPtr(&rovi)) { - return rovi; - } - } - } - - RTL_OSVERSIONINFOEXW rovi = { 0 }; - return rovi; + HMODULE hMod = ::GetModuleHandleW(L"ntdll.dll"); + if (hMod) { + RtlGetVersionPtr fxPtr = (RtlGetVersionPtr)::GetProcAddress(hMod, "RtlGetVersion"); + if (fxPtr != nullptr) { + RTL_OSVERSIONINFOEXW rovi = { 0 }; + rovi.dwOSVersionInfoSize = sizeof(rovi); + if (STATUS_SUCCESS == fxPtr(&rovi)) { + return rovi; + } + } + } + + RTL_OSVERSIONINFOEXW rovi = { 0 }; + return rovi; } #else #include @@ -519,877 +519,881 @@ RTL_OSVERSIONINFOEXW GetRealOSVersion() { std::string getOS() { - std::stringstream version; + std::stringstream version; #if defined(_WIN32) - version << "Windows " << GetRealOSVersion().dwMajorVersion << "." << GetRealOSVersion().dwMinorVersion; + version << "Windows " << GetRealOSVersion().dwMajorVersion << "." << GetRealOSVersion().dwMinorVersion; - if (GetRealOSVersion().wProductType == VER_NT_WORKSTATION) { - version << " Workstation"; - } - else { - version << " Server"; - } + if (GetRealOSVersion().wProductType == VER_NT_WORKSTATION) { + version << " Workstation"; + } + else { + version << " Server"; + } #elif defined(__APPLE__) - char line[256]; - string product_name, product_version; - FILE* sw_vers = popen("sw_vers", "r"); - while (fgets(&line[0], sizeof(line), sw_vers) != nullptr) { - if (strncmp(line, "ProductName:", 12) == 0) { - product_name = string(&line[13]); - product_name.pop_back(); // erase the newline - } - else if (strncmp(line, "ProductVersion:", 15) == 0) { - product_version = string(&line[16]); - product_version.pop_back(); // erase the newline + char line[256]; + string product_name, product_version; + FILE* sw_vers = popen("sw_vers", "r"); + while (fgets(&line[0], sizeof(line), sw_vers) != nullptr) { + if (strncmp(line, "ProductName:", 12) == 0) { + product_name = string(&line[13]); + product_name.pop_back(); // erase the newline + } + else if (strncmp(line, "ProductVersion:", 15) == 0) { + product_version = string(&line[16]); + product_version.pop_back(); // erase the newline + } } - } - pclose(sw_vers); - version << product_name << " " << product_version; + pclose(sw_vers); + version << product_name << " " << product_version; #else // linux - struct utsname unameData; - uname(&unameData); - string line; + struct utsname unameData; + uname(&unameData); + string line; - version << unameData.sysname << " "; + version << unameData.sysname << " "; - ifstream rel_file("/etc/os-release"); - if (rel_file.is_open()) { - while (rel_file.good()) { - getline(rel_file, line); - if (line.size() >= 1 && line.substr(0, 11) == "PRETTY_NAME") { - version << line.substr(13, line.length() - 14); - break; - } - } + ifstream rel_file("/etc/os-release"); + if (rel_file.is_open()) { + while (rel_file.good()) { + getline(rel_file, line); + if (line.size() >= 1 && line.substr(0, 11) == "PRETTY_NAME") { + version << line.substr(13, line.length() - 14); + break; + } + } - rel_file.close(); - } - else { - version << "Unknown Distribution"; - } + rel_file.close(); + } + else { + version << "Unknown Distribution"; + } - version << "/" << unameData.release << "/" << unameData.version; + version << "/" << unameData.release << "/" << unameData.version; #endif - return version.str(); + return version.str(); } // command line arguments char const* getCmdOption(char** begin, char** end, std::string const& option) { - char** itr = find(begin, end, option); - if (itr != end && ++itr != end) { - return *itr; - } - return 0; + char** itr = find(begin, end, option); + if (itr != end && ++itr != end) { + return *itr; + } + return 0; } bool cmdOptionExists(char** begin, char** end, const std::string& option) { - return find(begin, end, option) != end; + return find(begin, end, option) != end; } void print_help() { - cout - << "Usage: toolkitICL [options] -c config.h5" << endl - << "Options:" << endl - << " -d device_id: \n" - " Use the device specified by `device_id`." << endl - << " -b: \n" - " Activate the benchmark mode (additional delay before & after runs)." << endl - << " -c config.h5: \n" - " Specify the URL `config.h5` of the HDF5 configuration file." << endl + cout + << "Usage: toolkitICL [options] -c config.h5" << endl + << "Options:" << endl + << " -d device_id: \n" + " Use the device specified by `device_id`." << endl + << " -b: \n" + " Activate the benchmark mode (additional delay before & after runs)." << endl + << " -c config.h5: \n" + " Specify the URL `config.h5` of the HDF5 configuration file." << endl #if defined(USENVML) - << " -nvidia_power sample_rate: \n" - " Log Nvidia GPU power consumption with `sample_rate` (ms)" << endl - << " -nvidia_temp sample_rate: \n" - " Log Nvidia GPU temperature with `sample_rate` (ms)" << endl + << " -nvidia_power sample_rate: \n" + " Log Nvidia GPU power consumption with `sample_rate` (ms)" << endl + << " -nvidia_temp sample_rate: \n" + " Log Nvidia GPU temperature with `sample_rate` (ms)" << endl #endif #if defined(USEIPG) || defined(USEIRAPL) - << " -intel_power sample_rate: \n" - " Log Intel system power consumption with `sample_rate` (ms)" << endl - << " -intel_temp sample_rate: \n" - " Log Intel package temperature with `sample_rate` (ms)" << endl + << " -intel_power sample_rate: \n" + " Log Intel system power consumption with `sample_rate` (ms)" << endl + << " -intel_temp sample_rate: \n" + " Log Intel package temperature with `sample_rate` (ms)" << endl #endif #if defined(USEAMDP) - << " -amd_cpu_power sample_rate: \n" - " Log AMD CPU power consumption with `sample_rate` (ms)" << endl - << " -amd_cpu_temp sample_rate: \n" - " Log AMD CPU temperaturen with `sample_rate` (ms)" << endl + << " -amd_cpu_power sample_rate: \n" + " Log AMD CPU power consumption with `sample_rate` (ms)" << endl + << " -amd_cpu_temp sample_rate: \n" + " Log AMD CPU temperaturen with `sample_rate` (ms)" << endl #endif - << endl; + << endl; } int main(int argc, char *argv[]) { - Timer timer; //used to track performance + Timer timer; //used to track performance - cl_uint deviceIndex = 0; // set default OpenCL Device + cl_uint deviceIndex = 0; // set default OpenCL Device - ocl_dev_mgr& dev_mgr = ocl_dev_mgr::getInstance(); - cl_uint devices_availble = dev_mgr.get_avail_dev_num(); + ocl_dev_mgr& dev_mgr = ocl_dev_mgr::getInstance(); + cl_uint devices_availble = dev_mgr.get_avail_dev_num(); - // parse command line arguments - bool benchmark_mode = false; - if (cmdOptionExists(argv, argv + argc, "-b")) { - benchmark_mode = true; - cout << "Benchmark mode" << endl << endl; - } - else { - cout << "Available OpenCL devices: " << devices_availble << endl; - } + // parse command line arguments + bool benchmark_mode = false; + if (cmdOptionExists(argv, argv + argc, "-b")) { + benchmark_mode = true; + cout << "Benchmark mode" << endl << endl; + } + else { + cout << "Available OpenCL devices: " << devices_availble << endl; + } - if (cmdOptionExists(argv, argv + argc, "-d")) { - char const* dev_id = getCmdOption(argv, argv + argc, "-d"); - deviceIndex = atoi(dev_id); - } + if (cmdOptionExists(argv, argv + argc, "-d")) { + char const* dev_id = getCmdOption(argv, argv + argc, "-d"); + deviceIndex = atoi(dev_id); + } - if (cmdOptionExists(argv, argv + argc, "-h") || !cmdOptionExists(argv, argv + argc, "-c")) { - print_help(); - return 0; - } - char const* filename = getCmdOption(argv, argv + argc, "-c"); + if (cmdOptionExists(argv, argv + argc, "-h") || !cmdOptionExists(argv, argv + argc, "-c")) { + print_help(); + return 0; + } + char const* filename = getCmdOption(argv, argv + argc, "-c"); #if defined(USENVML) - if (cmdOptionExists(argv, argv + argc, "-nvidia_power")) { - char const* tmp = getCmdOption(argv, argv + argc, "-nvidia_power"); - nvidia_power_rate = atoi(tmp); - nvidia_log_power = true; - } - - if (cmdOptionExists(argv, argv + argc, "-nvidia_temp")) { - char const* tmp = getCmdOption(argv, argv + argc, "-nvidia_temp"); - nvidia_temp_rate = atoi(tmp); - nvidia_log_temp = true; - } + if (cmdOptionExists(argv, argv + argc, "-nvidia_power")) { + char const* tmp = getCmdOption(argv, argv + argc, "-nvidia_power"); + nvidia_power_rate = atoi(tmp); + nvidia_log_power = true; + } + + if (cmdOptionExists(argv, argv + argc, "-nvidia_temp")) { + char const* tmp = getCmdOption(argv, argv + argc, "-nvidia_temp"); + nvidia_temp_rate = atoi(tmp); + nvidia_log_temp = true; + } #endif #if defined(USEIPG) || defined(USEIRAPL) - if (cmdOptionExists(argv, argv + argc, "-intel_power")) { - char const* tmp = getCmdOption(argv, argv + argc, "-intel_power"); - intel_power_rate = atoi(tmp); - intel_log_power = true; - } - if (cmdOptionExists(argv, argv + argc, "-intel_temp")) { - char const* tmp = getCmdOption(argv, argv + argc, "-intel_temp"); - intel_temp_rate = atoi(tmp); - intel_log_temp = true; - } + if (cmdOptionExists(argv, argv + argc, "-intel_power")) { + char const* tmp = getCmdOption(argv, argv + argc, "-intel_power"); + intel_power_rate = atoi(tmp); + intel_log_power = true; + } + if (cmdOptionExists(argv, argv + argc, "-intel_temp")) { + char const* tmp = getCmdOption(argv, argv + argc, "-intel_temp"); + intel_temp_rate = atoi(tmp); + intel_log_temp = true; + } #endif #if defined(USEAMDP) - if (cmdOptionExists(argv, argv + argc, "-amd_cpu_power")) { - char const* tmp = getCmdOption(argv, argv + argc, "-amd_cpu_power"); - amd_power_rate = atoi(tmp); - amd_log_power = true; - } - if (cmdOptionExists(argv, argv + argc, "-amd_cpu_temp")) { - char const* tmp = getCmdOption(argv, argv + argc, "-amd_cpu_temp"); - amd_temp_rate = atoi(tmp); - amd_log_temp = true; -} + if (cmdOptionExists(argv, argv + argc, "-amd_cpu_power")) { + char const* tmp = getCmdOption(argv, argv + argc, "-amd_cpu_power"); + amd_power_rate = atoi(tmp); + amd_log_power = true; + } + if (cmdOptionExists(argv, argv + argc, "-amd_cpu_temp")) { + char const* tmp = getCmdOption(argv, argv + argc, "-amd_cpu_temp"); + amd_temp_rate = atoi(tmp); + amd_log_temp = true; + } - if ((amd_log_temp == true) && (amd_log_power == true)) { - cout< kernel_source; + h5_read_strings(filename, "kernel_source", kernel_source); + ofstream tmp_clfile; + tmp_clfile.open("tmp_kernel.cl"); + for (string const& kernel : kernel_source) { + tmp_clfile << kernel << endl; + } + + tmp_clfile.close(); + kernel_url = string("tmp_kernel.cl"); + } + else { + cerr << "No kernel information found! " << endl; + return -1; + } + + std::vector kernel_list; + h5_read_strings(filename, "kernels", kernel_list); + + cl_ulong kernel_repetitions = 1; + if (h5_check_object(filename, "settings/kernel_repetitions")) { + kernel_repetitions = h5_read_single(filename, "settings/kernel_repetitions"); + } + if (kernel_repetitions <= 0) { + cout << "Warning: Setting `kernel_repetitions = " << kernel_repetitions << "` implies that no kernels are executed." << endl; + } + + dev_mgr.add_program_url(0, "ocl_Kernel", kernel_url); + + string settings; + h5_read_string(filename, "settings/kernel_settings", settings); + + + uint64_t num_kernels_found = 0; + num_kernels_found = dev_mgr.compile_kernel(0, "ocl_Kernel", settings); + if (num_kernels_found == 0) { + cerr << ERROR_INFO << "No valid kernels found" << endl; + return -1; + } + + std::vector found_kernels; + dev_mgr.get_kernel_names(0, "ocl_Kernel", found_kernels); + if (benchmark_mode == false) { + cout << "Found Kernels: " << found_kernels.size() << endl; + } + if (found_kernels.size() == 0) { + cerr << ERROR_INFO << "No valid kernels found." << endl; + return -1; + } + + cout << "Number of Kernels to execute: " << kernel_list.size() * kernel_repetitions << endl; + + //TODO: Clean up; debug mode? + // for (uint32_t kernel_idx = 0; kernel_idx < kernel_list.size(); kernel_idx++) { + // cout <<"Found : "<< kernel_list.at(kernel_idx) << endl; + // } + + cout << "Ingesting HDF5 config file..." << endl; + + std::vector data_names; + std::vector data_types; + std::vector data_sizes; + h5_get_content(filename, "/data/", data_names, data_types, data_sizes); + + cout << "Creating output HDF5 file..." << endl; + string out_name = "out_" + string(filename); + + if (fileExists(out_name)) { + remove(out_name.c_str()); + cout << "Old HDF5 data file found and deleted!" << endl; + } + + h5_create_dir(out_name, "/settings"); + h5_write_string(out_name, "/settings/kernel_settings", settings); + h5_write_single(out_name, "/settings/kernel_repetitions", kernel_repetitions); + + std::vector data_in; + bool blocking = CL_TRUE; + + //TODO: Implement functionality! Allow other integer types instead of cl_int? + vector data_rw_flags(data_names.size(), 0); + + uint64_t push_time, pull_time; + push_time = timer.getTimeMicroseconds(); + + for (cl_uint i = 0; i < data_names.size(); i++) { + try { + uint8_t *tmp_data = nullptr; + size_t var_size = 0; + + switch (data_types.at(i)) { + case H5_float: + var_size = data_sizes.at(i) * sizeof(float); + tmp_data = new uint8_t[var_size]; + h5_read_buffer(filename, data_names.at(i).c_str(), (float*)tmp_data); + break; + case H5_double: + var_size = data_sizes.at(i) * sizeof(double); + tmp_data = new uint8_t[var_size]; + h5_read_buffer(filename, data_names.at(i).c_str(), (double*)tmp_data); + break; + case H5_char: + var_size = data_sizes.at(i) * sizeof(cl_char); + tmp_data = new uint8_t[var_size]; + h5_read_buffer(filename, data_names.at(i).c_str(), (cl_char*)tmp_data); + break; + case H5_uchar: + var_size = data_sizes.at(i) * sizeof(cl_uchar); + tmp_data = new uint8_t[var_size]; + h5_read_buffer(filename, data_names.at(i).c_str(), (cl_uchar*)tmp_data); + break; + case H5_short: + var_size = data_sizes.at(i) * sizeof(cl_short); + tmp_data = new uint8_t[var_size]; + h5_read_buffer(filename, data_names.at(i).c_str(), (cl_short*)tmp_data); + break; + case H5_ushort: + var_size = data_sizes.at(i) * sizeof(cl_ushort); + tmp_data = new uint8_t[var_size]; + h5_read_buffer(filename, data_names.at(i).c_str(), (cl_ushort*)tmp_data); + break; + case H5_int: + var_size = data_sizes.at(i) * sizeof(cl_int); + tmp_data = new uint8_t[var_size]; + h5_read_buffer(filename, data_names.at(i).c_str(), (cl_int*)tmp_data); + break; + case H5_uint: + var_size = data_sizes.at(i) * sizeof(cl_uint); + tmp_data = new uint8_t[var_size]; + h5_read_buffer(filename, data_names.at(i).c_str(), (cl_uint*)tmp_data); + break; + case H5_long: + var_size = data_sizes.at(i) * sizeof(cl_long); + tmp_data = new uint8_t[var_size]; + h5_read_buffer(filename, data_names.at(i).c_str(), (cl_long*)tmp_data); + break; + case H5_ulong: + var_size = data_sizes.at(i) * sizeof(cl_ulong); + tmp_data = new uint8_t[var_size]; + h5_read_buffer(filename, data_names.at(i).c_str(), (cl_ulong*)tmp_data); + break; + default: + cerr << ERROR_INFO << "Data type '" << data_types.at(i) << "' unknown." << endl; + break; + } + + switch (data_rw_flags.at(i)) { + case 0: + data_in.push_back(cl::Buffer(dev_mgr.get_context(0), CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, var_size)); + dev_mgr.get_queue(0, 0).enqueueWriteBuffer(data_in.back(), blocking, 0, var_size, tmp_data); + break; + case 1: + data_in.push_back(cl::Buffer(dev_mgr.get_context(0), CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, var_size)); + dev_mgr.get_queue(0, 0).enqueueWriteBuffer(data_in.back(), blocking, 0, var_size, tmp_data); + break; + case 2: + data_in.push_back(cl::Buffer(dev_mgr.get_context(0), CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, var_size)); + break; + } + + for (uint32_t kernel_idx = 0; kernel_idx < found_kernels.size(); kernel_idx++) { + dev_mgr.getKernelbyName(0, "ocl_Kernel", found_kernels.at(kernel_idx))->setArg(i, data_in.back()); + } + + if (tmp_data != nullptr) { + delete[] tmp_data; tmp_data = nullptr; + } + } + catch (cl::Error err) { + std::cerr << ERROR_INFO << "Exception: " << err.what() << std::endl; + } + } + + dev_mgr.get_queue(0, 0).finish(); // Buffer Copy is asynchronous + + push_time = timer.getTimeMicroseconds() - push_time; + if (benchmark_mode == false) { - cout << "Reading kernel from file: " << kernel_url << "... " << endl; - } - } - else if (h5_check_object(filename, "kernel_source") == true) { - if (benchmark_mode == false) { - cout << "Reading kernel from HDF5 file... " << endl; - } - std::vector kernel_source; - h5_read_strings(filename, "kernel_source", kernel_source); - ofstream tmp_clfile; - tmp_clfile.open("tmp_kernel.cl"); - for (string const& kernel : kernel_source) { - tmp_clfile << kernel << endl; - } - - tmp_clfile.close(); - kernel_url = string("tmp_kernel.cl"); - } - else { - cerr << "No kernel information found! " << endl; - return -1; - } - - std::vector kernel_list; - h5_read_strings(filename, "kernels", kernel_list); - - cl_ulong kernel_repetitions = 1; - if (h5_check_object(filename, "settings/kernel_repetitions")) { - kernel_repetitions = h5_read_single(filename, "settings/kernel_repetitions"); - } - if (kernel_repetitions <= 0) { - cout << "Warning: Setting `kernel_repetitions = " << kernel_repetitions << "` implies that no kernels are executed." << endl; - } - - dev_mgr.add_program_url(0, "ocl_Kernel", kernel_url); - - string settings; - h5_read_string(filename, "settings/kernel_settings", settings); - - - uint64_t num_kernels_found = 0; - num_kernels_found = dev_mgr.compile_kernel(0, "ocl_Kernel", settings); - if (num_kernels_found == 0) { - cerr << ERROR_INFO << "No valid kernels found" << endl; - return -1; - } - - std::vector found_kernels; - dev_mgr.get_kernel_names(0, "ocl_Kernel", found_kernels); - if (benchmark_mode == false) { - cout << "Found Kernels: " << found_kernels.size() << endl; - } - if (found_kernels.size() == 0) { - cerr << ERROR_INFO << "No valid kernels found." << endl; - return -1; - } - - cout << "Number of Kernels to execute: " << kernel_list.size() * kernel_repetitions << endl; - - //TODO: Clean up; debug mode? - // for (uint32_t kernel_idx = 0; kernel_idx < kernel_list.size(); kernel_idx++) { - // cout <<"Found : "<< kernel_list.at(kernel_idx) << endl; - // } - - cout << "Ingesting HDF5 config file..." << endl; - - std::vector data_names; - std::vector data_types; - std::vector data_sizes; - h5_get_content(filename, "/data/", data_names, data_types, data_sizes); - - cout << "Creating output HDF5 file..." << endl; - string out_name = "out_" + string(filename); - - if (fileExists(out_name)) { - remove(out_name.c_str()); - cout << "Old HDF5 data file found and deleted!" << endl; - } - - h5_create_dir(out_name, "/settings"); - h5_write_string(out_name, "/settings/kernel_settings", settings); - h5_write_single(out_name, "/settings/kernel_repetitions", kernel_repetitions); - - std::vector data_in; - bool blocking = CL_TRUE; - - //TODO: Implement functionality! Allow other integer types instead of cl_int? - vector data_rw_flags(data_names.size(), 0); - - uint64_t push_time, pull_time; - push_time = timer.getTimeMicroseconds(); - - for (cl_uint i = 0; i < data_names.size(); i++) { - try { - uint8_t *tmp_data = nullptr; - size_t var_size = 0; - - switch (data_types.at(i)) { - case H5_float: - var_size = data_sizes.at(i) * sizeof(float); - tmp_data = new uint8_t[var_size]; - h5_read_buffer(filename, data_names.at(i).c_str(), (float*)tmp_data); - break; - case H5_double: - var_size = data_sizes.at(i) * sizeof(double); - tmp_data = new uint8_t[var_size]; - h5_read_buffer(filename, data_names.at(i).c_str(), (double*)tmp_data); - break; - case H5_char: - var_size = data_sizes.at(i) * sizeof(cl_char); - tmp_data = new uint8_t[var_size]; - h5_read_buffer(filename, data_names.at(i).c_str(), (cl_char*)tmp_data); - break; - case H5_uchar: - var_size = data_sizes.at(i) * sizeof(cl_uchar); - tmp_data = new uint8_t[var_size]; - h5_read_buffer(filename, data_names.at(i).c_str(), (cl_uchar*)tmp_data); - break; - case H5_short: - var_size = data_sizes.at(i) * sizeof(cl_short); - tmp_data = new uint8_t[var_size]; - h5_read_buffer(filename, data_names.at(i).c_str(), (cl_short*)tmp_data); - break; - case H5_ushort: - var_size = data_sizes.at(i) * sizeof(cl_ushort); - tmp_data = new uint8_t[var_size]; - h5_read_buffer(filename, data_names.at(i).c_str(), (cl_ushort*)tmp_data); - break; - case H5_int: - var_size = data_sizes.at(i) * sizeof(cl_int); - tmp_data = new uint8_t[var_size]; - h5_read_buffer(filename, data_names.at(i).c_str(), (cl_int*)tmp_data); - break; - case H5_uint: - var_size = data_sizes.at(i) * sizeof(cl_uint); - tmp_data = new uint8_t[var_size]; - h5_read_buffer(filename, data_names.at(i).c_str(), (cl_uint*)tmp_data); - break; - case H5_long: - var_size = data_sizes.at(i) * sizeof(cl_long); - tmp_data = new uint8_t[var_size]; - h5_read_buffer(filename, data_names.at(i).c_str(), (cl_long*)tmp_data); - break; - case H5_ulong: - var_size = data_sizes.at(i) * sizeof(cl_ulong); - tmp_data = new uint8_t[var_size]; - h5_read_buffer(filename, data_names.at(i).c_str(), (cl_ulong*)tmp_data); - break; - default: - cerr << ERROR_INFO << "Data type '" << data_types.at(i) << "' unknown." << endl; - break; - } - - switch (data_rw_flags.at(i)) { - case 0: - data_in.push_back(cl::Buffer(dev_mgr.get_context(0), CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, var_size)); - dev_mgr.get_queue(0, 0).enqueueWriteBuffer(data_in.back(), blocking, 0, var_size, tmp_data); - break; - case 1: - data_in.push_back(cl::Buffer(dev_mgr.get_context(0), CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, var_size)); - dev_mgr.get_queue(0, 0).enqueueWriteBuffer(data_in.back(), blocking, 0, var_size, tmp_data); - break; - case 2: - data_in.push_back(cl::Buffer(dev_mgr.get_context(0), CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, var_size)); - break; - } - - for (uint32_t kernel_idx = 0; kernel_idx < found_kernels.size(); kernel_idx++) { - dev_mgr.getKernelbyName(0, "ocl_Kernel", found_kernels.at(kernel_idx))->setArg(i, data_in.back()); - } - - if (tmp_data != nullptr) { - delete[] tmp_data; tmp_data = nullptr; - } - } - catch (cl::Error err) { - std::cerr << ERROR_INFO << "Exception: " << err.what() << std::endl; - } - } - - dev_mgr.get_queue(0, 0).finish(); // Buffer Copy is asynchronous - - push_time = timer.getTimeMicroseconds() - push_time; - - if (benchmark_mode == false) { - cout << "Setting range..." << endl; - } - - cl::NDRange range_start; - cl::NDRange global_range; - cl::NDRange local_range; - - //TODO: Allow other integer types instead of cl_int? - cl_int tmp_range[3]; - h5_read_buffer(filename, "/settings/global_range", tmp_range); - global_range = cl::NDRange(tmp_range[0], tmp_range[1], tmp_range[2]); - h5_write_buffer(out_name, "/settings/global_range", tmp_range, 3); - - h5_read_buffer(filename, "/settings/range_start", tmp_range); - range_start = cl::NDRange(tmp_range[0], tmp_range[1], tmp_range[2]); - h5_write_buffer(out_name, "/settings/range_start", tmp_range, 3); - - h5_read_buffer(filename, "/settings/local_range", tmp_range); - h5_write_buffer(out_name, "/settings/local_range", tmp_range, 3); - if ((tmp_range[0] == 0) && (tmp_range[1] == 0) && (tmp_range[2] == 0)) { - local_range = cl::NullRange; - } - else { - local_range = cl::NDRange(tmp_range[0], tmp_range[1], tmp_range[2]); - } + cout << "Setting range..." << endl; + } + + cl::NDRange range_start; + cl::NDRange global_range; + cl::NDRange local_range; + + //TODO: Allow other integer types instead of cl_int? + cl_int tmp_range[3]; + h5_read_buffer(filename, "/settings/global_range", tmp_range); + global_range = cl::NDRange(tmp_range[0], tmp_range[1], tmp_range[2]); + h5_write_buffer(out_name, "/settings/global_range", tmp_range, 3); + + h5_read_buffer(filename, "/settings/range_start", tmp_range); + range_start = cl::NDRange(tmp_range[0], tmp_range[1], tmp_range[2]); + h5_write_buffer(out_name, "/settings/range_start", tmp_range, 3); + + h5_read_buffer(filename, "/settings/local_range", tmp_range); + h5_write_buffer(out_name, "/settings/local_range", tmp_range, 3); + if ((tmp_range[0] == 0) && (tmp_range[1] == 0) && (tmp_range[2] == 0)) { + local_range = cl::NullRange; + } + else { + local_range = cl::NDRange(tmp_range[0], tmp_range[1], tmp_range[2]); + } #if defined(USEAMDP) - if (amd_log_power||amd_log_temp) - { - cout << "Using AMD Power Profiling interface..." << endl << endl; - h5_create_dir(out_name, "/housekeeping"); - h5_create_dir(out_name, "/housekeeping/amd"); - initAMDPP(amd_power_rate); - } - std::thread amd_log_power_thread(amd_log_power_func); - // std::thread amd_log_temp_thread(amd_log_temp_func); + if (amd_log_power || amd_log_temp) + { + cout << "Using AMD Power Profiling interface..." << endl << endl; + h5_create_dir(out_name, "/housekeeping"); + h5_create_dir(out_name, "/housekeeping/amd"); + initAMDPP(amd_power_rate); + } + std::thread amd_log_power_thread(amd_log_power_func); + // std::thread amd_log_temp_thread(amd_log_temp_func); #endif #if defined(USENVML) #if defined(_WIN32) - if (nvidia_log_power || nvidia_log_temp) - { - //Get Program Files path from system - TCHAR pf[MAX_PATH]; - SHGetSpecialFolderPath(0,pf,CSIDL_PROGRAM_FILES,FALSE); - std::string nvsmi_path; - nvsmi_path.append(pf); - nvsmi_path.append("/NVIDIA Corporation/NVSMI/nvml.dll"); - if (fileExists(nvsmi_path)) { - LoadLibraryEx(nvsmi_path.c_str(), NULL, 0); - }else { - if (fileExists("nvml.dll")) { - LoadLibraryEx("nvml.dll", NULL, 0); - } - else { - //No NVML found abort - cout << "NVML library not found..." << endl; - nvidia_log_temp = false; - nvidia_log_power = false; - nvidia_power_rate = 0; - nvidia_temp_rate = 0; - } - } - } -#endif - if (nvidia_log_power || nvidia_log_temp) - { - nvmlReturn_t result; - result = nvmlInit(); - if (NVML_SUCCESS == result) - { - cout << "Using NVML interface..." << endl; - h5_create_dir(out_name, "/housekeeping"); - h5_create_dir(out_name, "/housekeeping/nvidia"); - } - else { - cout << "NVML failure..." << endl; - nvidia_log_temp = false; - nvidia_log_power = false; - } - } - if (nvidia_log_power || nvidia_log_temp) - { - nvmlPciInfo_t nv_pciinfo; - cl_uint nvml_devnum; - cl_uint nvml_devid = 0; - - nvmlDeviceGetCount(&nvml_devnum); - - for (cl_uint i = 0; i < nvml_devnum; i++) - { - nvmlDeviceGetHandleByIndex(i, &device); - nvmlDeviceGetPciInfo(device, &nv_pciinfo); - - std::ostringstream tmp_devid; - tmp_devid << nv_pciinfo.domain << ":" << nv_pciinfo.bus << ":" << nv_pciinfo.device; - - std::size_t found = dev_mgr.getDevicePCIeID(deviceIndex).find(tmp_devid.str()); - if (found != std::string::npos) { - nvml_devid = i; - if (benchmark_mode == false) { - cout << "NVidia OpenCL device " << tmp_devid.str() << " found in NVML device list." << endl; - } - } - else { - cout << "NVidia OpenCL device " << tmp_devid.str() << " not found in NVML device list! Aborting!" << endl; - nvmlShutdown(); - exit(EXIT_FAILURE); - } - } - nvmlDeviceGetHandleByIndex(nvml_devid, &device); - } - - std::thread nvidia_log_power_thread(nvidia_log_power_func); - std::thread nvidia_log_temp_thread(nvidia_log_temp_func); + if (nvidia_log_power || nvidia_log_temp) + { + //Get Program Files path from system + TCHAR pf[MAX_PATH]; + SHGetSpecialFolderPath(0, pf, CSIDL_PROGRAM_FILES, FALSE); + std::string nvsmi_path; + nvsmi_path.append(pf); + nvsmi_path.append("/NVIDIA Corporation/NVSMI/nvml.dll"); + if (fileExists(nvsmi_path)) { + LoadLibraryEx(nvsmi_path.c_str(), NULL, 0); + } + else { + if (fileExists("nvml.dll")) { + LoadLibraryEx("nvml.dll", NULL, 0); + } + else { + //No NVML found abort + cout << "NVML library not found..." << endl; + nvidia_log_temp = false; + nvidia_log_power = false; + nvidia_power_rate = 0; + nvidia_temp_rate = 0; + } + } + } #endif - -#if defined(USEIPG) - if (intel_log_power || intel_log_temp) - { - cout << "Using Intel Power Gadget interface..." << endl; - h5_create_dir(out_name, "/housekeeping"); - h5_create_dir(out_name, "/housekeeping/intel"); - rapl = new Rapl(); - } - - if (intel_log_power) - { - h5_write_single(out_name, "/housekeeping/intel/TDP" , (float)rapl->get_TDP(), - "Thermal Design Power in watt"); - - int numMsrs = rapl->get_NumMSR(); - - //This is necesarry for initalization - rapl->sample(); - rapl->sample(); - rapl->sample(); - - for (int j = 0; j < numMsrs; j++) + if (nvidia_log_power || nvidia_log_temp) { - int funcID; - double data[3]; - int nData; - wchar_t szName[MAX_PATH]; - - rapl->GetMsrFunc(j, &funcID); - rapl->GetMsrName(j, szName); - - if ((funcID == 1)) { - MSR.push_back(j); - if (utf16ToUtf8(szName) == "Processor") { - MSR_names.push_back("package"); + nvmlReturn_t result; + result = nvmlInit(); + if (NVML_SUCCESS == result) + { + cout << "Using NVML interface..." << endl; + h5_create_dir(out_name, "/housekeeping"); + h5_create_dir(out_name, "/housekeeping/nvidia"); } else { - if (utf16ToUtf8(szName) == "IA") { - MSR_names.push_back("cores"); - } - else { - MSR_names.push_back(utf16ToUtf8(szName)); - } + cout << "NVML failure..." << endl; + nvidia_log_temp = false; + nvidia_log_power = false; } - } + } + if (nvidia_log_power || nvidia_log_temp) + { + nvmlPciInfo_t nv_pciinfo; + cl_uint nvml_devnum; + cl_int nvml_devid = -1; + + nvmlDeviceGetCount(&nvml_devnum); + + for (cl_uint i = 0; i < nvml_devnum; i++) + { + nvmlDeviceGetHandleByIndex(i, &device); + nvmlDeviceGetPciInfo(device, &nv_pciinfo); + + std::ostringstream tmp_devid; + tmp_devid << nv_pciinfo.domain << ":" << nv_pciinfo.bus << ":" << nv_pciinfo.device; + //cout<< tmp_devid.str() <(out_name, "/housekeeping/intel/TDP", (float)rapl->get_TDP(), + "Thermal Design Power in watt"); + + int numMsrs = rapl->get_NumMSR(); + + //This is necesarry for initalization + rapl->sample(); + rapl->sample(); + rapl->sample(); - //Get Package Power Limit - if ((funcID == 3) ) { - double data[3]; - int nData; - rapl->GetPowerData(0, j, data, &nData); - std::string varname = "/housekeeping/intel/" + utf16ToUtf8(szName) + "_power_limit"; - h5_write_single(out_name, varname.c_str() , data[0]); - } + for (int j = 0; j < numMsrs; j++) + { + int funcID; + double data[3]; + int nData; + wchar_t szName[MAX_PATH]; + + rapl->GetMsrFunc(j, &funcID); + rapl->GetMsrName(j, szName); + + if ((funcID == 1)) { + MSR.push_back(j); + if (utf16ToUtf8(szName) == "Processor") { + MSR_names.push_back("package"); + } + else { + if (utf16ToUtf8(szName) == "IA") { + MSR_names.push_back("cores"); + } + else { + MSR_names.push_back(utf16ToUtf8(szName)); + } + } + } + //Get Package Power Limit + if ((funcID == 3)) { + double data[3]; + int nData; + rapl->GetPowerData(0, j, data, &nData); + std::string varname = "/housekeeping/intel/" + utf16ToUtf8(szName) + "_power_limit"; + h5_write_single(out_name, varname.c_str(), data[0]); + } + + } } - } - std::thread intel_log_power_thread(intel_log_power_func); - std::thread intel_log_temp_thread(intel_log_temp_func); + std::thread intel_log_power_thread(intel_log_power_func); + std::thread intel_log_temp_thread(intel_log_temp_func); #endif #if defined(USEIRAPL) - if (intel_log_power || intel_log_temp) - { - cout << "Using Intel MSR interface..." << endl; - h5_create_dir(out_name, "/housekeeping"); - h5_create_dir(out_name, "/housekeeping/intel"); - rapl = new Rapl(); - } - - if (intel_log_power) - { - h5_write_single(out_name, "/housekeeping/intel/TDP", (float)rapl->get_TDP(), - "Thermal Design Power in watt"); - } - - std::thread intel_log_power_thread(intel_log_power_func); - std::thread intel_log_temp_thread(intel_log_temp_func); + if (intel_log_power || intel_log_temp) + { + cout << "Using Intel MSR interface..." << endl; + h5_create_dir(out_name, "/housekeeping"); + h5_create_dir(out_name, "/housekeeping/intel"); + rapl = new Rapl(); + } + + if (intel_log_power) + { + h5_write_single(out_name, "/housekeeping/intel/TDP", (float)rapl->get_TDP(), + "Thermal Design Power in watt"); + } + + std::thread intel_log_power_thread(intel_log_power_func); + std::thread intel_log_temp_thread(intel_log_temp_func); #endif - if (benchmark_mode == true) { - cout << "Sleeping for 4s" << endl << endl; - std::chrono::milliseconds timespan(4000); - std::this_thread::sleep_for(timespan); - } + if (benchmark_mode == true) { + cout << "Sleeping for 4s" << endl << endl; + std::chrono::milliseconds timespan(4000); + std::this_thread::sleep_for(timespan); + } - cout << "Launching kernel..." << endl; + cout << "Launching kernel..." << endl; - //get execution timestamp - timeval start_timeinfo; - gettimeofday(&start_timeinfo, NULL); + //get execution timestamp + timeval start_timeinfo; + gettimeofday(&start_timeinfo, NULL); - uint64_t exec_time = 0; - uint32_t kernels_run = 0; + uint64_t exec_time = 0; + uint32_t kernels_run = 0; - uint64_t total_exec_time = timer.getTimeMicroseconds(); + uint64_t total_exec_time = timer.getTimeMicroseconds(); - for (cl_ulong repetition = 0; repetition < kernel_repetitions; ++repetition) { - for (string const& kernel_name : kernel_list) { - exec_time = exec_time + dev_mgr.execute_kernelNA(*(dev_mgr.getKernelbyName(0, "ocl_Kernel", kernel_name)), - dev_mgr.get_queue(0, 0), range_start, global_range, local_range); - kernels_run++; + for (cl_ulong repetition = 0; repetition < kernel_repetitions; ++repetition) { + for (string const& kernel_name : kernel_list) { + exec_time = exec_time + dev_mgr.execute_kernelNA(*(dev_mgr.getKernelbyName(0, "ocl_Kernel", kernel_name)), + dev_mgr.get_queue(0, 0), range_start, global_range, local_range); + kernels_run++; + } } - } - total_exec_time = timer.getTimeMicroseconds() - total_exec_time; - h5_create_dir(out_name, "housekeeping"); - h5_write_single(out_name, "/housekeeping/total_execution_time", 1.e-6 * total_exec_time, - "Time in seconds of the total execution (data transfer, kernel, and host code)."); + total_exec_time = timer.getTimeMicroseconds() - total_exec_time; + h5_create_dir(out_name, "housekeeping"); + h5_write_single(out_name, "/housekeeping/total_execution_time", 1.e-6 * total_exec_time, + "Time in seconds of the total execution (data transfer, kernel, and host code)."); - cout << "Kernels executed: " << kernels_run << endl; - cout << "Kernel runtime: " << exec_time / 1000 << " ms" << endl; // TODO: ms or s, int or double? + cout << "Kernels executed: " << kernels_run << endl; + cout << "Kernel runtime: " << exec_time / 1000 << " ms" << endl; // TODO: ms or s, int or double? - if (benchmark_mode == true) { - cout << endl << "Sleeping for 4s" << endl; - std::chrono::milliseconds timespan(4000); + if (benchmark_mode == true) { + cout << endl << "Sleeping for 4s" << endl; + std::chrono::milliseconds timespan(4000); - std::this_thread::sleep_for(timespan); - } + std::this_thread::sleep_for(timespan); + } - cout << "Saving results... " << endl; + cout << "Saving results... " << endl; #if defined(USEAMDP) - amd_log_power = false; - amd_log_temp = false; - amd_log_power_thread.join(); - //amd_log_temp_thread.join(); + amd_log_power = false; + amd_log_temp = false; + amd_log_power_thread.join(); + //amd_log_temp_thread.join(); - if ((amd_power_rate > 0) || (amd_temp_rate > 0)) { + if ((amd_power_rate > 0) || (amd_temp_rate > 0)) { - AMDTPwrStopProfiling(); - } + AMDTPwrStopProfiling(); + } - if (amd_power_rate > 0) - { - h5_write_buffer(out_name, "/housekeeping/amd/power_time", amd_power_time.data(), amd_power_time.size(), - "POSIX UTC time in seconds since 1970-01-01T00:00.000 (resolution of milliseconds)"); + if (amd_power_rate > 0) + { + h5_write_buffer(out_name, "/housekeeping/amd/power_time", amd_power_time.data(), amd_power_time.size(), + "POSIX UTC time in seconds since 1970-01-01T00:00.000 (resolution of milliseconds)"); - for (size_t i = 0; i < AMDP_names.size(); i++) + for (size_t i = 0; i < AMDP_names.size(); i++) + { + std::string varname = "/housekeeping/amd/" + AMDP_names.at(i); + //cout<<"Power: "<< AMDP_names.at(i) <(out_name, varname.c_str(), amd_power[i].data(), amd_power[i].size(), + "Power in watt"); + } + + } + if (amd_temp_rate > 0) { - std::string varname = "/housekeeping/amd/" + AMDP_names.at(i); - //cout<<"Power: "<< AMDP_names.at(i) <(out_name, varname.c_str(), amd_power[i].data(), amd_power[i].size(), - "Power in watt"); - } - - } - if (amd_temp_rate > 0) - { - //amd_power_time has to be changed to amd_temp_time as soon as simultaneous logging is fixed! - h5_write_buffer(out_name, "/housekeeping/amd/temperature_time", amd_power_time.data(), amd_power_time.size(), - "POSIX UTC time in seconds since 1970-01-01T00:00.000 (resolution of milliseconds)"); - for (size_t i = 0; i < AMDT_names.size(); i++) - { - std::string varname = "/housekeeping/amd/" + AMDT_names.at(i); - // cout << "Temp: "<< AMDT_names.at(i) << endl; - h5_write_buffer(out_name, varname.c_str(), amd_temp[i].data(), amd_temp[i].size(), - "Temperatures in degree C"); - } - } + //amd_power_time has to be changed to amd_temp_time as soon as simultaneous logging is fixed! + h5_write_buffer(out_name, "/housekeeping/amd/temperature_time", amd_power_time.data(), amd_power_time.size(), + "POSIX UTC time in seconds since 1970-01-01T00:00.000 (resolution of milliseconds)"); + for (size_t i = 0; i < AMDT_names.size(); i++) + { + std::string varname = "/housekeeping/amd/" + AMDT_names.at(i); + // cout << "Temp: "<< AMDT_names.at(i) << endl; + h5_write_buffer(out_name, varname.c_str(), amd_temp[i].data(), amd_temp[i].size(), + "Temperatures in degree C"); + } + } #endif #if defined(USEIRAPL) - intel_log_power = false; - intel_log_power_thread.join(); + intel_log_power = false; + intel_log_power_thread.join(); - intel_log_temp = false; - intel_log_temp_thread.join(); + intel_log_temp = false; + intel_log_temp_thread.join(); - if (intel_power_rate > 0) - { - // size()-1 because differences are computed later - h5_write_buffer(out_name, "/housekeeping/intel/power_time", intel_power_time.data(), intel_power_time.size()-1, - "POSIX UTC time in seconds since 1970-01-01T00:00.000 (resolution of milliseconds)"); + if (intel_power_rate > 0) + { + // size()-1 because differences are computed later + h5_write_buffer(out_name, "/housekeeping/intel/power_time", intel_power_time.data(), intel_power_time.size() - 1, + "POSIX UTC time in seconds since 1970-01-01T00:00.000 (resolution of milliseconds)"); - std::vector tmp_vector; + std::vector tmp_vector; - size_t max_entries = MSR_names.size(); - if (rapl->detect_igp() == false) { - // no GT data - max_entries--; - } - - for (size_t i = 0; i < max_entries; i++) - { - tmp_vector.clear(); + size_t max_entries = MSR_names.size(); + if (rapl->detect_igp() == false) { + // no GT data + max_entries--; + } - for (size_t j = 0; j < intel_power0[i].size()-1; j++) - { - tmp_vector.push_back((rapl->get_e_unit()*(double)(intel_power0[i].at(j+1)-intel_power0[i].at(j))) / ((double)intel_power_rate*0.001)); - } - std::string varname = "/housekeeping/intel/" + MSR_names.at(i) + "0"; - h5_write_buffer(out_name, varname.c_str(), tmp_vector.data(), tmp_vector.size(), - "Power in watt"); - } + for (size_t i = 0; i < max_entries; i++) + { + tmp_vector.clear(); - if (rapl->detect_socket1() == true) - { - for (size_t i = 0; i < max_entries; i++) - { - tmp_vector.clear(); + for (size_t j = 0; j < intel_power0[i].size() - 1; j++) + { + tmp_vector.push_back((rapl->get_e_unit()*(double)(intel_power0[i].at(j + 1) - intel_power0[i].at(j))) / ((double)intel_power_rate*0.001)); + } + std::string varname = "/housekeeping/intel/" + MSR_names.at(i) + "0"; + h5_write_buffer(out_name, varname.c_str(), tmp_vector.data(), tmp_vector.size(), + "Power in watt"); + } - for (size_t j = 0; j < intel_power1[i].size()-1; j++) + if (rapl->detect_socket1() == true) { - tmp_vector.push_back((rapl->get_e_unit()*(double)(intel_power1[i].at(j+1)-intel_power1[i].at(j)))/((double)intel_power_rate*0.001)); + for (size_t i = 0; i < max_entries; i++) + { + tmp_vector.clear(); + + for (size_t j = 0; j < intel_power1[i].size() - 1; j++) + { + tmp_vector.push_back((rapl->get_e_unit()*(double)(intel_power1[i].at(j + 1) - intel_power1[i].at(j))) / ((double)intel_power_rate*0.001)); + } + std::string varname = "/housekeeping/intel/" + MSR_names.at(i) + "1"; + h5_write_buffer(out_name, varname.c_str(), tmp_vector.data(), tmp_vector.size(), + "Power in watt"); + } } - std::string varname = "/housekeeping/intel/" + MSR_names.at(i) + "1"; - h5_write_buffer(out_name, varname.c_str(), tmp_vector.data(), tmp_vector.size(), - "Power in watt"); - } } - } - if (intel_temp_rate > 0) - { - h5_write_buffer(out_name, "/housekeeping/intel/temperature_time", intel_temp_time.data(), intel_temp_time.size(), - "POSIX UTC time in seconds since 1970-01-01T00:00.000 (resolution of milliseconds)"); + if (intel_temp_rate > 0) + { + h5_write_buffer(out_name, "/housekeeping/intel/temperature_time", intel_temp_time.data(), intel_temp_time.size(), + "POSIX UTC time in seconds since 1970-01-01T00:00.000 (resolution of milliseconds)"); - h5_write_buffer(out_name, "/housekeeping/intel/package0_temperature", intel_temp0.data(), intel_temp0.size(), - "Temperature in degree Celsius"); + h5_write_buffer(out_name, "/housekeeping/intel/package0_temperature", intel_temp0.data(), intel_temp0.size(), + "Temperature in degree Celsius"); - if (rapl->detect_socket1() == true) - { - h5_write_buffer(out_name, "/housekeeping/intel/package1_temperature", intel_temp1.data(), intel_temp1.size(), - "Temperature in degree Celsius"); + if (rapl->detect_socket1() == true) + { + h5_write_buffer(out_name, "/housekeeping/intel/package1_temperature", intel_temp1.data(), intel_temp1.size(), + "Temperature in degree Celsius"); + } } - } #endif #if defined(USEIPG) - intel_log_power = false; - intel_log_power_thread.join(); - - intel_log_temp = false; - intel_log_temp_thread.join(); + intel_log_power = false; + intel_log_power_thread.join(); - if (intel_power_rate > 0) - { - h5_write_buffer(out_name, "/housekeeping/intel/power_time", intel_power_time.data(), intel_power_time.size(), - "POSIX UTC time in seconds since 1970-01-01T00:00.000 (resolution of milliseconds)"); + intel_log_temp = false; + intel_log_temp_thread.join(); - for (size_t i = 0; i < MSR_names.size(); i++) + if (intel_power_rate > 0) { - std::string varname = "/housekeeping/intel/" + MSR_names.at(i) + "0"; - h5_write_buffer(out_name, varname.c_str(), intel_power[i].data(), intel_power[i].size(), - "Power in watt"); + h5_write_buffer(out_name, "/housekeeping/intel/power_time", intel_power_time.data(), intel_power_time.size(), + "POSIX UTC time in seconds since 1970-01-01T00:00.000 (resolution of milliseconds)"); + + for (size_t i = 0; i < MSR_names.size(); i++) + { + std::string varname = "/housekeeping/intel/" + MSR_names.at(i) + "0"; + h5_write_buffer(out_name, varname.c_str(), intel_power[i].data(), intel_power[i].size(), + "Power in watt"); + } } - } - if (intel_temp_rate > 0) - { - h5_write_buffer(out_name, "/housekeeping/intel/temperature_time", intel_temp_time.data(), intel_temp_time.size(), - "POSIX UTC time in seconds since 1970-01-01T00:00.000 (resolution of milliseconds)"); + if (intel_temp_rate > 0) + { + h5_write_buffer(out_name, "/housekeeping/intel/temperature_time", intel_temp_time.data(), intel_temp_time.size(), + "POSIX UTC time in seconds since 1970-01-01T00:00.000 (resolution of milliseconds)"); - h5_write_buffer(out_name, "/housekeeping/intel/package_temperature", intel_temp.data(), intel_temp.size(), - "Temperature in degree Celsius"); - } + h5_write_buffer(out_name, "/housekeeping/intel/package_temperature", intel_temp.data(), intel_temp.size(), + "Temperature in degree Celsius"); + } #endif #if defined(USENVML) - nvidia_log_power = false; - nvidia_log_temp = false; - nvidia_log_power_thread.join(); - nvidia_log_temp_thread.join(); + nvidia_log_power = false; + nvidia_log_temp = false; + nvidia_log_power_thread.join(); + nvidia_log_temp_thread.join(); - if ((nvidia_power_rate > 0)||(nvidia_temp_rate > 0)) - { - nvmlShutdown(); - } + if ((nvidia_power_rate > 0) || (nvidia_temp_rate > 0)) + { + nvmlShutdown(); + } - if (nvidia_power_rate > 0) { + if (nvidia_power_rate > 0) { - h5_write_buffer(out_name, "/housekeeping/nvidia/power", nvidia_power.data(), nvidia_power.size(), - "Power in watt"); + h5_write_buffer(out_name, "/housekeeping/nvidia/power", nvidia_power.data(), nvidia_power.size(), + "Power in watt"); - h5_write_buffer(out_name, "/housekeeping/nvidia/power_time", nvidia_power_time.data(), nvidia_power_time.size(), - "POSIX UTC time in seconds since 1970-01-01T00:00.000 (resolution of milliseconds)"); - } + h5_write_buffer(out_name, "/housekeeping/nvidia/power_time", nvidia_power_time.data(), nvidia_power_time.size(), + "POSIX UTC time in seconds since 1970-01-01T00:00.000 (resolution of milliseconds)"); + } - if (nvidia_temp_rate > 0) { + if (nvidia_temp_rate > 0) { - h5_write_buffer(out_name, "/housekeeping/nvidia/temperature", nvidia_temp.data(), nvidia_temp.size(), - "Temperature in degree Celsius"); + h5_write_buffer(out_name, "/housekeeping/nvidia/temperature", nvidia_temp.data(), nvidia_temp.size(), + "Temperature in degree Celsius"); - h5_write_buffer(out_name, "/housekeeping/nvidia/temperature_time", nvidia_temp_time.data(), nvidia_temp_time.size(), - "POSIX UTC time in seconds since 1970-01-01T00:00.000 (resolution of milliseconds)"); - } + h5_write_buffer(out_name, "/housekeeping/nvidia/temperature_time", nvidia_temp_time.data(), nvidia_temp_time.size(), + "POSIX UTC time in seconds since 1970-01-01T00:00.000 (resolution of milliseconds)"); + } #endif - char time_buffer[100]; - time_t tmp_time = start_timeinfo.tv_sec; - strftime(time_buffer, sizeof(time_buffer), "%Y-%m-%dT%H:%M:%S", localtime(&tmp_time)); - sprintf(time_buffer, "%s.%03ld", time_buffer, start_timeinfo.tv_usec / 1000); - h5_create_dir(out_name, "housekeeping"); - h5_write_string(out_name, "housekeeping/kernel_execution_start", time_buffer); - h5_write_single(out_name, "housekeeping/kernel_execution_time", 1.e-6 * exec_time, - "Time in seconds of the kernel execution (no host code)."); - h5_write_single(out_name, "housekeeping/data_load_time", 1.e-6 * push_time, - "Time in seconds of the data transfer: hdf5 input file -> host -> device."); - - h5_create_dir(out_name, "architecture"); - h5_write_string(out_name, "architecture/host_os", getOS().c_str()); - h5_write_string(out_name, "architecture/opencl_device", dev_mgr.get_avail_dev_info(deviceIndex).name.c_str()); - h5_write_string(out_name, "architecture/opencl_platform", dev_mgr.get_avail_dev_info(deviceIndex).platform_name.c_str()); - h5_write_string(out_name, "architecture/opencl_version", dev_mgr.get_avail_dev_info(deviceIndex).ocl_version.c_str()); - - h5_create_dir(out_name, "/data"); - - pull_time = timer.getTimeMicroseconds(); - - uint32_t buffer_counter = 0; - - for (cl_uint i = 0; i < data_names.size(); i++) { - try { - uint8_t *tmp_data = nullptr; - size_t var_size = 0; - - switch (data_types.at(i)) { - case H5_float: var_size = data_sizes.at(i) * sizeof(cl_float); break; - case H5_double: var_size = data_sizes.at(i) * sizeof(cl_double); break; - case H5_char: var_size = data_sizes.at(i) * sizeof(cl_char); break; - case H5_uchar: var_size = data_sizes.at(i) * sizeof(cl_uchar); break; - case H5_short: var_size = data_sizes.at(i) * sizeof(cl_short); break; - case H5_ushort: var_size = data_sizes.at(i) * sizeof(cl_ushort); break; - case H5_int: var_size = data_sizes.at(i) * sizeof(cl_int); break; - case H5_uint: var_size = data_sizes.at(i) * sizeof(cl_uint); break; - case H5_long: var_size = data_sizes.at(i) * sizeof(cl_long); break; - case H5_ulong: var_size = data_sizes.at(i) * sizeof(cl_ulong); break; - default: cerr << ERROR_INFO << "Data type '" << data_types.at(i) << "' unknown." << endl; - } - - tmp_data = new uint8_t[var_size]; - - switch (data_rw_flags.at(buffer_counter)) { - case 0: dev_mgr.get_queue(0, 0).enqueueReadBuffer(data_in.at(buffer_counter), blocking, 0, var_size, tmp_data); break; - case 1: break; - case 2: dev_mgr.get_queue(0, 0).enqueueReadBuffer(data_in.at(buffer_counter), blocking, 0, var_size, tmp_data); break; - } - - dev_mgr.get_queue(0, 0).finish(); //Buffer Copy is asynchronous - - switch (data_types.at(i)) { - case H5_float: h5_write_buffer( out_name, data_names.at(i).c_str(), (float*)tmp_data, data_sizes.at(buffer_counter)); break; - case H5_double: h5_write_buffer( out_name, data_names.at(i).c_str(), (double*)tmp_data, data_sizes.at(buffer_counter)); break; - case H5_char: h5_write_buffer( out_name, data_names.at(i).c_str(), (cl_char*)tmp_data, data_sizes.at(buffer_counter)); break; - case H5_uchar: h5_write_buffer( out_name, data_names.at(i).c_str(), (cl_uchar*)tmp_data, data_sizes.at(buffer_counter)); break; - case H5_short: h5_write_buffer( out_name, data_names.at(i).c_str(), (cl_short*)tmp_data, data_sizes.at(buffer_counter)); break; - case H5_ushort: h5_write_buffer(out_name, data_names.at(i).c_str(), (cl_ushort*)tmp_data, data_sizes.at(buffer_counter)); break; - case H5_int: h5_write_buffer( out_name, data_names.at(i).c_str(), (cl_int*)tmp_data, data_sizes.at(buffer_counter)); break; - case H5_uint: h5_write_buffer( out_name, data_names.at(i).c_str(), (cl_uint*)tmp_data, data_sizes.at(buffer_counter)); break; - case H5_long: h5_write_buffer( out_name, data_names.at(i).c_str(), (cl_long*)tmp_data, data_sizes.at(buffer_counter)); break; - case H5_ulong: h5_write_buffer( out_name, data_names.at(i).c_str(), (cl_ulong*)tmp_data, data_sizes.at(buffer_counter)); break; - default: cerr << ERROR_INFO << "Data type '" << data_types.at(i) << "' unknown." << endl; - } - if (tmp_data != nullptr) { - delete[] tmp_data; tmp_data = nullptr; - } - buffer_counter++; - } - catch (cl::Error err) { - std::cerr << ERROR_INFO << "Exception: " << err.what() << std::endl; - } - } - - pull_time = timer.getTimeMicroseconds() - pull_time; - h5_write_single(out_name, "housekeeping/data_store_time", 1.e-6 * pull_time, - "Time in seconds of the data transfer: device -> host -> hdf5 output file."); - - return 0; + char time_buffer[100]; + time_t tmp_time = start_timeinfo.tv_sec; + strftime(time_buffer, sizeof(time_buffer), "%Y-%m-%dT%H:%M:%S", localtime(&tmp_time)); + sprintf(time_buffer, "%s.%03ld", time_buffer, start_timeinfo.tv_usec / 1000); + h5_create_dir(out_name, "housekeeping"); + h5_write_string(out_name, "housekeeping/kernel_execution_start", time_buffer); + h5_write_single(out_name, "housekeeping/kernel_execution_time", 1.e-6 * exec_time, + "Time in seconds of the kernel execution (no host code)."); + h5_write_single(out_name, "housekeeping/data_load_time", 1.e-6 * push_time, + "Time in seconds of the data transfer: hdf5 input file -> host -> device."); + + h5_create_dir(out_name, "architecture"); + h5_write_string(out_name, "architecture/host_os", getOS().c_str()); + h5_write_string(out_name, "architecture/opencl_device", dev_mgr.get_avail_dev_info(deviceIndex).name.c_str()); + h5_write_string(out_name, "architecture/opencl_platform", dev_mgr.get_avail_dev_info(deviceIndex).platform_name.c_str()); + h5_write_string(out_name, "architecture/opencl_version", dev_mgr.get_avail_dev_info(deviceIndex).ocl_version.c_str()); + + h5_create_dir(out_name, "/data"); + + pull_time = timer.getTimeMicroseconds(); + + uint32_t buffer_counter = 0; + + for (cl_uint i = 0; i < data_names.size(); i++) { + try { + uint8_t *tmp_data = nullptr; + size_t var_size = 0; + + switch (data_types.at(i)) { + case H5_float: var_size = data_sizes.at(i) * sizeof(cl_float); break; + case H5_double: var_size = data_sizes.at(i) * sizeof(cl_double); break; + case H5_char: var_size = data_sizes.at(i) * sizeof(cl_char); break; + case H5_uchar: var_size = data_sizes.at(i) * sizeof(cl_uchar); break; + case H5_short: var_size = data_sizes.at(i) * sizeof(cl_short); break; + case H5_ushort: var_size = data_sizes.at(i) * sizeof(cl_ushort); break; + case H5_int: var_size = data_sizes.at(i) * sizeof(cl_int); break; + case H5_uint: var_size = data_sizes.at(i) * sizeof(cl_uint); break; + case H5_long: var_size = data_sizes.at(i) * sizeof(cl_long); break; + case H5_ulong: var_size = data_sizes.at(i) * sizeof(cl_ulong); break; + default: cerr << ERROR_INFO << "Data type '" << data_types.at(i) << "' unknown." << endl; + } + + tmp_data = new uint8_t[var_size]; + + switch (data_rw_flags.at(buffer_counter)) { + case 0: dev_mgr.get_queue(0, 0).enqueueReadBuffer(data_in.at(buffer_counter), blocking, 0, var_size, tmp_data); break; + case 1: break; + case 2: dev_mgr.get_queue(0, 0).enqueueReadBuffer(data_in.at(buffer_counter), blocking, 0, var_size, tmp_data); break; + } + + dev_mgr.get_queue(0, 0).finish(); //Buffer Copy is asynchronous + + switch (data_types.at(i)) { + case H5_float: h5_write_buffer(out_name, data_names.at(i).c_str(), (float*)tmp_data, data_sizes.at(buffer_counter)); break; + case H5_double: h5_write_buffer(out_name, data_names.at(i).c_str(), (double*)tmp_data, data_sizes.at(buffer_counter)); break; + case H5_char: h5_write_buffer(out_name, data_names.at(i).c_str(), (cl_char*)tmp_data, data_sizes.at(buffer_counter)); break; + case H5_uchar: h5_write_buffer(out_name, data_names.at(i).c_str(), (cl_uchar*)tmp_data, data_sizes.at(buffer_counter)); break; + case H5_short: h5_write_buffer(out_name, data_names.at(i).c_str(), (cl_short*)tmp_data, data_sizes.at(buffer_counter)); break; + case H5_ushort: h5_write_buffer(out_name, data_names.at(i).c_str(), (cl_ushort*)tmp_data, data_sizes.at(buffer_counter)); break; + case H5_int: h5_write_buffer(out_name, data_names.at(i).c_str(), (cl_int*)tmp_data, data_sizes.at(buffer_counter)); break; + case H5_uint: h5_write_buffer(out_name, data_names.at(i).c_str(), (cl_uint*)tmp_data, data_sizes.at(buffer_counter)); break; + case H5_long: h5_write_buffer(out_name, data_names.at(i).c_str(), (cl_long*)tmp_data, data_sizes.at(buffer_counter)); break; + case H5_ulong: h5_write_buffer(out_name, data_names.at(i).c_str(), (cl_ulong*)tmp_data, data_sizes.at(buffer_counter)); break; + default: cerr << ERROR_INFO << "Data type '" << data_types.at(i) << "' unknown." << endl; + } + if (tmp_data != nullptr) { + delete[] tmp_data; tmp_data = nullptr; + } + buffer_counter++; + } + catch (cl::Error err) { + std::cerr << ERROR_INFO << "Exception: " << err.what() << std::endl; + } + } + + pull_time = timer.getTimeMicroseconds() - pull_time; + h5_write_single(out_name, "housekeeping/data_store_time", 1.e-6 * pull_time, + "Time in seconds of the data transfer: device -> host -> hdf5 output file."); + + return 0; } From 339327c1dd6788f8c64d014bd5cfb2c55251e949 Mon Sep 17 00:00:00 2001 From: Philip Heinisch Date: Fri, 29 Mar 2019 14:27:45 +0100 Subject: [PATCH 3/7] More NVML fixes --- src/main.cpp | 2 +- src/ocl_dev_mgr.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index b3ce65e..9468bd7 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -998,7 +998,7 @@ int main(int argc, char *argv[]) { std::ostringstream tmp_devid; tmp_devid << nv_pciinfo.domain << ":" << nv_pciinfo.bus << ":" << nv_pciinfo.device; - //cout<< tmp_devid.str() <> 3; func = slot_id & 0x7; - tmp_stream << domain << ":" << bus << ":" << dev; + tmp_stream << domain << ":" << bus << ":" << dev << func; } else { From 8be22f5af90af73b45783a12642f9204448661af Mon Sep 17 00:00:00 2001 From: Philip Heinisch Date: Fri, 29 Mar 2019 14:31:30 +0100 Subject: [PATCH 4/7] Removed NVML debug output --- src/main.cpp | 2 +- src/ocl_dev_mgr.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 9468bd7..b3ce65e 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -998,7 +998,7 @@ int main(int argc, char *argv[]) { std::ostringstream tmp_devid; tmp_devid << nv_pciinfo.domain << ":" << nv_pciinfo.bus << ":" << nv_pciinfo.device; - cout<< tmp_devid.str() <> 3; func = slot_id & 0x7; - tmp_stream << domain << ":" << bus << ":" << dev << func; + tmp_stream << domain << ":" << bus << ":" << slot_id; } else { From df12b15ee09806211b9e9039d4cb0e6050ac0cbf Mon Sep 17 00:00:00 2001 From: Philip Heinisch Date: Fri, 29 Mar 2019 20:44:12 +0100 Subject: [PATCH 5/7] Reverted hdf5_io.cpp to new version --- src/hdf5_io.cpp | 48 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/src/hdf5_io.cpp b/src/hdf5_io.cpp index 985c288..a080bf8 100644 --- a/src/hdf5_io.cpp +++ b/src/hdf5_io.cpp @@ -381,11 +381,18 @@ bool h5_read_string(char const* filename, char const* varname, std::string& outp { if (!fileExists(filename)) { std::cerr << ERROR_INFO << "File '" << filename << "' not found." << std::endl; - //TODO: File not found - no idea what error code to use + //TODO: Exception? Only error code? return false; } hid_t h5_file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT); + if (H5LTpath_valid(h5_file_id, varname, true) <= 0) { + std::cerr << ERROR_INFO << "Variable '" << varname << "' not found in file '" << filename << "'." << std::endl; + //TODO: Exception? Only error code? + H5Fclose(h5_file_id); + return false; + } + hid_t dataset = H5Dopen(h5_file_id, varname, H5P_DEFAULT); hid_t datatype = H5Dget_type(dataset); @@ -400,7 +407,13 @@ bool h5_read_string(char const* filename, char const* varname, std::string& outp hsize_t size = accumulate(begin(dims), end(dims), 1, std::multiplies()); std::vector buffer(size * sizeof(char*)); - H5Dread(dataset, datatype, dataspace, dataspace, H5P_DEFAULT, &(buffer[0])); + herr_t err = H5Dread(dataset, datatype, dataspace, dataspace, H5P_DEFAULT, &(buffer[0])); + if (err < 0) { + std::cerr << ERROR_INFO << "Reading variable '" << varname << "' in file '" << filename << "' not possible." << std::endl; + //TODO: Exception? Only error code? + H5Fclose(h5_file_id); + return false; + } output = std::string(buffer.at(0)); H5Dvlen_reclaim(datatype, dataspace, H5P_DEFAULT, &(buffer[0])); @@ -410,7 +423,13 @@ bool h5_read_string(char const* filename, char const* varname, std::string& outp hssize_t npoints = H5Sget_simple_extent_npoints(dataspace); std::vector buffer(datatype_size * npoints, '\0'); - H5Dread(dataset, datatype, dataspace, dataspace, H5P_DEFAULT, &(buffer[0])); + herr_t err = H5Dread(dataset, datatype, dataspace, dataspace, H5P_DEFAULT, &(buffer[0])); + if (err < 0) { + std::cerr << ERROR_INFO << "Reading variable '" << varname << "' in file '" << filename << "' not possible." << std::endl; + //TODO: Exception? Only error code? + H5Fclose(h5_file_id); + return false; + } output = std::string(begin(buffer), end(buffer)); } @@ -451,6 +470,13 @@ bool h5_read_strings(char const* filename, char const* varname, std::vector()); std::vector buffer(size * sizeof(char*)); - H5Dread(dataset, datatype, dataspace, dataspace, H5P_DEFAULT, &(buffer[0])); + herr_t err = H5Dread(dataset, datatype, dataspace, dataspace, H5P_DEFAULT, &(buffer[0])); + if (err < 0) { + std::cerr << ERROR_INFO << "Reading variable '" << varname << "' in file '" << filename << "' not possible." << std::endl; + //TODO: Exception? Only error code? + H5Fclose(h5_file_id); + return false; + } for (char const* line : buffer) { if (line == nullptr) { @@ -481,7 +513,13 @@ bool h5_read_strings(char const* filename, char const* varname, std::vector buffer(line_length * num_lines, '\0'); - H5LTread_dataset_string(h5_file_id, varname, &(buffer[0])); + herr_t err = H5LTread_dataset_string(h5_file_id, varname, &(buffer[0])); + if (err < 0) { + std::cerr << ERROR_INFO << "Reading variable '" << varname << "' in file '" << filename << "' not possible." << std::endl; + //TODO: Exception? Only error code? + H5Fclose(h5_file_id); + return false; + } size_t str_start = 0; for (hssize_t lines_found = 0; lines_found < num_lines; ++lines_found) { From d6a26806d6aba77691a381042d8e3fb049d9d644 Mon Sep 17 00:00:00 2001 From: Philip Heinisch Date: Fri, 29 Mar 2019 21:14:48 +0100 Subject: [PATCH 6/7] Fixed missing changes in main.cpp --- src/main.cpp | 2020 ++++++++++++++++++++++--------------------- src/ocl_dev_mgr.cpp | 402 ++++----- 2 files changed, 1227 insertions(+), 1195 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index b3ce65e..58725c6 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -27,20 +27,20 @@ int gettimeofday(struct timeval * tp, struct timezone * tzp) { - static const uint64_t EPOCH = ((uint64_t)116444736000000000ULL); + static const uint64_t EPOCH = ((uint64_t)116444736000000000ULL); - SYSTEMTIME system_time; - FILETIME file_time; - uint64_t time; + SYSTEMTIME system_time; + FILETIME file_time; + uint64_t time; - GetSystemTime(&system_time); - SystemTimeToFileTime(&system_time, &file_time); - time = ((uint64_t)file_time.dwLowDateTime); - time += ((uint64_t)file_time.dwHighDateTime) << 32; + GetSystemTime(&system_time); + SystemTimeToFileTime(&system_time, &file_time); + time = ((uint64_t)file_time.dwLowDateTime); + time += ((uint64_t)file_time.dwHighDateTime) << 32; - tp->tv_sec = (long)((time - EPOCH) / 10000000L); - tp->tv_usec = (long)(system_time.wMilliseconds * 1000); - return 0; + tp->tv_sec = (long)((time - EPOCH) / 10000000L); + tp->tv_usec = (long)(system_time.wMilliseconds * 1000); + return 0; } #else @@ -49,8 +49,8 @@ int gettimeofday(struct timeval * tp, struct timezone * tzp) inline double timeval2storage(const timeval& timepoint) { - // convert microseconds to seconds using a resolution of milliseconds - return timepoint.tv_sec + 1.e-3 * (timepoint.tv_usec / 1000); + // convert microseconds to seconds using a resolution of milliseconds + return timepoint.tv_sec + 1.e-3 * (timepoint.tv_usec / 1000); } @@ -75,96 +75,96 @@ std::vector AMDP_names; std::vector AMDT_names; bool initAMDPP(uint32_t sample_rate) { - AMDTResult hResult = AMDT_STATUS_OK; - - // Initialize online mode - hResult = AMDTPwrProfileInitialize(AMDT_PWR_MODE_TIMELINE_ONLINE); - // check AMDT_STATUS_OK == hResult - AMDTUInt32 nbrCounters = 0; - AMDTPwrCounterDesc* pCounters = nullptr; - - hResult = AMDTPwrGetSupportedCounters(&nbrCounters, &pCounters); - // check AMDT_STATUS_OK == hResult - - // cout << endl << nbrCounters << endl; - for (AMDTUInt32 idx = 0; idx < nbrCounters; idx++) + AMDTResult hResult = AMDT_STATUS_OK; + + // Initialize online mode + hResult = AMDTPwrProfileInitialize(AMDT_PWR_MODE_TIMELINE_ONLINE); + // check AMDT_STATUS_OK == hResult + AMDTUInt32 nbrCounters = 0; + AMDTPwrCounterDesc* pCounters = nullptr; + + hResult = AMDTPwrGetSupportedCounters(&nbrCounters, &pCounters); + // check AMDT_STATUS_OK == hResult + + // cout << endl << nbrCounters << endl; + for (AMDTUInt32 idx = 0; idx < nbrCounters; idx++) + { + //get only power - for now + if ((pCounters[idx].m_category == AMDT_PWR_CATEGORY_CORRELATED_POWER) && (amd_power_rate > 0)) { - //get only power - for now - if ((pCounters[idx].m_category == AMDT_PWR_CATEGORY_CORRELATED_POWER) && (amd_power_rate > 0)) - { - hResult = AMDTPwrEnableCounter(pCounters[idx].m_counterID); - } - if ((pCounters[idx].m_category == AMDT_PWR_CATEGORY_TEMPERATURE) && (amd_temp_rate > 0)) { - hResult = AMDTPwrEnableCounter(pCounters[idx].m_counterID); - } - + hResult = AMDTPwrEnableCounter(pCounters[idx].m_counterID); + } + if ((pCounters[idx].m_category == AMDT_PWR_CATEGORY_TEMPERATURE) && (amd_temp_rate > 0)) { + hResult = AMDTPwrEnableCounter(pCounters[idx].m_counterID); } - AMDTPwrSetTimerSamplingPeriod(100); - //dry profiling run, to see which counter are available for real sampling + } + AMDTPwrSetTimerSamplingPeriod(100); - AMDTPwrStartProfiling(); - std::this_thread::sleep_for(std::chrono::milliseconds(500)); + //dry profiling run, to see which counter are available for real sampling - AMDTPwrSample* pSampleData = nullptr; - AMDTUInt32 nbrSamples = 0; + AMDTPwrStartProfiling(); + std::this_thread::sleep_for(std::chrono::milliseconds(500)); - hResult = AMDTPwrReadAllEnabledCounters(&nbrSamples, &pSampleData); + AMDTPwrSample* pSampleData = nullptr; + AMDTUInt32 nbrSamples = 0; - std::vector usable_power_counters; - std::vector usable_temp_counters; + hResult = AMDTPwrReadAllEnabledCounters(&nbrSamples, &pSampleData); - if ((nullptr != pSampleData) && (nbrSamples > 0)) - { - for (size_t j = 0; j < nbrSamples; j++) - { + std::vector usable_power_counters; + std::vector usable_temp_counters; - for (size_t i = 0; i < pSampleData[0].m_numOfCounter; i++) //hardcoded to use the first sample returned - { - AMDTPwrCounterDesc counterDesc; - AMDTPwrGetCounterDesc(pSampleData[0].m_counterValues->m_counterID, &counterDesc); + if ((nullptr != pSampleData) && (nbrSamples > 0)) + { + for (size_t j = 0; j < nbrSamples; j++) + { - if ((counterDesc.m_category == AMDT_PWR_CATEGORY_CORRELATED_POWER)) - { - - if (std::find(usable_power_counters.begin(), usable_power_counters.end(), pSampleData[0].m_counterValues->m_counterID) != usable_power_counters.end() == false) { - AMDP_names.push_back(counterDesc.m_name); - usable_power_counters.push_back(pSampleData[0].m_counterValues->m_counterID); - // cout << counterDesc.m_name << " P " << pSampleData[0].m_counterValues->m_counterID << pSampleData[0].m_counterValues->m_counterID << endl; - } - } - if ((counterDesc.m_category == AMDT_PWR_CATEGORY_TEMPERATURE)) - { - - if (std::find(usable_temp_counters.begin(), usable_temp_counters.end(), pSampleData[0].m_counterValues->m_counterID) != usable_temp_counters.end() == false) { - AMDT_names.push_back(counterDesc.m_name); - usable_temp_counters.push_back(pSampleData[0].m_counterValues->m_counterID); - // cout << counterDesc.m_name << " T " << pSampleData[0].m_counterValues->m_counterID << endl; - } - } - pSampleData[i].m_counterValues++; - } - - } - - - } - //restart AMD profiling - AMDTPwrStopProfiling(); - AMDTPwrProfileClose(); - hResult = AMDTPwrProfileInitialize(AMDT_PWR_MODE_TIMELINE_ONLINE); - AMDTPwrSetTimerSamplingPeriod(sample_rate); - //reenable usable counters - for (AMDTUInt32 idx = 0; idx < usable_power_counters.size(); idx++) + for (size_t i = 0; i < pSampleData[0].m_numOfCounter; i++) //hardcoded to use the first sample returned { - AMDTPwrEnableCounter(usable_power_counters.at(idx)); - } - for (AMDTUInt32 idx = 0; idx < usable_temp_counters.size(); idx++) - { - AMDTPwrEnableCounter(usable_temp_counters.at(idx)); - } - AMDTPwrStartProfiling(); - return true; + AMDTPwrCounterDesc counterDesc; + AMDTPwrGetCounterDesc(pSampleData[0].m_counterValues->m_counterID, &counterDesc); + + if ((counterDesc.m_category == AMDT_PWR_CATEGORY_CORRELATED_POWER) ) + { + + if (std::find(usable_power_counters.begin(), usable_power_counters.end(), pSampleData[0].m_counterValues->m_counterID) != usable_power_counters.end() == false) { + AMDP_names.push_back(counterDesc.m_name); + usable_power_counters.push_back(pSampleData[0].m_counterValues->m_counterID); + // cout << counterDesc.m_name << " P " << pSampleData[0].m_counterValues->m_counterID << pSampleData[0].m_counterValues->m_counterID << endl; + } + } + if ((counterDesc.m_category == AMDT_PWR_CATEGORY_TEMPERATURE) ) + { + + if (std::find(usable_temp_counters.begin(), usable_temp_counters.end(), pSampleData[0].m_counterValues->m_counterID) != usable_temp_counters.end() == false) { + AMDT_names.push_back(counterDesc.m_name); + usable_temp_counters.push_back(pSampleData[0].m_counterValues->m_counterID); + // cout << counterDesc.m_name << " T " << pSampleData[0].m_counterValues->m_counterID << endl; + } + } + pSampleData[i].m_counterValues++; + } + + } + + + } + //restart AMD profiling + AMDTPwrStopProfiling(); + AMDTPwrProfileClose(); + hResult = AMDTPwrProfileInitialize(AMDT_PWR_MODE_TIMELINE_ONLINE); + AMDTPwrSetTimerSamplingPeriod(sample_rate); + //reenable usable counters + for (AMDTUInt32 idx = 0; idx < usable_power_counters.size(); idx++) + { + AMDTPwrEnableCounter(usable_power_counters.at(idx)); + } + for (AMDTUInt32 idx = 0; idx < usable_temp_counters.size(); idx++) + { + AMDTPwrEnableCounter(usable_temp_counters.at(idx)); + } + AMDTPwrStartProfiling(); + return true; } std::vector amd_temp_time; @@ -175,39 +175,39 @@ std::vector amd_temp[10]; //Socket 0 void amd_log_power_func() { - while ((amd_log_power == true) || (amd_log_temp == true)) - { - timeval rawtime; + while ((amd_log_power == true)|| (amd_log_temp == true)) + { + timeval rawtime; - std::this_thread::sleep_for(std::chrono::milliseconds(amd_power_rate)); + std::this_thread::sleep_for(std::chrono::milliseconds(amd_power_rate)); - AMDTResult hResult = AMDT_STATUS_OK; - AMDTPwrSample* pSampleData = nullptr; - AMDTUInt32 nbrSamples = 0; + AMDTResult hResult = AMDT_STATUS_OK; + AMDTPwrSample* pSampleData = nullptr; + AMDTUInt32 nbrSamples = 0; - gettimeofday(&rawtime, NULL); - hResult = AMDTPwrReadAllEnabledCounters(&nbrSamples, &pSampleData); + gettimeofday(&rawtime, NULL); + hResult = AMDTPwrReadAllEnabledCounters(&nbrSamples, &pSampleData); - if ((nullptr != pSampleData) && (nbrSamples > 0)) - { - for (size_t i = 0; i < pSampleData[0].m_numOfCounter; i++) //hardcoded to use the first sample returned - { + if ((nullptr != pSampleData) && (nbrSamples > 0)) + { + for (size_t i = 0; i < pSampleData[0].m_numOfCounter; i++) //hardcoded to use the first sample returned + { - AMDTPwrCounterDesc counterDesc; - AMDTPwrGetCounterDesc(pSampleData[0].m_counterValues->m_counterID, &counterDesc); - if ((counterDesc.m_category == AMDT_PWR_CATEGORY_CORRELATED_POWER)) - { - amd_power[i].push_back(pSampleData[0].m_counterValues->m_data); - } - if ((counterDesc.m_category == AMDT_PWR_CATEGORY_TEMPERATURE)) - { - amd_temp[i].push_back(pSampleData[0].m_counterValues->m_data); - } - pSampleData[i].m_counterValues++; - } - amd_power_time.push_back(timeval2storage(rawtime)); + AMDTPwrCounterDesc counterDesc; + AMDTPwrGetCounterDesc(pSampleData[0].m_counterValues->m_counterID, &counterDesc); + if ((counterDesc.m_category == AMDT_PWR_CATEGORY_CORRELATED_POWER)) + { + amd_power[i].push_back(pSampleData[0].m_counterValues->m_data); } + if ((counterDesc.m_category == AMDT_PWR_CATEGORY_TEMPERATURE)) + { + amd_temp[i].push_back(pSampleData[0].m_counterValues->m_data); + } + pSampleData[i].m_counterValues++; + } + amd_power_time.push_back(timeval2storage(rawtime)); } + } } @@ -215,8 +215,7 @@ void amd_log_power_func() void amd_log_temp_func() { - while (amd_log_temp == true) - { + while (amd_log_temp == true) { timeval rawtime; std::this_thread::sleep_for(std::chrono::milliseconds(amd_temp_rate)); @@ -228,8 +227,7 @@ void amd_log_temp_func() gettimeofday(&rawtime, NULL); hResult = AMDTPwrReadAllEnabledCounters(&nbrSamples, &pSampleData); - if ((nullptr != pSampleData) && (nbrSamples > 0)) - { + if ((nullptr != pSampleData) && (nbrSamples > 0)) { for (size_t i = 0; i < pSampleData[0].m_numOfCounter; i++) //hardcoded to use the first sample returned { @@ -261,85 +259,81 @@ std::vector intel_power_time; std::vector intel_temp_time; std::vector intel_power0[5]; //Socket 0 std::vector intel_power1[5]; //Socket 1 -std::vector MSR_names{ "package", "cores", "DRAM", "GT" }; +std::vector MSR_names { "package", "cores", "DRAM", "GT" }; std::vector intel_temp0; std::vector intel_temp1; Rapl *rapl; -void intel_log_power_func() -{ - timeval rawtime; - - if (intel_power_rate > 0) - { - uint64_t pkg; - uint64_t pp0; - uint64_t pp1; - uint64_t dram; - - intel_power0[0].clear(); - intel_power0[1].clear(); - intel_power0[2].clear(); - intel_power0[3].clear(); - intel_power1[0].clear(); - intel_power1[1].clear(); - intel_power1[2].clear(); - intel_power1[3].clear(); - intel_power_time.clear(); - - while (intel_log_power == true) - { - rapl->sample(); - std::this_thread::sleep_for(std::chrono::milliseconds(intel_power_rate / 2)); - gettimeofday(&rawtime, NULL); - std::this_thread::sleep_for(std::chrono::milliseconds(intel_power_rate / 2)); - intel_power_time.push_back(timeval2storage(rawtime)); - - rapl->get_socket0_data(pkg, pp0, pp1, dram); - intel_power0[0].push_back(pkg); - intel_power0[1].push_back(pp0); - intel_power0[2].push_back(dram); - intel_power0[3].push_back(pp1); - - if (rapl->detect_socket1() == true) - { - rapl->get_socket1_data(pkg, pp0, pp1, dram); - intel_power1[0].push_back(pkg); - intel_power1[1].push_back(pp0); - intel_power1[2].push_back(dram); - intel_power1[3].push_back(pp1); - } - } - } +void intel_log_power_func() { + timeval rawtime; + + if (intel_power_rate > 0) + { + uint64_t pkg; + uint64_t pp0; + uint64_t pp1; + uint64_t dram; + + intel_power0[0].clear(); + intel_power0[1].clear(); + intel_power0[2].clear(); + intel_power0[3].clear(); + intel_power1[0].clear(); + intel_power1[1].clear(); + intel_power1[2].clear(); + intel_power1[3].clear(); + intel_power_time.clear(); + + while (intel_log_power == true) { + rapl->sample(); + std::this_thread::sleep_for(std::chrono::milliseconds(intel_power_rate / 2)); + gettimeofday(&rawtime, NULL); + std::this_thread::sleep_for(std::chrono::milliseconds(intel_power_rate / 2)); + intel_power_time.push_back(timeval2storage(rawtime)); + + rapl->get_socket0_data(pkg, pp0, pp1, dram); + intel_power0[0].push_back(pkg); + intel_power0[1].push_back(pp0); + intel_power0[2].push_back(dram); + intel_power0[3].push_back(pp1); + + if (rapl->detect_socket1() == true) { + rapl->get_socket1_data(pkg, pp0, pp1, dram); + intel_power1[0].push_back(pkg); + intel_power1[1].push_back(pp0); + intel_power1[2].push_back(dram); + intel_power1[3].push_back(pp1); + } + } + } } void intel_log_temp_func() { - uint32_t temp0 = 0, temp1 = 0; - timeval rawtime; + uint32_t temp0 = 0, temp1 = 0; + timeval rawtime; - if (intel_temp_rate > 0) - { - intel_temp0.clear(); - intel_temp1.clear(); - intel_temp_time.clear(); + if (intel_temp_rate > 0) { + intel_temp0.clear(); + intel_temp1.clear(); + intel_temp_time.clear(); - while (intel_log_temp == true) { - std::this_thread::sleep_for(std::chrono::milliseconds(intel_temp_rate)); + while (intel_log_temp == true) { + std::this_thread::sleep_for(std::chrono::milliseconds(intel_temp_rate)); - gettimeofday(&rawtime, NULL); - temp0 = rapl->get_temp0(); - if (rapl->detect_socket1() == true) - { - temp1 = rapl->get_temp1(); - intel_temp1.push_back(temp1); - } + gettimeofday(&rawtime, NULL); + temp0 = rapl->get_temp0(); + if (rapl->detect_socket1() == true) + { + temp1 = rapl->get_temp1(); + intel_temp1.push_back(temp1); + } - intel_temp_time.push_back(timeval2storage(rawtime)); - intel_temp0.push_back(temp0); - } + intel_temp_time.push_back(timeval2storage(rawtime)); + intel_temp0.push_back(temp0); } + } } #endif // USEIRAPL @@ -353,8 +347,8 @@ Rapl *rapl; std::string utf16ToUtf8(const std::wstring& utf16Str) { - std::wstring_convert> conv; - return conv.to_bytes(utf16Str); + std::wstring_convert> conv; + return conv.to_bytes(utf16Str); } std::vector intel_power_time; @@ -371,51 +365,48 @@ std::vector intel_temp; void intel_log_temp_func() { - // int temp; - timeval rawtime; + // int temp; + timeval rawtime; - if (intel_temp_rate > 0) - { - intel_temp.clear(); - intel_temp_time.clear(); + if (intel_temp_rate > 0) { + intel_temp.clear(); + intel_temp_time.clear(); - while (intel_log_temp == true) - { - std::this_thread::sleep_for(std::chrono::milliseconds(intel_temp_rate)); - intel_temp.push_back(rapl->get_temp0()); - gettimeofday(&rawtime, NULL); - intel_temp_time.push_back(timeval2storage(rawtime)); + while (intel_log_temp == true) { + std::this_thread::sleep_for(std::chrono::milliseconds(intel_temp_rate)); + intel_temp.push_back(rapl->get_temp0()); + gettimeofday(&rawtime, NULL); + intel_temp_time.push_back(timeval2storage(rawtime)); - } } + } } void intel_log_power_func() { - double data[3]; - int nData; - timeval rawtime; - - if (intel_power_rate > 0) - { - for (unsigned int i = 0; i < MSR.size(); i++) { - intel_power[i].clear(); - } - intel_power_time.clear(); - - while (intel_log_power == true) - { - std::this_thread::sleep_for(std::chrono::milliseconds(intel_power_rate)); - rapl->sample(); - gettimeofday(&rawtime, NULL); - intel_power_time.push_back(timeval2storage(rawtime)); - - for (unsigned int i = 0; i < MSR.size(); i++) { - rapl->GetPowerData(0, MSR.at(i), data, &nData); - intel_power[i].push_back((float)data[0]); - } - } - } + double data[3]; + int nData; + timeval rawtime; + + if (intel_power_rate > 0) + { + for (unsigned int i = 0; i < MSR.size(); i++) { + intel_power[i].clear(); + } + intel_power_time.clear(); + + while (intel_log_power == true) { + std::this_thread::sleep_for(std::chrono::milliseconds(intel_power_rate)); + rapl->sample(); + gettimeofday(&rawtime, NULL); + intel_power_time.push_back(timeval2storage(rawtime)); + + for (unsigned int i = 0; i < MSR.size(); i++) { + rapl->GetPowerData(0, MSR.at(i), data, &nData); + intel_power[i].push_back((float)data[0]); + } + } + } } @@ -424,6 +415,7 @@ void intel_log_power_func() + #if defined(USENVML) #include bool nvidia_log_power = false; @@ -439,8 +431,7 @@ std::vector nvidia_power_time; void nvidia_log_power_func() { - if (nvidia_power_rate > 0) - { + if (nvidia_power_rate > 0) { unsigned int temp; nvmlReturn_t result; timeval rawtime; @@ -448,8 +439,7 @@ void nvidia_log_power_func() nvidia_power.clear(); nvidia_power_time.clear(); - while (nvidia_log_power == true) - { + while (nvidia_log_power == true) { std::this_thread::sleep_for(std::chrono::milliseconds(nvidia_power_rate)); nvmlDeviceGetPowerUsage(device, &temp); @@ -463,8 +453,7 @@ void nvidia_log_power_func() } } -void nvidia_log_temp_func() -{ +void nvidia_log_temp_func() { if (nvidia_temp_rate > 0) { unsigned int temp; @@ -498,20 +487,20 @@ typedef LONG NTSTATUS, *PNTSTATUS; typedef NTSTATUS(WINAPI* RtlGetVersionPtr)(PRTL_OSVERSIONINFOEXW); RTL_OSVERSIONINFOEXW GetRealOSVersion() { - HMODULE hMod = ::GetModuleHandleW(L"ntdll.dll"); - if (hMod) { - RtlGetVersionPtr fxPtr = (RtlGetVersionPtr)::GetProcAddress(hMod, "RtlGetVersion"); - if (fxPtr != nullptr) { - RTL_OSVERSIONINFOEXW rovi = { 0 }; - rovi.dwOSVersionInfoSize = sizeof(rovi); - if (STATUS_SUCCESS == fxPtr(&rovi)) { - return rovi; - } - } - } - - RTL_OSVERSIONINFOEXW rovi = { 0 }; - return rovi; + HMODULE hMod = ::GetModuleHandleW(L"ntdll.dll"); + if (hMod) { + RtlGetVersionPtr fxPtr = (RtlGetVersionPtr)::GetProcAddress(hMod, "RtlGetVersion"); + if (fxPtr != nullptr) { + RTL_OSVERSIONINFOEXW rovi = { 0 }; + rovi.dwOSVersionInfoSize = sizeof(rovi); + if (STATUS_SUCCESS == fxPtr(&rovi)) { + return rovi; + } + } + } + + RTL_OSVERSIONINFOEXW rovi = { 0 }; + return rovi; } #else #include @@ -519,881 +508,924 @@ RTL_OSVERSIONINFOEXW GetRealOSVersion() { std::string getOS() { - std::stringstream version; + std::stringstream version; #if defined(_WIN32) - version << "Windows " << GetRealOSVersion().dwMajorVersion << "." << GetRealOSVersion().dwMinorVersion; + version << "Windows " << GetRealOSVersion().dwMajorVersion << "." << GetRealOSVersion().dwMinorVersion; - if (GetRealOSVersion().wProductType == VER_NT_WORKSTATION) { - version << " Workstation"; - } - else { - version << " Server"; - } + if (GetRealOSVersion().wProductType == VER_NT_WORKSTATION) { + version << " Workstation"; + } + else { + version << " Server"; + } #elif defined(__APPLE__) - char line[256]; - string product_name, product_version; - FILE* sw_vers = popen("sw_vers", "r"); - while (fgets(&line[0], sizeof(line), sw_vers) != nullptr) { - if (strncmp(line, "ProductName:", 12) == 0) { - product_name = string(&line[13]); - product_name.pop_back(); // erase the newline - } - else if (strncmp(line, "ProductVersion:", 15) == 0) { - product_version = string(&line[16]); - product_version.pop_back(); // erase the newline - } + char line[256]; + string product_name, product_version; + FILE* sw_vers = popen("sw_vers", "r"); + while (fgets(&line[0], sizeof(line), sw_vers) != nullptr) { + if (strncmp(line, "ProductName:", 12) == 0) { + product_name = string(&line[13]); + product_name.pop_back(); // erase the newline + } + else if (strncmp(line, "ProductVersion:", 15) == 0) { + product_version = string(&line[16]); + product_version.pop_back(); // erase the newline } - pclose(sw_vers); - version << product_name << " " << product_version; + } + pclose(sw_vers); + version << product_name << " " << product_version; #else // linux - struct utsname unameData; - uname(&unameData); - string line; + struct utsname unameData; + uname(&unameData); + string line; - version << unameData.sysname << " "; - - ifstream rel_file("/etc/os-release"); - if (rel_file.is_open()) { - while (rel_file.good()) { - getline(rel_file, line); - if (line.size() >= 1 && line.substr(0, 11) == "PRETTY_NAME") { - version << line.substr(13, line.length() - 14); - break; - } - } + version << unameData.sysname << " "; - rel_file.close(); - } - else { - version << "Unknown Distribution"; + ifstream rel_file("/etc/os-release"); + if (rel_file.is_open()) { + while (rel_file.good()) { + getline(rel_file, line); + if (line.size() >= 1 && line.substr(0, 11) == "PRETTY_NAME") { + version << line.substr(13, line.length() - 14); + break; + } } - version << "/" << unameData.release << "/" << unameData.version; + rel_file.close(); + } + else { + version << "Unknown Distribution"; + } -#endif - - return version.str(); -} + version << "/" << unameData.release << "/" << unameData.version; +#endif -// command line arguments -char const* getCmdOption(char** begin, char** end, std::string const& option) -{ - char** itr = find(begin, end, option); - if (itr != end && ++itr != end) { - return *itr; - } - return 0; + return version.str(); } -bool cmdOptionExists(char** begin, char** end, const std::string& option) -{ - return find(begin, end, option) != end; -} void print_help() { - cout - << "Usage: toolkitICL [options] -c config.h5" << endl - << "Options:" << endl - << " -d device_id: \n" - " Use the device specified by `device_id`." << endl - << " -b: \n" - " Activate the benchmark mode (additional delay before & after runs)." << endl - << " -c config.h5: \n" - " Specify the URL `config.h5` of the HDF5 configuration file." << endl + cout + << "Usage: toolkitICL [options] -c config.h5" << endl + << "Options:" << endl + << " -d device_id: \n" + " Use the device specified by `device_id`." << endl + << " -b: \n" + " Activate the benchmark mode (additional delay before & after runs)." << endl + << " -c config.h5: \n" + " Specify the URL `config.h5` of the HDF5 configuration file." << endl #if defined(USENVML) - << " -nvidia_power sample_rate: \n" - " Log Nvidia GPU power consumption with `sample_rate` (ms)" << endl - << " -nvidia_temp sample_rate: \n" - " Log Nvidia GPU temperature with `sample_rate` (ms)" << endl + << " -nvidia_power sample_rate: \n" + " Log Nvidia GPU power consumption with `sample_rate` (ms)" << endl + << " -nvidia_temp sample_rate: \n" + " Log Nvidia GPU temperature with `sample_rate` (ms)" << endl #endif #if defined(USEIPG) || defined(USEIRAPL) - << " -intel_power sample_rate: \n" - " Log Intel system power consumption with `sample_rate` (ms)" << endl - << " -intel_temp sample_rate: \n" - " Log Intel package temperature with `sample_rate` (ms)" << endl + << " -intel_power sample_rate: \n" + " Log Intel system power consumption with `sample_rate` (ms)" << endl + << " -intel_temp sample_rate: \n" + " Log Intel package temperature with `sample_rate` (ms)" << endl #endif #if defined(USEAMDP) - << " -amd_cpu_power sample_rate: \n" - " Log AMD CPU power consumption with `sample_rate` (ms)" << endl - << " -amd_cpu_temp sample_rate: \n" - " Log AMD CPU temperaturen with `sample_rate` (ms)" << endl + << " -amd_cpu_power sample_rate: \n" + " Log AMD CPU power consumption with `sample_rate` (ms)" << endl + << " -amd_cpu_temp sample_rate: \n" + " Log AMD CPU temperaturen with `sample_rate` (ms)" << endl #endif - << endl; + << endl; } int main(int argc, char *argv[]) { - Timer timer; //used to track performance + Timer timer; //used to track performance - cl_uint deviceIndex = 0; // set default OpenCL Device + ocl_dev_mgr& dev_mgr = ocl_dev_mgr::getInstance(); + cl_uint devices_availble = dev_mgr.get_avail_dev_num(); - ocl_dev_mgr& dev_mgr = ocl_dev_mgr::getInstance(); - cl_uint devices_availble = dev_mgr.get_avail_dev_num(); + cout << "Available OpenCL devices: " << devices_availble << endl; + // default options + cl_uint deviceIndex = 0; + bool benchmark_mode = false; + char const* filename = nullptr; - // parse command line arguments - bool benchmark_mode = false; - if (cmdOptionExists(argv, argv + argc, "-b")) { - benchmark_mode = true; - cout << "Benchmark mode" << endl << endl; + // parse command line arguments starting at index 1 (because toolkitICL is the 0th argument) + for (int option_idx = 1; option_idx < argc; ++option_idx) + { + if (argv[option_idx] == string("-h")) { + print_help(); + return 0; } - else { - cout << "Available OpenCL devices: " << devices_availble << endl; + else if (argv[option_idx] == string("-b")) { + benchmark_mode = true; + cout << "Benchmark mode" << endl << endl; } - - if (cmdOptionExists(argv, argv + argc, "-d")) { - char const* dev_id = getCmdOption(argv, argv + argc, "-d"); - deviceIndex = atoi(dev_id); + else if (argv[option_idx] == string("-d")) { + ++option_idx; + try { + deviceIndex = stoi(argv[option_idx]); + } + catch (const std::exception& e) { + cerr << "Error: Could not convert '" << argv[option_idx] << "' to an integer." << endl; + throw(e); + } } - - if (cmdOptionExists(argv, argv + argc, "-h") || !cmdOptionExists(argv, argv + argc, "-c")) { - print_help(); - return 0; + else if (argv[option_idx] == string("-c")) { + ++option_idx; + filename = argv[option_idx]; } - char const* filename = getCmdOption(argv, argv + argc, "-c"); - #if defined(USENVML) - if (cmdOptionExists(argv, argv + argc, "-nvidia_power")) { - char const* tmp = getCmdOption(argv, argv + argc, "-nvidia_power"); - nvidia_power_rate = atoi(tmp); - nvidia_log_power = true; - } - - if (cmdOptionExists(argv, argv + argc, "-nvidia_temp")) { - char const* tmp = getCmdOption(argv, argv + argc, "-nvidia_temp"); - nvidia_temp_rate = atoi(tmp); - nvidia_log_temp = true; - } -#endif + else if (argv[option_idx] == string("-nvidia_power")) { + ++option_idx; + try { + nvidia_power_rate = stoi(argv[option_idx]); + } + catch (const std::exception& e) { + cerr << "Error: Could not convert '" << argv[option_idx] << "' to an integer." << endl; + throw(e); + } + nvidia_log_power = true; + } + else if (argv[option_idx] == string("-nvidia_temp") || argv[option_idx] == string("-nvidia_temperature")) { + ++option_idx; + try { + nvidia_temp_rate = stoi(argv[option_idx]); + } + catch (const std::exception& e) { + cerr << "Error: Could not convert '" << argv[option_idx] << "' to an integer." << endl; + throw(e); + } + nvidia_log_temp = true; + } +#endif // defined(USENVML) #if defined(USEIPG) || defined(USEIRAPL) - if (cmdOptionExists(argv, argv + argc, "-intel_power")) { - char const* tmp = getCmdOption(argv, argv + argc, "-intel_power"); - intel_power_rate = atoi(tmp); - intel_log_power = true; - } - if (cmdOptionExists(argv, argv + argc, "-intel_temp")) { - char const* tmp = getCmdOption(argv, argv + argc, "-intel_temp"); - intel_temp_rate = atoi(tmp); - intel_log_temp = true; - } -#endif + else if (argv[option_idx] == string("-intel_power")) { + ++option_idx; + try { + intel_power_rate = stoi(argv[option_idx]); + } + catch (const std::exception& e) { + cerr << "Error: Could not convert '" << argv[option_idx] << "' to an integer." << endl; + throw(e); + } + intel_log_power = true; + } + else if (argv[option_idx] == string("-intel_temp") || argv[option_idx] == string("-intel_temperature")) { + ++option_idx; + try { + intel_temp_rate = stoi(argv[option_idx]); + } + catch (const std::exception& e) { + cerr << "Error: Could not convert '" << argv[option_idx] << "' to an integer." << endl; + throw(e); + } + intel_log_temp = true; + } +#endif // defined(USEIPG) || defined(USEIRAPL) #if defined(USEAMDP) - if (cmdOptionExists(argv, argv + argc, "-amd_cpu_power")) { - char const* tmp = getCmdOption(argv, argv + argc, "-amd_cpu_power"); - amd_power_rate = atoi(tmp); - amd_log_power = true; - } - if (cmdOptionExists(argv, argv + argc, "-amd_cpu_temp")) { - char const* tmp = getCmdOption(argv, argv + argc, "-amd_cpu_temp"); - amd_temp_rate = atoi(tmp); - amd_log_temp = true; - } - - if ((amd_log_temp == true) && (amd_log_power == true)) { - cout << endl << "Concurrent logging on AMD systems is not suported, yet!" << endl << endl; - } - -#endif - - cout << dev_mgr.get_avail_dev_info(deviceIndex).name.c_str() << endl; - if (benchmark_mode == false) { - cout << "OpenCL version: " << dev_mgr.get_avail_dev_info(deviceIndex).ocl_version.c_str() << endl; - cout << "Memory limit: " << dev_mgr.get_avail_dev_info(deviceIndex).max_mem << endl; - cout << "WG limit: " << dev_mgr.get_avail_dev_info(deviceIndex).wg_size << endl << endl; - } - dev_mgr.init_device(deviceIndex); - - string kernel_url; - if (h5_check_object(filename, "kernel_url") == true) { - h5_read_string(filename, "kernel_url", kernel_url); - if (benchmark_mode == false) { - cout << "Reading kernel from file: " << kernel_url << "... " << endl; - } - } - else if (h5_check_object(filename, "kernel_source") == true) { - if (benchmark_mode == false) { - cout << "Reading kernel from HDF5 file... " << endl; - } - std::vector kernel_source; - h5_read_strings(filename, "kernel_source", kernel_source); - ofstream tmp_clfile; - tmp_clfile.open("tmp_kernel.cl"); - for (string const& kernel : kernel_source) { - tmp_clfile << kernel << endl; - } - - tmp_clfile.close(); - kernel_url = string("tmp_kernel.cl"); - } + else if (argv[option_idx] == string("-amd_cpu_power")) { + ++option_idx; + try { + amd_power_rate = stoi(argv[option_idx]); + } + catch (const std::exception& e) { + cerr << "Error: Could not convert '" << argv[option_idx] << "' to an integer." << endl; + throw(e); + } + amd_log_power = true; + } + else if (argv[option_idx] == string("-amd_cpu_temp") || argv[option_idx] == string("-amd_cpu_temperature")) { + ++option_idx; + try { + amd_temp_rate = stoi(argv[option_idx]); + } + catch (const std::exception& e) { + cerr << "Error: Could not convert '" << argv[option_idx] << "' to an integer." << endl; + throw(e); + } + amd_log_temp = true; + } +#endif // defined(USEAMDP) else { - cerr << "No kernel information found! " << endl; - return -1; - } - - std::vector kernel_list; - h5_read_strings(filename, "kernels", kernel_list); - - cl_ulong kernel_repetitions = 1; - if (h5_check_object(filename, "settings/kernel_repetitions")) { - kernel_repetitions = h5_read_single(filename, "settings/kernel_repetitions"); - } - if (kernel_repetitions <= 0) { - cout << "Warning: Setting `kernel_repetitions = " << kernel_repetitions << "` implies that no kernels are executed." << endl; - } - - dev_mgr.add_program_url(0, "ocl_Kernel", kernel_url); - - string settings; - h5_read_string(filename, "settings/kernel_settings", settings); - - - uint64_t num_kernels_found = 0; - num_kernels_found = dev_mgr.compile_kernel(0, "ocl_Kernel", settings); - if (num_kernels_found == 0) { - cerr << ERROR_INFO << "No valid kernels found" << endl; - return -1; - } - - std::vector found_kernels; - dev_mgr.get_kernel_names(0, "ocl_Kernel", found_kernels); - if (benchmark_mode == false) { - cout << "Found Kernels: " << found_kernels.size() << endl; + cerr << "Error: Unknown command line option '" << argv[option_idx] << "'." << endl; + print_help(); + return -1; } - if (found_kernels.size() == 0) { - cerr << ERROR_INFO << "No valid kernels found." << endl; - return -1; - } - - cout << "Number of Kernels to execute: " << kernel_list.size() * kernel_repetitions << endl; - - //TODO: Clean up; debug mode? - // for (uint32_t kernel_idx = 0; kernel_idx < kernel_list.size(); kernel_idx++) { - // cout <<"Found : "<< kernel_list.at(kernel_idx) << endl; - // } - - cout << "Ingesting HDF5 config file..." << endl; - - std::vector data_names; - std::vector data_types; - std::vector data_sizes; - h5_get_content(filename, "/data/", data_names, data_types, data_sizes); - - cout << "Creating output HDF5 file..." << endl; - string out_name = "out_" + string(filename); - - if (fileExists(out_name)) { - remove(out_name.c_str()); - cout << "Old HDF5 data file found and deleted!" << endl; - } - - h5_create_dir(out_name, "/settings"); - h5_write_string(out_name, "/settings/kernel_settings", settings); - h5_write_single(out_name, "/settings/kernel_repetitions", kernel_repetitions); - - std::vector data_in; - bool blocking = CL_TRUE; - - //TODO: Implement functionality! Allow other integer types instead of cl_int? - vector data_rw_flags(data_names.size(), 0); - - uint64_t push_time, pull_time; - push_time = timer.getTimeMicroseconds(); - - for (cl_uint i = 0; i < data_names.size(); i++) { - try { - uint8_t *tmp_data = nullptr; - size_t var_size = 0; - - switch (data_types.at(i)) { - case H5_float: - var_size = data_sizes.at(i) * sizeof(float); - tmp_data = new uint8_t[var_size]; - h5_read_buffer(filename, data_names.at(i).c_str(), (float*)tmp_data); - break; - case H5_double: - var_size = data_sizes.at(i) * sizeof(double); - tmp_data = new uint8_t[var_size]; - h5_read_buffer(filename, data_names.at(i).c_str(), (double*)tmp_data); - break; - case H5_char: - var_size = data_sizes.at(i) * sizeof(cl_char); - tmp_data = new uint8_t[var_size]; - h5_read_buffer(filename, data_names.at(i).c_str(), (cl_char*)tmp_data); - break; - case H5_uchar: - var_size = data_sizes.at(i) * sizeof(cl_uchar); - tmp_data = new uint8_t[var_size]; - h5_read_buffer(filename, data_names.at(i).c_str(), (cl_uchar*)tmp_data); - break; - case H5_short: - var_size = data_sizes.at(i) * sizeof(cl_short); - tmp_data = new uint8_t[var_size]; - h5_read_buffer(filename, data_names.at(i).c_str(), (cl_short*)tmp_data); - break; - case H5_ushort: - var_size = data_sizes.at(i) * sizeof(cl_ushort); - tmp_data = new uint8_t[var_size]; - h5_read_buffer(filename, data_names.at(i).c_str(), (cl_ushort*)tmp_data); - break; - case H5_int: - var_size = data_sizes.at(i) * sizeof(cl_int); - tmp_data = new uint8_t[var_size]; - h5_read_buffer(filename, data_names.at(i).c_str(), (cl_int*)tmp_data); - break; - case H5_uint: - var_size = data_sizes.at(i) * sizeof(cl_uint); - tmp_data = new uint8_t[var_size]; - h5_read_buffer(filename, data_names.at(i).c_str(), (cl_uint*)tmp_data); - break; - case H5_long: - var_size = data_sizes.at(i) * sizeof(cl_long); - tmp_data = new uint8_t[var_size]; - h5_read_buffer(filename, data_names.at(i).c_str(), (cl_long*)tmp_data); - break; - case H5_ulong: - var_size = data_sizes.at(i) * sizeof(cl_ulong); - tmp_data = new uint8_t[var_size]; - h5_read_buffer(filename, data_names.at(i).c_str(), (cl_ulong*)tmp_data); - break; - default: - cerr << ERROR_INFO << "Data type '" << data_types.at(i) << "' unknown." << endl; - break; - } - - switch (data_rw_flags.at(i)) { - case 0: - data_in.push_back(cl::Buffer(dev_mgr.get_context(0), CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, var_size)); - dev_mgr.get_queue(0, 0).enqueueWriteBuffer(data_in.back(), blocking, 0, var_size, tmp_data); - break; - case 1: - data_in.push_back(cl::Buffer(dev_mgr.get_context(0), CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, var_size)); - dev_mgr.get_queue(0, 0).enqueueWriteBuffer(data_in.back(), blocking, 0, var_size, tmp_data); - break; - case 2: - data_in.push_back(cl::Buffer(dev_mgr.get_context(0), CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, var_size)); - break; - } - - for (uint32_t kernel_idx = 0; kernel_idx < found_kernels.size(); kernel_idx++) { - dev_mgr.getKernelbyName(0, "ocl_Kernel", found_kernels.at(kernel_idx))->setArg(i, data_in.back()); - } - - if (tmp_data != nullptr) { - delete[] tmp_data; tmp_data = nullptr; - } - } - catch (cl::Error err) { - std::cerr << ERROR_INFO << "Exception: " << err.what() << std::endl; - } - } - - dev_mgr.get_queue(0, 0).finish(); // Buffer Copy is asynchronous - - push_time = timer.getTimeMicroseconds() - push_time; - - if (benchmark_mode == false) { - cout << "Setting range..." << endl; - } - - cl::NDRange range_start; - cl::NDRange global_range; - cl::NDRange local_range; + } - //TODO: Allow other integer types instead of cl_int? - cl_int tmp_range[3]; - h5_read_buffer(filename, "/settings/global_range", tmp_range); - global_range = cl::NDRange(tmp_range[0], tmp_range[1], tmp_range[2]); - h5_write_buffer(out_name, "/settings/global_range", tmp_range, 3); + // check necessary/incompatible command line arguments + if (filename == nullptr) { + cerr << "Error: A configuration file must be given as command line argument." << endl; + print_help(); + return -1; + } - h5_read_buffer(filename, "/settings/range_start", tmp_range); - range_start = cl::NDRange(tmp_range[0], tmp_range[1], tmp_range[2]); - h5_write_buffer(out_name, "/settings/range_start", tmp_range, 3); - - h5_read_buffer(filename, "/settings/local_range", tmp_range); - h5_write_buffer(out_name, "/settings/local_range", tmp_range, 3); - if ((tmp_range[0] == 0) && (tmp_range[1] == 0) && (tmp_range[2] == 0)) { - local_range = cl::NullRange; - } - else { - local_range = cl::NDRange(tmp_range[0], tmp_range[1], tmp_range[2]); - } +#if defined(USEAMDP) + if (amd_log_temp && amd_log_power) { + cerr << endl << "Error: Concurrent logging on AMD systems is not suported, yet!" << endl; + return -1; + } +#endif // defined(USEAMDP) + + + cout << dev_mgr.get_avail_dev_info(deviceIndex).name.c_str() << endl; + if (benchmark_mode == false) { + cout << "OpenCL version: " << dev_mgr.get_avail_dev_info(deviceIndex).ocl_version.c_str() << endl; + cout << "Memory limit: " << dev_mgr.get_avail_dev_info(deviceIndex).max_mem << endl; + cout << "WG limit: " << dev_mgr.get_avail_dev_info(deviceIndex).wg_size << endl << endl; + } + dev_mgr.init_device(deviceIndex); + + string kernel_url; + if (h5_check_object(filename, "kernel_url") == true) { + h5_read_string(filename, "kernel_url", kernel_url); + if (benchmark_mode == false) { + cout << "Reading kernel from file: " << kernel_url << "... " << endl; + } + } + else if (h5_check_object(filename, "kernel_source") == true) { + if (benchmark_mode == false) { + cout << "Reading kernel from HDF5 file... " << endl; + } + std::vector kernel_source; + h5_read_strings(filename, "kernel_source", kernel_source); + ofstream tmp_clfile; + tmp_clfile.open("tmp_kernel.cl"); + for (string const& kernel : kernel_source) { + tmp_clfile << kernel << endl; + } + + tmp_clfile.close(); + kernel_url = string("tmp_kernel.cl"); + } + else { + cerr << "No kernel information found! " << endl; + return -1; + } + + std::vector kernel_list; + h5_read_strings(filename, "kernels", kernel_list); + + cl_ulong kernel_repetitions = 1; + if (h5_check_object(filename, "settings/kernel_repetitions")) { + kernel_repetitions = h5_read_single(filename, "settings/kernel_repetitions"); + } + if (kernel_repetitions <= 0) { + cout << "Warning: Setting `kernel_repetitions = " << kernel_repetitions << "` implies that no kernels are executed." << endl; + } + + dev_mgr.add_program_url(0, "ocl_Kernel", kernel_url); + + string settings; + h5_read_string(filename, "settings/kernel_settings", settings); + + + uint64_t num_kernels_found = 0; + num_kernels_found = dev_mgr.compile_kernel(0, "ocl_Kernel", settings); + if (num_kernels_found == 0) { + cerr << ERROR_INFO << "No valid kernels found" << endl; + return -1; + } + + std::vector found_kernels; + dev_mgr.get_kernel_names(0, "ocl_Kernel", found_kernels); + if (benchmark_mode == false) { + cout << "Found Kernels: " << found_kernels.size() << endl; + } + if (found_kernels.size() == 0) { + cerr << ERROR_INFO << "No valid kernels found." << endl; + return -1; + } + + cout << "Number of Kernels to execute: " << kernel_list.size() * kernel_repetitions << endl; + + //TODO: Clean up; debug mode? + // for (uint32_t kernel_idx = 0; kernel_idx < kernel_list.size(); kernel_idx++) { + // cout <<"Found : "<< kernel_list.at(kernel_idx) << endl; + // } + + cout << "Ingesting HDF5 config file..." << endl; + + std::vector data_names; + std::vector data_types; + std::vector data_sizes; + h5_get_content(filename, "/data/", data_names, data_types, data_sizes); + + cout << "Creating output HDF5 file..." << endl; + string out_name = filename; + out_name = "out_" + out_name.substr(out_name.find_last_of("/\\") + 1); + + if (fileExists(out_name)) { + remove(out_name.c_str()); + cout << "Old HDF5 data file found and deleted!" << endl; + } + + h5_create_dir(out_name, "/settings"); + h5_write_string(out_name, "/settings/kernel_settings", settings); + h5_write_single(out_name, "/settings/kernel_repetitions", kernel_repetitions); + + std::vector data_in; + bool blocking = CL_TRUE; + + //TODO: Implement functionality! Allow other integer types instead of cl_int? + vector data_rw_flags(data_names.size(), 0); + + uint64_t push_time, pull_time; + push_time = timer.getTimeMicroseconds(); + + for (cl_uint i = 0; i < data_names.size(); i++) { + try { + uint8_t *tmp_data = nullptr; + size_t var_size = 0; + + switch (data_types.at(i)) { + case H5_float: + var_size = data_sizes.at(i) * sizeof(float); + tmp_data = new uint8_t[var_size]; + h5_read_buffer(filename, data_names.at(i).c_str(), (float*)tmp_data); + break; + case H5_double: + var_size = data_sizes.at(i) * sizeof(double); + tmp_data = new uint8_t[var_size]; + h5_read_buffer(filename, data_names.at(i).c_str(), (double*)tmp_data); + break; + case H5_char: + var_size = data_sizes.at(i) * sizeof(cl_char); + tmp_data = new uint8_t[var_size]; + h5_read_buffer(filename, data_names.at(i).c_str(), (cl_char*)tmp_data); + break; + case H5_uchar: + var_size = data_sizes.at(i) * sizeof(cl_uchar); + tmp_data = new uint8_t[var_size]; + h5_read_buffer(filename, data_names.at(i).c_str(), (cl_uchar*)tmp_data); + break; + case H5_short: + var_size = data_sizes.at(i) * sizeof(cl_short); + tmp_data = new uint8_t[var_size]; + h5_read_buffer(filename, data_names.at(i).c_str(), (cl_short*)tmp_data); + break; + case H5_ushort: + var_size = data_sizes.at(i) * sizeof(cl_ushort); + tmp_data = new uint8_t[var_size]; + h5_read_buffer(filename, data_names.at(i).c_str(), (cl_ushort*)tmp_data); + break; + case H5_int: + var_size = data_sizes.at(i) * sizeof(cl_int); + tmp_data = new uint8_t[var_size]; + h5_read_buffer(filename, data_names.at(i).c_str(), (cl_int*)tmp_data); + break; + case H5_uint: + var_size = data_sizes.at(i) * sizeof(cl_uint); + tmp_data = new uint8_t[var_size]; + h5_read_buffer(filename, data_names.at(i).c_str(), (cl_uint*)tmp_data); + break; + case H5_long: + var_size = data_sizes.at(i) * sizeof(cl_long); + tmp_data = new uint8_t[var_size]; + h5_read_buffer(filename, data_names.at(i).c_str(), (cl_long*)tmp_data); + break; + case H5_ulong: + var_size = data_sizes.at(i) * sizeof(cl_ulong); + tmp_data = new uint8_t[var_size]; + h5_read_buffer(filename, data_names.at(i).c_str(), (cl_ulong*)tmp_data); + break; + default: + cerr << ERROR_INFO << "Data type '" << data_types.at(i) << "' unknown." << endl; + break; + } + + switch (data_rw_flags.at(i)) { + case 0: + data_in.push_back(cl::Buffer(dev_mgr.get_context(0), CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, var_size)); + dev_mgr.get_queue(0, 0).enqueueWriteBuffer(data_in.back(), blocking, 0, var_size, tmp_data); + break; + case 1: + data_in.push_back(cl::Buffer(dev_mgr.get_context(0), CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, var_size)); + dev_mgr.get_queue(0, 0).enqueueWriteBuffer(data_in.back(), blocking, 0, var_size, tmp_data); + break; + case 2: + data_in.push_back(cl::Buffer(dev_mgr.get_context(0), CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, var_size)); + break; + } + + for (uint32_t kernel_idx = 0; kernel_idx < found_kernels.size(); kernel_idx++) { + dev_mgr.getKernelbyName(0, "ocl_Kernel", found_kernels.at(kernel_idx))->setArg(i, data_in.back()); + } + + if (tmp_data != nullptr) { + delete[] tmp_data; tmp_data = nullptr; + } + } + catch (cl::Error err) { + std::cerr << ERROR_INFO << "Exception: " << err.what() << std::endl; + } + } + + dev_mgr.get_queue(0, 0).finish(); // Buffer Copy is asynchronous + + push_time = timer.getTimeMicroseconds() - push_time; + + if (benchmark_mode == false) { + cout << "Setting range..." << endl; + } + + cl::NDRange range_start; + cl::NDRange global_range; + cl::NDRange local_range; + + //TODO: Allow other integer types instead of cl_int? + cl_int tmp_range[3]; + h5_read_buffer(filename, "/settings/global_range", tmp_range); + global_range = cl::NDRange(tmp_range[0], tmp_range[1], tmp_range[2]); + h5_write_buffer(out_name, "/settings/global_range", tmp_range, 3); + + h5_read_buffer(filename, "/settings/range_start", tmp_range); + range_start = cl::NDRange(tmp_range[0], tmp_range[1], tmp_range[2]); + h5_write_buffer(out_name, "/settings/range_start", tmp_range, 3); + + h5_read_buffer(filename, "/settings/local_range", tmp_range); + h5_write_buffer(out_name, "/settings/local_range", tmp_range, 3); + if ((tmp_range[0] == 0) && (tmp_range[1] == 0) && (tmp_range[2] == 0)) { + local_range = cl::NullRange; + } + else { + local_range = cl::NDRange(tmp_range[0], tmp_range[1], tmp_range[2]); + } #if defined(USEAMDP) - if (amd_log_power || amd_log_temp) - { - cout << "Using AMD Power Profiling interface..." << endl << endl; - h5_create_dir(out_name, "/housekeeping"); - h5_create_dir(out_name, "/housekeeping/amd"); - initAMDPP(amd_power_rate); - } - std::thread amd_log_power_thread(amd_log_power_func); - // std::thread amd_log_temp_thread(amd_log_temp_func); + if (amd_log_power||amd_log_temp) + { + cout << "Using AMD Power Profiling interface..." << endl << endl; + h5_create_dir(out_name, "/housekeeping"); + h5_create_dir(out_name, "/housekeeping/amd"); + initAMDPP(amd_power_rate); + } + std::thread amd_log_power_thread(amd_log_power_func); + // std::thread amd_log_temp_thread(amd_log_temp_func); #endif #if defined(USENVML) #if defined(_WIN32) - if (nvidia_log_power || nvidia_log_temp) - { - //Get Program Files path from system - TCHAR pf[MAX_PATH]; - SHGetSpecialFolderPath(0, pf, CSIDL_PROGRAM_FILES, FALSE); - std::string nvsmi_path; - nvsmi_path.append(pf); - nvsmi_path.append("/NVIDIA Corporation/NVSMI/nvml.dll"); - if (fileExists(nvsmi_path)) { - LoadLibraryEx(nvsmi_path.c_str(), NULL, 0); - } - else { - if (fileExists("nvml.dll")) { - LoadLibraryEx("nvml.dll", NULL, 0); - } - else { - //No NVML found abort - cout << "NVML library not found..." << endl; - nvidia_log_temp = false; - nvidia_log_power = false; - nvidia_power_rate = 0; - nvidia_temp_rate = 0; - } - } - } + if (nvidia_log_power || nvidia_log_temp) + { + //Get Program Files path from system + TCHAR pf[MAX_PATH]; + SHGetSpecialFolderPath(0,pf,CSIDL_PROGRAM_FILES,FALSE); + std::string nvsmi_path; + nvsmi_path.append(pf); + nvsmi_path.append("/NVIDIA Corporation/NVSMI/nvml.dll"); + if (fileExists(nvsmi_path)) { + LoadLibraryEx(nvsmi_path.c_str(), NULL, 0); + }else { + if (fileExists("nvml.dll")) { + LoadLibraryEx("nvml.dll", NULL, 0); + } + else { + //No NVML found abort + cout << "NVML library not found..." << endl; + nvidia_log_temp = false; + nvidia_log_power = false; + nvidia_power_rate = 0; + nvidia_temp_rate = 0; + } + } + } #endif - if (nvidia_log_power || nvidia_log_temp) - { - nvmlReturn_t result; - result = nvmlInit(); - if (NVML_SUCCESS == result) - { - cout << "Using NVML interface..." << endl; - h5_create_dir(out_name, "/housekeeping"); - h5_create_dir(out_name, "/housekeeping/nvidia"); - } - else { - cout << "NVML failure..." << endl; - nvidia_log_temp = false; - nvidia_log_power = false; - } - } - if (nvidia_log_power || nvidia_log_temp) - { - nvmlPciInfo_t nv_pciinfo; - cl_uint nvml_devnum; - cl_int nvml_devid = -1; - - nvmlDeviceGetCount(&nvml_devnum); - - for (cl_uint i = 0; i < nvml_devnum; i++) - { - nvmlDeviceGetHandleByIndex(i, &device); - nvmlDeviceGetPciInfo(device, &nv_pciinfo); - - std::ostringstream tmp_devid; - tmp_devid << nv_pciinfo.domain << ":" << nv_pciinfo.bus << ":" << nv_pciinfo.device; - //cout<< tmp_devid.str() <(out_name, "/housekeeping/intel/TDP" , (float)rapl->get_TDP(), + "Thermal Design Power in watt"); + + int numMsrs = rapl->get_NumMSR(); + + //This is necesarry for initalization + rapl->sample(); + rapl->sample(); + rapl->sample(); + + for (int j = 0; j < numMsrs; j++) { - h5_write_single(out_name, "/housekeeping/intel/TDP", (float)rapl->get_TDP(), - "Thermal Design Power in watt"); - - int numMsrs = rapl->get_NumMSR(); - - //This is necesarry for initalization - rapl->sample(); - rapl->sample(); - rapl->sample(); - - for (int j = 0; j < numMsrs; j++) - { - int funcID; - double data[3]; - int nData; - wchar_t szName[MAX_PATH]; - - rapl->GetMsrFunc(j, &funcID); - rapl->GetMsrName(j, szName); - - if ((funcID == 1)) { - MSR.push_back(j); - if (utf16ToUtf8(szName) == "Processor") { - MSR_names.push_back("package"); - } - else { - if (utf16ToUtf8(szName) == "IA") { - MSR_names.push_back("cores"); - } - else { - MSR_names.push_back(utf16ToUtf8(szName)); - } - } - } + int funcID; + double data[3]; + int nData; + wchar_t szName[MAX_PATH]; + + rapl->GetMsrFunc(j, &funcID); + rapl->GetMsrName(j, szName); + + if ((funcID == 1)) { + MSR.push_back(j); + if (utf16ToUtf8(szName) == "Processor") { + MSR_names.push_back("package"); + } + else { + if (utf16ToUtf8(szName) == "IA") { + MSR_names.push_back("cores"); + } + else { + MSR_names.push_back(utf16ToUtf8(szName)); + } + } + } - //Get Package Power Limit - if ((funcID == 3)) { - double data[3]; - int nData; - rapl->GetPowerData(0, j, data, &nData); - std::string varname = "/housekeeping/intel/" + utf16ToUtf8(szName) + "_power_limit"; - h5_write_single(out_name, varname.c_str(), data[0]); - } + //Get Package Power Limit + if ((funcID == 3) ) { + double data[3]; + int nData; + rapl->GetPowerData(0, j, data, &nData); + std::string varname = "/housekeeping/intel/" + utf16ToUtf8(szName) + "_power_limit"; + h5_write_single(out_name, varname.c_str() , data[0]); + } - } } - std::thread intel_log_power_thread(intel_log_power_func); - std::thread intel_log_temp_thread(intel_log_temp_func); + } + std::thread intel_log_power_thread(intel_log_power_func); + std::thread intel_log_temp_thread(intel_log_temp_func); #endif #if defined(USEIRAPL) - if (intel_log_power || intel_log_temp) - { - cout << "Using Intel MSR interface..." << endl; - h5_create_dir(out_name, "/housekeeping"); - h5_create_dir(out_name, "/housekeeping/intel"); - rapl = new Rapl(); - } - - if (intel_log_power) - { - h5_write_single(out_name, "/housekeeping/intel/TDP", (float)rapl->get_TDP(), - "Thermal Design Power in watt"); - } - - std::thread intel_log_power_thread(intel_log_power_func); - std::thread intel_log_temp_thread(intel_log_temp_func); + if (intel_log_power || intel_log_temp) + { + cout << "Using Intel MSR interface..." << endl; + h5_create_dir(out_name, "/housekeeping"); + h5_create_dir(out_name, "/housekeeping/intel"); + rapl = new Rapl(); + } + + if (intel_log_power) + { + h5_write_single(out_name, "/housekeeping/intel/TDP", (float)rapl->get_TDP(), + "Thermal Design Power in watt"); + } + + std::thread intel_log_power_thread(intel_log_power_func); + std::thread intel_log_temp_thread(intel_log_temp_func); #endif - if (benchmark_mode == true) { - cout << "Sleeping for 4s" << endl << endl; - std::chrono::milliseconds timespan(4000); - std::this_thread::sleep_for(timespan); - } + if (benchmark_mode == true) { + cout << "Sleeping for 4s" << endl << endl; + std::chrono::milliseconds timespan(4000); + std::this_thread::sleep_for(timespan); + } - cout << "Launching kernel..." << endl; + cout << "Launching kernel..." << endl; - //get execution timestamp - timeval start_timeinfo; - gettimeofday(&start_timeinfo, NULL); + //get execution timestamp + timeval start_timeinfo; + gettimeofday(&start_timeinfo, NULL); - uint64_t exec_time = 0; - uint32_t kernels_run = 0; + uint64_t exec_time = 0; + uint32_t kernels_run = 0; - uint64_t total_exec_time = timer.getTimeMicroseconds(); + uint64_t total_exec_time = timer.getTimeMicroseconds(); - for (cl_ulong repetition = 0; repetition < kernel_repetitions; ++repetition) { - for (string const& kernel_name : kernel_list) { - exec_time = exec_time + dev_mgr.execute_kernelNA(*(dev_mgr.getKernelbyName(0, "ocl_Kernel", kernel_name)), - dev_mgr.get_queue(0, 0), range_start, global_range, local_range); - kernels_run++; - } + for (cl_ulong repetition = 0; repetition < kernel_repetitions; ++repetition) { + for (string const& kernel_name : kernel_list) { + exec_time = exec_time + dev_mgr.execute_kernelNA(*(dev_mgr.getKernelbyName(0, "ocl_Kernel", kernel_name)), + dev_mgr.get_queue(0, 0), range_start, global_range, local_range); + kernels_run++; } + } - total_exec_time = timer.getTimeMicroseconds() - total_exec_time; - h5_create_dir(out_name, "housekeeping"); - h5_write_single(out_name, "/housekeeping/total_execution_time", 1.e-6 * total_exec_time, - "Time in seconds of the total execution (data transfer, kernel, and host code)."); + total_exec_time = timer.getTimeMicroseconds() - total_exec_time; + h5_create_dir(out_name, "housekeeping"); + h5_write_single(out_name, "/housekeeping/total_execution_time", 1.e-6 * total_exec_time, + "Time in seconds of the total execution (data transfer, kernel, and host code)."); - cout << "Kernels executed: " << kernels_run << endl; - cout << "Kernel runtime: " << exec_time / 1000 << " ms" << endl; // TODO: ms or s, int or double? + cout << "Kernels executed: " << kernels_run << endl; + cout << "Kernel runtime: " << exec_time / 1000 << " ms" << endl; // TODO: ms or s, int or double? - if (benchmark_mode == true) { - cout << endl << "Sleeping for 4s" << endl; - std::chrono::milliseconds timespan(4000); + if (benchmark_mode == true) { + cout << endl << "Sleeping for 4s" << endl; + std::chrono::milliseconds timespan(4000); - std::this_thread::sleep_for(timespan); - } + std::this_thread::sleep_for(timespan); + } - cout << "Saving results... " << endl; + cout << "Saving results... " << endl; #if defined(USEAMDP) - amd_log_power = false; - amd_log_temp = false; - amd_log_power_thread.join(); - //amd_log_temp_thread.join(); + amd_log_power = false; + amd_log_temp = false; + amd_log_power_thread.join(); + //amd_log_temp_thread.join(); - if ((amd_power_rate > 0) || (amd_temp_rate > 0)) { - - AMDTPwrStopProfiling(); - } + if ((amd_power_rate > 0) || (amd_temp_rate > 0)) { - if (amd_power_rate > 0) - { - h5_write_buffer(out_name, "/housekeeping/amd/power_time", amd_power_time.data(), amd_power_time.size(), - "POSIX UTC time in seconds since 1970-01-01T00:00.000 (resolution of milliseconds)"); + AMDTPwrStopProfiling(); + } - for (size_t i = 0; i < AMDP_names.size(); i++) - { - std::string varname = "/housekeeping/amd/" + AMDP_names.at(i); - //cout<<"Power: "<< AMDP_names.at(i) <(out_name, varname.c_str(), amd_power[i].data(), amd_power[i].size(), - "Power in watt"); - } + if (amd_power_rate > 0) + { + h5_write_buffer(out_name, "/housekeeping/amd/power_time", amd_power_time.data(), amd_power_time.size(), + "POSIX UTC time in seconds since 1970-01-01T00:00.000 (resolution of milliseconds)"); - } - if (amd_temp_rate > 0) + for (size_t i = 0; i < AMDP_names.size(); i++) { - //amd_power_time has to be changed to amd_temp_time as soon as simultaneous logging is fixed! - h5_write_buffer(out_name, "/housekeeping/amd/temperature_time", amd_power_time.data(), amd_power_time.size(), - "POSIX UTC time in seconds since 1970-01-01T00:00.000 (resolution of milliseconds)"); - for (size_t i = 0; i < AMDT_names.size(); i++) - { - std::string varname = "/housekeeping/amd/" + AMDT_names.at(i); - // cout << "Temp: "<< AMDT_names.at(i) << endl; - h5_write_buffer(out_name, varname.c_str(), amd_temp[i].data(), amd_temp[i].size(), - "Temperatures in degree C"); - } - } + std::string varname = "/housekeeping/amd/" + AMDP_names.at(i); + //cout<<"Power: "<< AMDP_names.at(i) <(out_name, varname.c_str(), amd_power[i].data(), amd_power[i].size(), + "Power in watt"); + } + + } + if (amd_temp_rate > 0) + { + //amd_power_time has to be changed to amd_temp_time as soon as simultaneous logging is fixed! + h5_write_buffer(out_name, "/housekeeping/amd/temperature_time", amd_power_time.data(), amd_power_time.size(), + "POSIX UTC time in seconds since 1970-01-01T00:00.000 (resolution of milliseconds)"); + for (size_t i = 0; i < AMDT_names.size(); i++) + { + std::string varname = "/housekeeping/amd/" + AMDT_names.at(i); + // cout << "Temp: "<< AMDT_names.at(i) << endl; + h5_write_buffer(out_name, varname.c_str(), amd_temp[i].data(), amd_temp[i].size(), + "Temperatures in degree C"); + } + } #endif #if defined(USEIRAPL) - intel_log_power = false; - intel_log_power_thread.join(); + intel_log_power = false; + intel_log_power_thread.join(); - intel_log_temp = false; - intel_log_temp_thread.join(); + intel_log_temp = false; + intel_log_temp_thread.join(); - if (intel_power_rate > 0) - { - // size()-1 because differences are computed later - h5_write_buffer(out_name, "/housekeeping/intel/power_time", intel_power_time.data(), intel_power_time.size() - 1, - "POSIX UTC time in seconds since 1970-01-01T00:00.000 (resolution of milliseconds)"); + if (intel_power_rate > 0) + { + // size()-1 because differences are computed later + h5_write_buffer(out_name, "/housekeeping/intel/power_time", intel_power_time.data(), intel_power_time.size()-1, + "POSIX UTC time in seconds since 1970-01-01T00:00.000 (resolution of milliseconds)"); - std::vector tmp_vector; + std::vector tmp_vector; - size_t max_entries = MSR_names.size(); - if (rapl->detect_igp() == false) { - // no GT data - max_entries--; - } + size_t max_entries = MSR_names.size(); + if (rapl->detect_igp() == false) { + // no GT data + max_entries--; + } - for (size_t i = 0; i < max_entries; i++) - { - tmp_vector.clear(); + for (size_t i = 0; i < max_entries; i++) + { + tmp_vector.clear(); - for (size_t j = 0; j < intel_power0[i].size() - 1; j++) - { - tmp_vector.push_back((rapl->get_e_unit()*(double)(intel_power0[i].at(j + 1) - intel_power0[i].at(j))) / ((double)intel_power_rate*0.001)); - } - std::string varname = "/housekeeping/intel/" + MSR_names.at(i) + "0"; - h5_write_buffer(out_name, varname.c_str(), tmp_vector.data(), tmp_vector.size(), - "Power in watt"); - } + for (size_t j = 0; j < intel_power0[i].size()-1; j++) + { + tmp_vector.push_back((rapl->get_e_unit()*(double)(intel_power0[i].at(j+1)-intel_power0[i].at(j))) / ((double)intel_power_rate*0.001)); + } + std::string varname = "/housekeeping/intel/" + MSR_names.at(i) + "0"; + h5_write_buffer(out_name, varname.c_str(), tmp_vector.data(), tmp_vector.size(), + "Power in watt"); + } - if (rapl->detect_socket1() == true) - { - for (size_t i = 0; i < max_entries; i++) - { - tmp_vector.clear(); + if (rapl->detect_socket1() == true) + { + for (size_t i = 0; i < max_entries; i++) + { + tmp_vector.clear(); - for (size_t j = 0; j < intel_power1[i].size() - 1; j++) - { - tmp_vector.push_back((rapl->get_e_unit()*(double)(intel_power1[i].at(j + 1) - intel_power1[i].at(j))) / ((double)intel_power_rate*0.001)); - } - std::string varname = "/housekeeping/intel/" + MSR_names.at(i) + "1"; - h5_write_buffer(out_name, varname.c_str(), tmp_vector.data(), tmp_vector.size(), - "Power in watt"); - } + for (size_t j = 0; j < intel_power1[i].size()-1; j++) + { + tmp_vector.push_back((rapl->get_e_unit()*(double)(intel_power1[i].at(j+1)-intel_power1[i].at(j)))/((double)intel_power_rate*0.001)); } + std::string varname = "/housekeeping/intel/" + MSR_names.at(i) + "1"; + h5_write_buffer(out_name, varname.c_str(), tmp_vector.data(), tmp_vector.size(), + "Power in watt"); + } } + } - if (intel_temp_rate > 0) - { - h5_write_buffer(out_name, "/housekeeping/intel/temperature_time", intel_temp_time.data(), intel_temp_time.size(), - "POSIX UTC time in seconds since 1970-01-01T00:00.000 (resolution of milliseconds)"); + if (intel_temp_rate > 0) + { + h5_write_buffer(out_name, "/housekeeping/intel/temperature_time", intel_temp_time.data(), intel_temp_time.size(), + "POSIX UTC time in seconds since 1970-01-01T00:00.000 (resolution of milliseconds)"); - h5_write_buffer(out_name, "/housekeeping/intel/package0_temperature", intel_temp0.data(), intel_temp0.size(), - "Temperature in degree Celsius"); + h5_write_buffer(out_name, "/housekeeping/intel/package0_temperature", intel_temp0.data(), intel_temp0.size(), + "Temperature in degree Celsius"); - if (rapl->detect_socket1() == true) - { - h5_write_buffer(out_name, "/housekeeping/intel/package1_temperature", intel_temp1.data(), intel_temp1.size(), - "Temperature in degree Celsius"); - } + if (rapl->detect_socket1() == true) + { + h5_write_buffer(out_name, "/housekeeping/intel/package1_temperature", intel_temp1.data(), intel_temp1.size(), + "Temperature in degree Celsius"); } + } #endif #if defined(USEIPG) - intel_log_power = false; - intel_log_power_thread.join(); + intel_log_power = false; + intel_log_power_thread.join(); - intel_log_temp = false; - intel_log_temp_thread.join(); + intel_log_temp = false; + intel_log_temp_thread.join(); - if (intel_power_rate > 0) - { - h5_write_buffer(out_name, "/housekeeping/intel/power_time", intel_power_time.data(), intel_power_time.size(), - "POSIX UTC time in seconds since 1970-01-01T00:00.000 (resolution of milliseconds)"); + if (intel_power_rate > 0) + { + h5_write_buffer(out_name, "/housekeeping/intel/power_time", intel_power_time.data(), intel_power_time.size(), + "POSIX UTC time in seconds since 1970-01-01T00:00.000 (resolution of milliseconds)"); - for (size_t i = 0; i < MSR_names.size(); i++) - { - std::string varname = "/housekeeping/intel/" + MSR_names.at(i) + "0"; - h5_write_buffer(out_name, varname.c_str(), intel_power[i].data(), intel_power[i].size(), - "Power in watt"); - } + for (size_t i = 0; i < MSR_names.size(); i++) + { + std::string varname = "/housekeeping/intel/" + MSR_names.at(i) + "0"; + h5_write_buffer(out_name, varname.c_str(), intel_power[i].data(), intel_power[i].size(), + "Power in watt"); } + } - if (intel_temp_rate > 0) - { - h5_write_buffer(out_name, "/housekeeping/intel/temperature_time", intel_temp_time.data(), intel_temp_time.size(), - "POSIX UTC time in seconds since 1970-01-01T00:00.000 (resolution of milliseconds)"); + if (intel_temp_rate > 0) + { + h5_write_buffer(out_name, "/housekeeping/intel/temperature_time", intel_temp_time.data(), intel_temp_time.size(), + "POSIX UTC time in seconds since 1970-01-01T00:00.000 (resolution of milliseconds)"); - h5_write_buffer(out_name, "/housekeeping/intel/package_temperature", intel_temp.data(), intel_temp.size(), - "Temperature in degree Celsius"); - } + h5_write_buffer(out_name, "/housekeeping/intel/package_temperature", intel_temp.data(), intel_temp.size(), + "Temperature in degree Celsius"); + } #endif #if defined(USENVML) - nvidia_log_power = false; - nvidia_log_temp = false; - nvidia_log_power_thread.join(); - nvidia_log_temp_thread.join(); + nvidia_log_power = false; + nvidia_log_temp = false; + nvidia_log_power_thread.join(); + nvidia_log_temp_thread.join(); - if ((nvidia_power_rate > 0) || (nvidia_temp_rate > 0)) - { - nvmlShutdown(); - } + if ((nvidia_power_rate > 0)||(nvidia_temp_rate > 0)) + { + nvmlShutdown(); + } - if (nvidia_power_rate > 0) { + if (nvidia_power_rate > 0) { - h5_write_buffer(out_name, "/housekeeping/nvidia/power", nvidia_power.data(), nvidia_power.size(), - "Power in watt"); + h5_write_buffer(out_name, "/housekeeping/nvidia/power", nvidia_power.data(), nvidia_power.size(), + "Power in watt"); - h5_write_buffer(out_name, "/housekeeping/nvidia/power_time", nvidia_power_time.data(), nvidia_power_time.size(), - "POSIX UTC time in seconds since 1970-01-01T00:00.000 (resolution of milliseconds)"); - } + h5_write_buffer(out_name, "/housekeeping/nvidia/power_time", nvidia_power_time.data(), nvidia_power_time.size(), + "POSIX UTC time in seconds since 1970-01-01T00:00.000 (resolution of milliseconds)"); + } - if (nvidia_temp_rate > 0) { + if (nvidia_temp_rate > 0) { - h5_write_buffer(out_name, "/housekeeping/nvidia/temperature", nvidia_temp.data(), nvidia_temp.size(), - "Temperature in degree Celsius"); + h5_write_buffer(out_name, "/housekeeping/nvidia/temperature", nvidia_temp.data(), nvidia_temp.size(), + "Temperature in degree Celsius"); - h5_write_buffer(out_name, "/housekeeping/nvidia/temperature_time", nvidia_temp_time.data(), nvidia_temp_time.size(), - "POSIX UTC time in seconds since 1970-01-01T00:00.000 (resolution of milliseconds)"); - } + h5_write_buffer(out_name, "/housekeeping/nvidia/temperature_time", nvidia_temp_time.data(), nvidia_temp_time.size(), + "POSIX UTC time in seconds since 1970-01-01T00:00.000 (resolution of milliseconds)"); + } #endif - char time_buffer[100]; - time_t tmp_time = start_timeinfo.tv_sec; - strftime(time_buffer, sizeof(time_buffer), "%Y-%m-%dT%H:%M:%S", localtime(&tmp_time)); - sprintf(time_buffer, "%s.%03ld", time_buffer, start_timeinfo.tv_usec / 1000); - h5_create_dir(out_name, "housekeeping"); - h5_write_string(out_name, "housekeeping/kernel_execution_start", time_buffer); - h5_write_single(out_name, "housekeeping/kernel_execution_time", 1.e-6 * exec_time, - "Time in seconds of the kernel execution (no host code)."); - h5_write_single(out_name, "housekeeping/data_load_time", 1.e-6 * push_time, - "Time in seconds of the data transfer: hdf5 input file -> host -> device."); - - h5_create_dir(out_name, "architecture"); - h5_write_string(out_name, "architecture/host_os", getOS().c_str()); - h5_write_string(out_name, "architecture/opencl_device", dev_mgr.get_avail_dev_info(deviceIndex).name.c_str()); - h5_write_string(out_name, "architecture/opencl_platform", dev_mgr.get_avail_dev_info(deviceIndex).platform_name.c_str()); - h5_write_string(out_name, "architecture/opencl_version", dev_mgr.get_avail_dev_info(deviceIndex).ocl_version.c_str()); - - h5_create_dir(out_name, "/data"); - - pull_time = timer.getTimeMicroseconds(); - - uint32_t buffer_counter = 0; - - for (cl_uint i = 0; i < data_names.size(); i++) { - try { - uint8_t *tmp_data = nullptr; - size_t var_size = 0; - - switch (data_types.at(i)) { - case H5_float: var_size = data_sizes.at(i) * sizeof(cl_float); break; - case H5_double: var_size = data_sizes.at(i) * sizeof(cl_double); break; - case H5_char: var_size = data_sizes.at(i) * sizeof(cl_char); break; - case H5_uchar: var_size = data_sizes.at(i) * sizeof(cl_uchar); break; - case H5_short: var_size = data_sizes.at(i) * sizeof(cl_short); break; - case H5_ushort: var_size = data_sizes.at(i) * sizeof(cl_ushort); break; - case H5_int: var_size = data_sizes.at(i) * sizeof(cl_int); break; - case H5_uint: var_size = data_sizes.at(i) * sizeof(cl_uint); break; - case H5_long: var_size = data_sizes.at(i) * sizeof(cl_long); break; - case H5_ulong: var_size = data_sizes.at(i) * sizeof(cl_ulong); break; - default: cerr << ERROR_INFO << "Data type '" << data_types.at(i) << "' unknown." << endl; - } - - tmp_data = new uint8_t[var_size]; - - switch (data_rw_flags.at(buffer_counter)) { - case 0: dev_mgr.get_queue(0, 0).enqueueReadBuffer(data_in.at(buffer_counter), blocking, 0, var_size, tmp_data); break; - case 1: break; - case 2: dev_mgr.get_queue(0, 0).enqueueReadBuffer(data_in.at(buffer_counter), blocking, 0, var_size, tmp_data); break; - } - - dev_mgr.get_queue(0, 0).finish(); //Buffer Copy is asynchronous - - switch (data_types.at(i)) { - case H5_float: h5_write_buffer(out_name, data_names.at(i).c_str(), (float*)tmp_data, data_sizes.at(buffer_counter)); break; - case H5_double: h5_write_buffer(out_name, data_names.at(i).c_str(), (double*)tmp_data, data_sizes.at(buffer_counter)); break; - case H5_char: h5_write_buffer(out_name, data_names.at(i).c_str(), (cl_char*)tmp_data, data_sizes.at(buffer_counter)); break; - case H5_uchar: h5_write_buffer(out_name, data_names.at(i).c_str(), (cl_uchar*)tmp_data, data_sizes.at(buffer_counter)); break; - case H5_short: h5_write_buffer(out_name, data_names.at(i).c_str(), (cl_short*)tmp_data, data_sizes.at(buffer_counter)); break; - case H5_ushort: h5_write_buffer(out_name, data_names.at(i).c_str(), (cl_ushort*)tmp_data, data_sizes.at(buffer_counter)); break; - case H5_int: h5_write_buffer(out_name, data_names.at(i).c_str(), (cl_int*)tmp_data, data_sizes.at(buffer_counter)); break; - case H5_uint: h5_write_buffer(out_name, data_names.at(i).c_str(), (cl_uint*)tmp_data, data_sizes.at(buffer_counter)); break; - case H5_long: h5_write_buffer(out_name, data_names.at(i).c_str(), (cl_long*)tmp_data, data_sizes.at(buffer_counter)); break; - case H5_ulong: h5_write_buffer(out_name, data_names.at(i).c_str(), (cl_ulong*)tmp_data, data_sizes.at(buffer_counter)); break; - default: cerr << ERROR_INFO << "Data type '" << data_types.at(i) << "' unknown." << endl; - } - if (tmp_data != nullptr) { - delete[] tmp_data; tmp_data = nullptr; - } - buffer_counter++; - } - catch (cl::Error err) { - std::cerr << ERROR_INFO << "Exception: " << err.what() << std::endl; - } - } - - pull_time = timer.getTimeMicroseconds() - pull_time; - h5_write_single(out_name, "housekeeping/data_store_time", 1.e-6 * pull_time, - "Time in seconds of the data transfer: device -> host -> hdf5 output file."); - - return 0; + char time_buffer[100]; + time_t tmp_time = start_timeinfo.tv_sec; + strftime(time_buffer, sizeof(time_buffer), "%Y-%m-%dT%H:%M:%S", localtime(&tmp_time)); + sprintf(time_buffer, "%s.%03ld", time_buffer, start_timeinfo.tv_usec / 1000); + h5_create_dir(out_name, "housekeeping"); + h5_write_string(out_name, "housekeeping/kernel_execution_start", time_buffer); + h5_write_single(out_name, "housekeeping/kernel_execution_time", 1.e-6 * exec_time, + "Time in seconds of the kernel execution (no host code)."); + h5_write_single(out_name, "housekeeping/data_load_time", 1.e-6 * push_time, + "Time in seconds of the data transfer: hdf5 input file -> host -> device."); + + h5_create_dir(out_name, "architecture"); + h5_write_string(out_name, "architecture/host_os", getOS().c_str()); + h5_write_string(out_name, "architecture/opencl_device", dev_mgr.get_avail_dev_info(deviceIndex).name.c_str()); + h5_write_string(out_name, "architecture/opencl_platform", dev_mgr.get_avail_dev_info(deviceIndex).platform_name.c_str()); + h5_write_string(out_name, "architecture/opencl_version", dev_mgr.get_avail_dev_info(deviceIndex).ocl_version.c_str()); + + h5_create_dir(out_name, "/data"); + + pull_time = timer.getTimeMicroseconds(); + + uint32_t buffer_counter = 0; + + for (cl_uint i = 0; i < data_names.size(); i++) { + try { + uint8_t *tmp_data = nullptr; + size_t var_size = 0; + + switch (data_types.at(i)) { + case H5_float: var_size = data_sizes.at(i) * sizeof(cl_float); break; + case H5_double: var_size = data_sizes.at(i) * sizeof(cl_double); break; + case H5_char: var_size = data_sizes.at(i) * sizeof(cl_char); break; + case H5_uchar: var_size = data_sizes.at(i) * sizeof(cl_uchar); break; + case H5_short: var_size = data_sizes.at(i) * sizeof(cl_short); break; + case H5_ushort: var_size = data_sizes.at(i) * sizeof(cl_ushort); break; + case H5_int: var_size = data_sizes.at(i) * sizeof(cl_int); break; + case H5_uint: var_size = data_sizes.at(i) * sizeof(cl_uint); break; + case H5_long: var_size = data_sizes.at(i) * sizeof(cl_long); break; + case H5_ulong: var_size = data_sizes.at(i) * sizeof(cl_ulong); break; + default: cerr << ERROR_INFO << "Data type '" << data_types.at(i) << "' unknown." << endl; + } + + tmp_data = new uint8_t[var_size]; + + switch (data_rw_flags.at(buffer_counter)) { + case 0: dev_mgr.get_queue(0, 0).enqueueReadBuffer(data_in.at(buffer_counter), blocking, 0, var_size, tmp_data); break; + case 1: break; + case 2: dev_mgr.get_queue(0, 0).enqueueReadBuffer(data_in.at(buffer_counter), blocking, 0, var_size, tmp_data); break; + } + + dev_mgr.get_queue(0, 0).finish(); //Buffer Copy is asynchronous + + switch (data_types.at(i)) { + case H5_float: h5_write_buffer( out_name, data_names.at(i).c_str(), (float*)tmp_data, data_sizes.at(buffer_counter)); break; + case H5_double: h5_write_buffer( out_name, data_names.at(i).c_str(), (double*)tmp_data, data_sizes.at(buffer_counter)); break; + case H5_char: h5_write_buffer( out_name, data_names.at(i).c_str(), (cl_char*)tmp_data, data_sizes.at(buffer_counter)); break; + case H5_uchar: h5_write_buffer( out_name, data_names.at(i).c_str(), (cl_uchar*)tmp_data, data_sizes.at(buffer_counter)); break; + case H5_short: h5_write_buffer( out_name, data_names.at(i).c_str(), (cl_short*)tmp_data, data_sizes.at(buffer_counter)); break; + case H5_ushort: h5_write_buffer(out_name, data_names.at(i).c_str(), (cl_ushort*)tmp_data, data_sizes.at(buffer_counter)); break; + case H5_int: h5_write_buffer( out_name, data_names.at(i).c_str(), (cl_int*)tmp_data, data_sizes.at(buffer_counter)); break; + case H5_uint: h5_write_buffer( out_name, data_names.at(i).c_str(), (cl_uint*)tmp_data, data_sizes.at(buffer_counter)); break; + case H5_long: h5_write_buffer( out_name, data_names.at(i).c_str(), (cl_long*)tmp_data, data_sizes.at(buffer_counter)); break; + case H5_ulong: h5_write_buffer( out_name, data_names.at(i).c_str(), (cl_ulong*)tmp_data, data_sizes.at(buffer_counter)); break; + default: cerr << ERROR_INFO << "Data type '" << data_types.at(i) << "' unknown." << endl; + } + if (tmp_data != nullptr) { + delete[] tmp_data; tmp_data = nullptr; + } + buffer_counter++; + } + catch (cl::Error err) { + std::cerr << ERROR_INFO << "Exception: " << err.what() << std::endl; + } + } + + pull_time = timer.getTimeMicroseconds() - pull_time; + h5_write_single(out_name, "housekeeping/data_store_time", 1.e-6 * pull_time, + "Time in seconds of the data transfer: device -> host -> hdf5 output file."); + + return 0; } diff --git a/src/ocl_dev_mgr.cpp b/src/ocl_dev_mgr.cpp index 8ad97e8..714111e 100644 --- a/src/ocl_dev_mgr.cpp +++ b/src/ocl_dev_mgr.cpp @@ -11,85 +11,85 @@ inline void compile(cl::Program& cl_prog, char const* options) { - std::string compile_options = std::string(" ") + std::string(options); + std::string compile_options = std::string(" ") + std::string(options); try { - cl_prog.build(compile_options.c_str()); + cl_prog.build(compile_options.c_str()); } catch (cl::BuildError error) { - std::string log = error.getBuildLog()[0].second; - std::cerr << ERROR_INFO << "Build error:\n" << log << std::endl; + std::string log = error.getBuildLog()[0].second; + std::cerr << ERROR_INFO << "Build error:\n" << log << std::endl; } catch (cl::Error err) { - std::cerr << ERROR_INFO << "Exception:" << err.what() << std::endl; + std::cerr << ERROR_INFO << "Exception:" << err.what() << std::endl; } } inline std::string loadProgram(std::string const& input_filename) { - std::ifstream input(input_filename.c_str()); - if (!input.is_open()) { - std::cerr << ERROR_INFO << "Cannot open file '" << input_filename << "'." << std::endl; - exit(1); - } + std::ifstream input(input_filename.c_str()); + if (!input.is_open()) { + std::cerr << ERROR_INFO << "Cannot open file '" << input_filename << "'." << std::endl; + exit(1); + } - return std::string(std::istreambuf_iterator(input), (std::istreambuf_iterator())); + return std::string(std::istreambuf_iterator(input), (std::istreambuf_iterator())); } ocl_dev_mgr::ocl_dev_mgr() { - initialize(); + initialize(); } cl::Kernel* ocl_dev_mgr::getKernelbyName(cl_uint context_idx, std::string const& prog_name, std::string const& kernel_name) { - auto it_p = find(con_list.at(context_idx).prog_names.begin(), con_list.at(context_idx).prog_names.end(), prog_name); - if (it_p == con_list.at(context_idx).prog_names.end()) { - return nullptr; - } + auto it_p = find(con_list.at(context_idx).prog_names.begin(), con_list.at(context_idx).prog_names.end(), prog_name); + if (it_p == con_list.at(context_idx).prog_names.end()) { + return nullptr; + } - uint32_t idx = distance(con_list.at(context_idx).prog_names.begin(), it_p); + uint32_t idx = distance(con_list.at(context_idx).prog_names.begin(), it_p); - if (con_list.at(context_idx).kernels.at(idx).size() > 1) { - for (cl_uint i = 0; i < con_list.at(context_idx).kernels.at(idx).size(); i++) { - if (kernel_name == con_list.at(context_idx).kernel_names.at(idx).at(i)) { - return &(con_list.at(context_idx).kernels.at(idx).at(i)); - } - } - } + if (con_list.at(context_idx).kernels.at(idx).size() > 1) { + for (cl_uint i = 0; i < con_list.at(context_idx).kernels.at(idx).size(); i++) { + if (kernel_name == con_list.at(context_idx).kernel_names.at(idx).at(i)) { + return &(con_list.at(context_idx).kernels.at(idx).at(i)); + } + } + } - return &(con_list.at(context_idx).kernels.at(idx).at(0)); + return &(con_list.at(context_idx).kernels.at(idx).at(0)); } cl::Kernel* ocl_dev_mgr::getKernelbyID(cl_uint context_idx, std::string const& prog_name, cl_ulong kernel_id) { - auto it_p = find(con_list.at(context_idx).prog_names.begin(), con_list.at(context_idx).prog_names.end(), prog_name); - if (it_p == con_list.at(context_idx).prog_names.end()) { - return nullptr; - } + auto it_p = find(con_list.at(context_idx).prog_names.begin(), con_list.at(context_idx).prog_names.end(), prog_name); + if (it_p == con_list.at(context_idx).prog_names.end()) { + return nullptr; + } - uint32_t idx = distance(con_list.at(context_idx).prog_names.begin(), it_p); + uint32_t idx = distance(con_list.at(context_idx).prog_names.begin(), it_p); - return &(con_list.at(context_idx).kernels.at(idx).at(kernel_id)); + return &(con_list.at(context_idx).kernels.at(idx).at(kernel_id)); } std::string ocl_dev_mgr::getDeviceType(cl_uint avail_device_idx) { - if (available_devices.at(avail_device_idx).type == CL_DEVICE_TYPE_CPU) { - return(type_cpu_str); - } - else if (available_devices.at(avail_device_idx).type == CL_DEVICE_TYPE_GPU) { - return(type_gpu_str); - } - else if (available_devices.at(avail_device_idx).type == CL_DEVICE_TYPE_ACCELERATOR) { - return(type_acc_str); - } - else { - return(type_other_str); - } + if (available_devices.at(avail_device_idx).type == CL_DEVICE_TYPE_CPU) { + return(type_cpu_str); + } + else if (available_devices.at(avail_device_idx).type == CL_DEVICE_TYPE_GPU) { + return(type_gpu_str); + } + else if (available_devices.at(avail_device_idx).type == CL_DEVICE_TYPE_ACCELERATOR) { + return(type_acc_str); + } + else { + return(type_other_str); + } } std::string ocl_dev_mgr::getDevicePCIeID(cl_uint avail_device_idx) @@ -139,267 +139,267 @@ cl_int slot_id; cl_ulong ocl_dev_mgr::getDeviceList(std::vector& devices) { - // Get list of platforms - std::vector platforms; - cl::Platform::get(&platforms); - - // Enumerate devices - for (cl::Platform const& platform : platforms) - { - std::vector plat_devices; - platform.getDevices(CL_DEVICE_TYPE_ALL, &plat_devices); - devices.insert(devices.end(), plat_devices.begin(), plat_devices.end()); - } + // Get list of platforms + std::vector platforms; + cl::Platform::get(&platforms); - return devices.size(); + // Enumerate devices + for (cl::Platform const& platform : platforms) + { + std::vector plat_devices; + platform.getDevices(CL_DEVICE_TYPE_ALL, &plat_devices); + devices.insert(devices.end(), plat_devices.begin(), plat_devices.end()); + } + + return devices.size(); } cl_ulong ocl_dev_mgr::init_device(cl_uint avail_device_idx) { - ocl_context tmp_context; + ocl_context tmp_context; - tmp_context.devices.push_back(available_devices.at(avail_device_idx)); + tmp_context.devices.push_back(available_devices.at(avail_device_idx)); - std::vector tmp_devices; - tmp_devices.push_back(available_devices.at(avail_device_idx).device); + std::vector tmp_devices; + tmp_devices.push_back(available_devices.at(avail_device_idx).device); - cl::Context context(tmp_devices, NULL); - tmp_context.context = context; + cl::Context context(tmp_devices, NULL); + tmp_context.context = context; - tmp_context.queues.push_back(cl::CommandQueue(tmp_context.context, CL_QUEUE_PROFILING_ENABLE)); + tmp_context.queues.push_back(cl::CommandQueue(tmp_context.context, CL_QUEUE_PROFILING_ENABLE)); //push second queue for async copy - tmp_context.queues.push_back(cl::CommandQueue(tmp_context.context, CL_QUEUE_PROFILING_ENABLE)); + tmp_context.queues.push_back(cl::CommandQueue(tmp_context.context, CL_QUEUE_PROFILING_ENABLE)); - con_list.push_back(tmp_context); + con_list.push_back(tmp_context); - return con_list.size(); + return con_list.size(); } cl::CommandQueue& ocl_dev_mgr::get_queue(cl_uint context_idx, cl_uint queue_idx) { - return con_list.at(context_idx).queues.at(queue_idx); + return con_list.at(context_idx).queues.at(queue_idx); } cl::Context& ocl_dev_mgr::get_context(cl_uint context_idx) { - return con_list.at(context_idx).context; + return con_list.at(context_idx).context; } cl_ulong ocl_dev_mgr::get_avail_dev_num() { - return num_available_devices; + return num_available_devices; } cl_ulong ocl_dev_mgr::get_context_num() { - return con_list.size(); + return con_list.size(); } bool ocl_dev_mgr::add_program_url(cl_uint context_idx, std::string prog_name, std::string const& url) { - if (!fileExists(url)) { - return false; - } + if (!fileExists(url)) { + return false; + } - return add_program_str(context_idx, prog_name, loadProgram(url)); + return add_program_str(context_idx, prog_name, loadProgram(url)); } bool ocl_dev_mgr::add_program_str(cl_uint context_idx, std::string prog_name, std::string kernel) { - con_list.at(context_idx).programs.push_back(cl::Program(con_list.at(context_idx).context, kernel)); - con_list.at(context_idx).prog_names.push_back(prog_name); - con_list.at(context_idx).kernels.resize(con_list.at(context_idx).kernels.size() + 1); - con_list.at(context_idx).kernel_names.resize(con_list.at(context_idx).kernel_names.size() + 1); - return true; + con_list.at(context_idx).programs.push_back(cl::Program(con_list.at(context_idx).context, kernel)); + con_list.at(context_idx).prog_names.push_back(prog_name); + con_list.at(context_idx).kernels.resize(con_list.at(context_idx).kernels.size() + 1); + con_list.at(context_idx).kernel_names.resize(con_list.at(context_idx).kernel_names.size() + 1); + return true; } cl::Program& ocl_dev_mgr::get_program(cl_uint context_idx, std::string const& prog_name) { - auto it_p = find(con_list.at(context_idx).prog_names.begin(), con_list.at(context_idx).prog_names.end(), prog_name); - if (it_p != con_list.at(context_idx).prog_names.end()) { - return con_list.at(context_idx).programs.at(distance(con_list.at(context_idx).prog_names.begin(), it_p)); - } - else { - std::cerr << ERROR_INFO << "Program '" << prog_name << "' not found." << std::endl; - //TODO: Exception? - return con_list.at(context_idx).programs.at(0); - } + auto it_p = find(con_list.at(context_idx).prog_names.begin(), con_list.at(context_idx).prog_names.end(), prog_name); + if (it_p != con_list.at(context_idx).prog_names.end()) { + return con_list.at(context_idx).programs.at(distance(con_list.at(context_idx).prog_names.begin(), it_p)); + } + else { + std::cerr << ERROR_INFO << "Program '" << prog_name << "' not found." << std::endl; + //TODO: Exception? + return con_list.at(context_idx).programs.at(0); + } } ocl_dev_mgr::ocl_device_info& ocl_dev_mgr::get_avail_dev_info(cl_uint avail_device_idx) { - return available_devices.at(avail_device_idx); + return available_devices.at(avail_device_idx); } ocl_dev_mgr::ocl_device_info& ocl_dev_mgr::get_context_dev_info(cl_uint context_idx, cl_uint device_idx) { - return con_list.at(context_idx).devices.at(device_idx); + return con_list.at(context_idx).devices.at(device_idx); } // return execution time in µs cl_ulong ocl_dev_mgr::execute_kernel(cl::Kernel& kernel, cl::CommandQueue& queue, - cl::NDRange global_range, cl::NDRange local_range, - std::vector& dev_Buffers) + cl::NDRange global_range, cl::NDRange local_range, + std::vector& dev_Buffers) { - cl::Event event; - cl_ulong time_start, time_end; + cl::Event event; + cl_ulong time_start, time_end; - try { - for (cl_uint i = 0; i < dev_Buffers.size(); i++) { - kernel.setArg(i, *dev_Buffers[i]); - } - - queue.enqueueNDRangeKernel(kernel, cl::NullRange, global_range, local_range, NULL, &event); - event.wait(); - event.getProfilingInfo(CL_PROFILING_COMMAND_END, &time_end); - event.getProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, &time_start); - } - catch (cl::BuildError error) { - std::string log = error.getBuildLog()[0].second; - std::cerr << ERROR_INFO << "Build error:\n" << log << std::endl; - } - catch (cl::Error err) { - std::cerr << ERROR_INFO << "Exception:" << err.what() << std::endl; + try { + for (cl_uint i = 0; i < dev_Buffers.size(); i++) { + kernel.setArg(i, *dev_Buffers[i]); + } + + queue.enqueueNDRangeKernel(kernel, cl::NullRange, global_range, local_range, NULL, &event); + event.wait(); + event.getProfilingInfo(CL_PROFILING_COMMAND_END, &time_end); + event.getProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, &time_start); + } + catch (cl::BuildError error) { + std::string log = error.getBuildLog()[0].second; + std::cerr << ERROR_INFO << "Build error:\n" << log << std::endl; } + catch (cl::Error err) { + std::cerr << ERROR_INFO << "Exception:" << err.what() << std::endl; + } - return (time_end - time_start) / 1000; + return (time_end - time_start) / 1000; } // return execution time in µs cl_ulong ocl_dev_mgr::execute_kernelNA(cl::Kernel& kernel, cl::CommandQueue& queue, - cl::NDRange range_start, cl::NDRange global_range, cl::NDRange local_range) +cl::NDRange range_start, cl::NDRange global_range, cl::NDRange local_range) { - cl::Event event; - cl_ulong time_start, time_end; - - try { - queue.enqueueNDRangeKernel(kernel, range_start, global_range, local_range, NULL, &event); - event.wait(); - event.getProfilingInfo(CL_PROFILING_COMMAND_END, &time_end); - event.getProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, &time_start); - } - catch (cl::BuildError error) { - std::string log = error.getBuildLog()[0].second; - std::cerr << ERROR_INFO << "Build error:\n" << log << std::endl; - } - catch (cl::Error err) { - std::cerr << ERROR_INFO << "Exception:" << err.what() << std::endl; - } + cl::Event event; + cl_ulong time_start, time_end; + + try { + queue.enqueueNDRangeKernel(kernel, range_start, global_range, local_range, NULL, &event); + event.wait(); + event.getProfilingInfo(CL_PROFILING_COMMAND_END, &time_end); + event.getProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, &time_start); + } + catch (cl::BuildError error) { + std::string log = error.getBuildLog()[0].second; + std::cerr << ERROR_INFO << "Build error:\n" << log << std::endl; + } + catch (cl::Error err) { + std::cerr << ERROR_INFO << "Exception:" << err.what() << std::endl; + } - return (time_end - time_start) / 1000; + return (time_end - time_start) / 1000; } // don't return execution time in µs void ocl_dev_mgr::execute_kernel_async(cl::Kernel& kernel, cl::CommandQueue& queue, - cl::NDRange global_range, cl::NDRange local_range, - std::vector& dev_Buffers) + cl::NDRange global_range, cl::NDRange local_range, + std::vector& dev_Buffers) { - try { - for (cl_uint i = 0; i < dev_Buffers.size(); i++) { - kernel.setArg(i, *dev_Buffers[i]); - } + try { + for (cl_uint i = 0; i < dev_Buffers.size(); i++) { + kernel.setArg(i, *dev_Buffers[i]); + } - queue.enqueueNDRangeKernel(kernel, cl::NullRange, global_range, local_range, NULL, NULL); - } - catch (cl::BuildError error) { - std::string log = error.getBuildLog()[0].second; - std::cerr << ERROR_INFO << "Build error:\n" << log << std::endl; - } - catch (cl::Error err) { - std::cerr << ERROR_INFO << "Exception:" << err.what() << std::endl; - } + queue.enqueueNDRangeKernel(kernel, cl::NullRange, global_range, local_range, NULL, NULL); + } + catch (cl::BuildError error) { + std::string log = error.getBuildLog()[0].second; + std::cerr << ERROR_INFO << "Build error:\n" << log << std::endl; + } + catch (cl::Error err) { + std::cerr << ERROR_INFO << "Exception:" << err.what() << std::endl; + } } // Compile kernels and return the number of compiled kernels. cl_ulong ocl_dev_mgr::compile_kernel(cl_uint context_idx, std::string const& prog_name, std::string const& options) { - std::string compile_options = std::string(" ") + options; + std::string compile_options = std::string(" ") + options; - auto it_p = find(con_list.at(context_idx).prog_names.begin(), con_list.at(context_idx).prog_names.end(), prog_name); - if (it_p == con_list.at(context_idx).prog_names.end()) { - std::cerr << ERROR_INFO << "Program '" << prog_name << "' not found." << std::endl; - //TODO: Exception? - return 0; - } + auto it_p = find(con_list.at(context_idx).prog_names.begin(), con_list.at(context_idx).prog_names.end(), prog_name); + if (it_p == con_list.at(context_idx).prog_names.end()) { + std::cerr << ERROR_INFO << "Program '" << prog_name << "' not found." << std::endl; + //TODO: Exception? + return 0; + } - int32_t idx = distance(con_list.at(context_idx).prog_names.begin(), it_p); + int32_t idx = distance(con_list.at(context_idx).prog_names.begin(), it_p); - try { - con_list.at(context_idx).programs.at(idx).build(compile_options.c_str()); - } - catch (cl::BuildError error) { - std::string log = error.getBuildLog()[0].second; - std::cerr << ERROR_INFO << "Build error:\n" << log << std::endl; - } - catch (cl::Error err) { - std::cerr << ERROR_INFO << "Exception:" << err.what() << std::endl; - } + try { + con_list.at(context_idx).programs.at(idx).build(compile_options.c_str()); + } + catch (cl::BuildError error) { + std::string log = error.getBuildLog()[0].second; + std::cerr << ERROR_INFO << "Build error:\n" << log << std::endl; + } + catch (cl::Error err) { + std::cerr << ERROR_INFO << "Exception:" << err.what() << std::endl; + } - con_list.at(context_idx).programs.at(idx).createKernels(&(con_list.at(context_idx).kernels.at(idx))); + con_list.at(context_idx).programs.at(idx).createKernels(&(con_list.at(context_idx).kernels.at(idx))); - con_list.at(context_idx).kernel_names.at(idx).clear(); //make sure to clear kernel_names list + con_list.at(context_idx).kernel_names.at(idx).clear(); //make sure to clear kernel_names list - for (uint32_t i = 0; i < con_list.at(context_idx).kernels.at(idx).size(); i++) { - con_list.at(context_idx).kernel_names.at(idx).push_back(con_list.at(context_idx).kernels.at(idx).at(i).getInfo()); - } + for (uint32_t i = 0; i < con_list.at(context_idx).kernels.at(idx).size(); i++) { + con_list.at(context_idx).kernel_names.at(idx).push_back(con_list.at(context_idx).kernels.at(idx).at(i).getInfo()); + } - return con_list.at(context_idx).kernels.at(idx).size(); + return con_list.at(context_idx).kernels.at(idx).size(); } cl_ulong ocl_dev_mgr::get_kernel_names(cl_uint context_idx, std::string const& prog_name, std::vector& found_kernels) { - auto it_p = find(con_list.at(context_idx).prog_names.begin(), con_list.at(context_idx).prog_names.end(), prog_name); - if (it_p == con_list.at(context_idx).prog_names.end()) { - std::cerr << ERROR_INFO << "Program '" << prog_name << "' not found." << std::endl; + auto it_p = find(con_list.at(context_idx).prog_names.begin(), con_list.at(context_idx).prog_names.end(), prog_name); + if (it_p == con_list.at(context_idx).prog_names.end()) { + std::cerr << ERROR_INFO << "Program '" << prog_name << "' not found." << std::endl; //TODO: Exception? - return 0; - } + return 0; + } - int32_t idx = distance(con_list.at(context_idx).prog_names.begin(), it_p); + int32_t idx = distance(con_list.at(context_idx).prog_names.begin(), it_p); - for (uint32_t kernel_id = 0; kernel_id < con_list.at(context_idx).kernel_names.at(idx).size(); kernel_id++) { - found_kernels.push_back(con_list.at(context_idx).kernel_names.at(idx).at(kernel_id)); - } + for (uint32_t kernel_id = 0; kernel_id < con_list.at(context_idx).kernel_names.at(idx).size(); kernel_id++) { + found_kernels.push_back(con_list.at(context_idx).kernel_names.at(idx).at(kernel_id)); + } - return con_list.at(context_idx).kernel_names.at(idx).size(); + return con_list.at(context_idx).kernel_names.at(idx).size(); } void ocl_dev_mgr::initialize() { - std::vector tmp_devices; - getDeviceList(tmp_devices); - num_available_devices = tmp_devices.size(); - - available_devices = std::vector(num_available_devices); - - for (size_t i = 0; i < tmp_devices.size(); i++) { - - available_devices.at(i).device = tmp_devices.at(i); - std::vector tmp_size; - - available_devices.at(i).device.getInfo(CL_DEVICE_GLOBAL_MEM_SIZE, &available_devices.at(i).max_mem); - available_devices.at(i).device.getInfo(CL_DEVICE_MAX_MEM_ALLOC_SIZE, &available_devices.at(i).max_mem_alloc); - available_devices.at(i).device.getInfo(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, &available_devices.at(i).lw_dim); - available_devices.at(i).device.getInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE, &available_devices.at(i).wg_size); - available_devices.at(i).device.getInfo(CL_DEVICE_MAX_WORK_ITEM_SIZES, &tmp_size); - available_devices.at(i).lw_size = tmp_size.at(0); - available_devices.at(i).device.getInfo(CL_DEVICE_NAME, &available_devices.at(i).name); - available_devices.at(i).device.getInfo(CL_DEVICE_VERSION, &available_devices.at(i).ocl_version); - available_devices.at(i).device.getInfo(CL_DEVICE_TYPE, &available_devices.at(i).type); - available_devices.at(i).device.getInfo(CL_DEVICE_MAX_COMPUTE_UNITS, &available_devices.at(i).compute_units); - available_devices.at(i).device.getInfo(CL_DEVICE_PLATFORM, &available_devices.at(i).platform); + std::vector tmp_devices; + getDeviceList(tmp_devices); + num_available_devices = tmp_devices.size(); + + available_devices = std::vector(num_available_devices); + +for (size_t i = 0; i < tmp_devices.size(); i++) { + + available_devices.at(i).device = tmp_devices.at(i); + std::vector tmp_size; + + available_devices.at(i).device.getInfo(CL_DEVICE_GLOBAL_MEM_SIZE, &available_devices.at(i).max_mem); + available_devices.at(i).device.getInfo(CL_DEVICE_MAX_MEM_ALLOC_SIZE, &available_devices.at(i).max_mem_alloc); + available_devices.at(i).device.getInfo(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, &available_devices.at(i).lw_dim); + available_devices.at(i).device.getInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE, &available_devices.at(i).wg_size); + available_devices.at(i).device.getInfo(CL_DEVICE_MAX_WORK_ITEM_SIZES, &tmp_size); + available_devices.at(i).lw_size = tmp_size.at(0); + available_devices.at(i).device.getInfo(CL_DEVICE_NAME, &available_devices.at(i).name); + available_devices.at(i).device.getInfo(CL_DEVICE_VERSION, &available_devices.at(i).ocl_version); + available_devices.at(i).device.getInfo(CL_DEVICE_TYPE, &available_devices.at(i).type); + available_devices.at(i).device.getInfo(CL_DEVICE_MAX_COMPUTE_UNITS, &available_devices.at(i).compute_units); + available_devices.at(i).device.getInfo(CL_DEVICE_PLATFORM, &available_devices.at(i).platform); available_devices.at(i).device.getInfo(CL_DEVICE_VENDOR, &available_devices.at(i).vendor); - available_devices.at(i).platform.getInfo(CL_PLATFORM_NAME, &available_devices.at(i).platform_name); + available_devices.at(i).platform.getInfo(CL_PLATFORM_NAME, &available_devices.at(i).platform_name); } } @@ -408,5 +408,5 @@ void ocl_dev_mgr::deinitalize() { //Deinitialization should be performed automatically, but there seems to be segfaults //under certain conditions using Windows, hence the vetor is cleared manually - con_list.clear(); + con_list.clear(); } From eaa33a3c957bbc80348f36388797e6445e0dd152 Mon Sep 17 00:00:00 2001 From: Philip Date: Sun, 31 Mar 2019 00:40:33 +0100 Subject: [PATCH 7/7] Cleanup --- src/main.cpp | 2 - src/ocl_dev_mgr.cpp | 446 ++++++++++++++++++++++---------------------- 2 files changed, 222 insertions(+), 226 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 58725c6..ff1a4a5 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -433,7 +433,6 @@ void nvidia_log_power_func() { if (nvidia_power_rate > 0) { unsigned int temp; - nvmlReturn_t result; timeval rawtime; nvidia_power.clear(); @@ -457,7 +456,6 @@ void nvidia_log_temp_func() { if (nvidia_temp_rate > 0) { unsigned int temp; - nvmlReturn_t result; timeval rawtime; nvidia_temp.clear(); diff --git a/src/ocl_dev_mgr.cpp b/src/ocl_dev_mgr.cpp index 714111e..afcb41f 100644 --- a/src/ocl_dev_mgr.cpp +++ b/src/ocl_dev_mgr.cpp @@ -4,131 +4,129 @@ #include #include #include -#include +#include #include "util.hpp" #include "ocl_dev_mgr.hpp" inline void compile(cl::Program& cl_prog, char const* options) { - std::string compile_options = std::string(" ") + std::string(options); + std::string compile_options = std::string(" ") + std::string(options); try { - cl_prog.build(compile_options.c_str()); + cl_prog.build(compile_options.c_str()); } catch (cl::BuildError error) { - std::string log = error.getBuildLog()[0].second; - std::cerr << ERROR_INFO << "Build error:\n" << log << std::endl; + std::string log = error.getBuildLog()[0].second; + std::cerr << ERROR_INFO << "Build error:\n" << log << std::endl; } catch (cl::Error err) { - std::cerr << ERROR_INFO << "Exception:" << err.what() << std::endl; + std::cerr << ERROR_INFO << "Exception:" << err.what() << std::endl; } } inline std::string loadProgram(std::string const& input_filename) { - std::ifstream input(input_filename.c_str()); - if (!input.is_open()) { - std::cerr << ERROR_INFO << "Cannot open file '" << input_filename << "'." << std::endl; - exit(1); - } + std::ifstream input(input_filename.c_str()); + if (!input.is_open()) { + std::cerr << ERROR_INFO << "Cannot open file '" << input_filename << "'." << std::endl; + exit(1); + } - return std::string(std::istreambuf_iterator(input), (std::istreambuf_iterator())); + return std::string(std::istreambuf_iterator(input), (std::istreambuf_iterator())); } ocl_dev_mgr::ocl_dev_mgr() { - initialize(); + initialize(); } cl::Kernel* ocl_dev_mgr::getKernelbyName(cl_uint context_idx, std::string const& prog_name, std::string const& kernel_name) { - auto it_p = find(con_list.at(context_idx).prog_names.begin(), con_list.at(context_idx).prog_names.end(), prog_name); - if (it_p == con_list.at(context_idx).prog_names.end()) { - return nullptr; - } - - uint32_t idx = distance(con_list.at(context_idx).prog_names.begin(), it_p); - - if (con_list.at(context_idx).kernels.at(idx).size() > 1) { - for (cl_uint i = 0; i < con_list.at(context_idx).kernels.at(idx).size(); i++) { - if (kernel_name == con_list.at(context_idx).kernel_names.at(idx).at(i)) { - return &(con_list.at(context_idx).kernels.at(idx).at(i)); - } - } + auto it_p = find(con_list.at(context_idx).prog_names.begin(), con_list.at(context_idx).prog_names.end(), prog_name); + if (it_p == con_list.at(context_idx).prog_names.end()) { + return nullptr; + } + + uint32_t idx = distance(con_list.at(context_idx).prog_names.begin(), it_p); + + if (con_list.at(context_idx).kernels.at(idx).size() > 1) { + for (cl_uint i = 0; i < con_list.at(context_idx).kernels.at(idx).size(); i++) { + if (kernel_name == con_list.at(context_idx).kernel_names.at(idx).at(i)) { + return &(con_list.at(context_idx).kernels.at(idx).at(i)); + } } + } - return &(con_list.at(context_idx).kernels.at(idx).at(0)); + return &(con_list.at(context_idx).kernels.at(idx).at(0)); } cl::Kernel* ocl_dev_mgr::getKernelbyID(cl_uint context_idx, std::string const& prog_name, cl_ulong kernel_id) { - auto it_p = find(con_list.at(context_idx).prog_names.begin(), con_list.at(context_idx).prog_names.end(), prog_name); - if (it_p == con_list.at(context_idx).prog_names.end()) { - return nullptr; - } + auto it_p = find(con_list.at(context_idx).prog_names.begin(), con_list.at(context_idx).prog_names.end(), prog_name); + if (it_p == con_list.at(context_idx).prog_names.end()) { + return nullptr; + } - uint32_t idx = distance(con_list.at(context_idx).prog_names.begin(), it_p); + uint32_t idx = distance(con_list.at(context_idx).prog_names.begin(), it_p); - return &(con_list.at(context_idx).kernels.at(idx).at(kernel_id)); + return &(con_list.at(context_idx).kernels.at(idx).at(kernel_id)); } std::string ocl_dev_mgr::getDeviceType(cl_uint avail_device_idx) { - if (available_devices.at(avail_device_idx).type == CL_DEVICE_TYPE_CPU) { - return(type_cpu_str); - } - else if (available_devices.at(avail_device_idx).type == CL_DEVICE_TYPE_GPU) { - return(type_gpu_str); - } - else if (available_devices.at(avail_device_idx).type == CL_DEVICE_TYPE_ACCELERATOR) { - return(type_acc_str); - } - else { - return(type_other_str); - } + if (available_devices.at(avail_device_idx).type == CL_DEVICE_TYPE_CPU) { + return(type_cpu_str); + } + else if (available_devices.at(avail_device_idx).type == CL_DEVICE_TYPE_GPU) { + return(type_gpu_str); + } + else if (available_devices.at(avail_device_idx).type == CL_DEVICE_TYPE_ACCELERATOR) { + return(type_acc_str); + } + else { + return(type_other_str); + } } std::string ocl_dev_mgr::getDevicePCIeID(cl_uint avail_device_idx) { -#define CL_DEVICE_PCI_BUS_ID_NV 0x4008 +#define CL_DEVICE_PCI_BUS_ID_NV 0x4008 #define CL_DEVICE_PCI_SLOT_ID_NV 0x4009 -#define CL_DEVICE_TOPOLOGY_AMD 0x4037 +#define CL_DEVICE_TOPOLOGY_AMD 0x4037 typedef union { - struct { cl_uint type; cl_uint data[5]; } raw; - struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie; + struct { cl_uint type; cl_uint data[5]; } raw; + struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie; } cl_device_topology_amd; - cl_device_topology_amd amd_topo; + cl_device_topology_amd amd_topo; cl_int bus_id; cl_int slot_id; std::ostringstream tmp_stream; - std::size_t found = 0; - found = available_devices.at(avail_device_idx).vendor.find("NVIDIA"); + std::size_t found = 0; + found = available_devices.at(avail_device_idx).vendor.find("NVIDIA"); + if (found != std::string::npos) { + available_devices.at(avail_device_idx).device.getInfo(CL_DEVICE_PCI_BUS_ID_NV,&bus_id); + available_devices.at(avail_device_idx).device.getInfo(CL_DEVICE_PCI_SLOT_ID_NV, &slot_id); + + cl_uint domain, bus, dev, func; + domain = bus_id >> 8; + bus = bus_id & 0xff; + tmp_stream << domain << ":" << bus << ":" << slot_id; + } + else + { + found = available_devices.at(avail_device_idx).vendor.find("Advanced Micro Devices"); if (found != std::string::npos) { - available_devices.at(avail_device_idx).device.getInfo(CL_DEVICE_PCI_BUS_ID_NV,&bus_id); - available_devices.at(avail_device_idx).device.getInfo(CL_DEVICE_PCI_SLOT_ID_NV, &slot_id); - - cl_uint domain, bus, dev, func; - domain = bus_id >> 8; - bus = bus_id & 0xff; - dev = slot_id >> 3; - func = slot_id & 0x7; - tmp_stream << domain << ":" << bus << ":" << slot_id; - } - else - { - found = available_devices.at(avail_device_idx).vendor.find("Advanced Micro Devices"); - if (found != std::string::npos) { - available_devices.at(avail_device_idx).device.getInfo(CL_DEVICE_TOPOLOGY_AMD, &amd_topo); - tmp_stream << "0:" << (unsigned int)amd_topo.pcie.bus << ":" << (unsigned int)amd_topo.pcie.device; //Domain is not returned? - } + available_devices.at(avail_device_idx).device.getInfo(CL_DEVICE_TOPOLOGY_AMD, &amd_topo); + tmp_stream << "0:" << (unsigned int)amd_topo.pcie.bus << ":" << (unsigned int)amd_topo.pcie.device; //Domain is not returned? } + } return tmp_stream.str(); @@ -139,136 +137,136 @@ cl_int slot_id; cl_ulong ocl_dev_mgr::getDeviceList(std::vector& devices) { - // Get list of platforms - std::vector platforms; - cl::Platform::get(&platforms); - - // Enumerate devices - for (cl::Platform const& platform : platforms) - { - std::vector plat_devices; - platform.getDevices(CL_DEVICE_TYPE_ALL, &plat_devices); - devices.insert(devices.end(), plat_devices.begin(), plat_devices.end()); - } - - return devices.size(); + // Get list of platforms + std::vector platforms; + cl::Platform::get(&platforms); + + // Enumerate devices + for (cl::Platform const& platform : platforms) + { + std::vector plat_devices; + platform.getDevices(CL_DEVICE_TYPE_ALL, &plat_devices); + devices.insert(devices.end(), plat_devices.begin(), plat_devices.end()); + } + + return devices.size(); } cl_ulong ocl_dev_mgr::init_device(cl_uint avail_device_idx) { - ocl_context tmp_context; + ocl_context tmp_context; - tmp_context.devices.push_back(available_devices.at(avail_device_idx)); + tmp_context.devices.push_back(available_devices.at(avail_device_idx)); - std::vector tmp_devices; - tmp_devices.push_back(available_devices.at(avail_device_idx).device); + std::vector tmp_devices; + tmp_devices.push_back(available_devices.at(avail_device_idx).device); - cl::Context context(tmp_devices, NULL); - tmp_context.context = context; + cl::Context context(tmp_devices, NULL); + tmp_context.context = context; - tmp_context.queues.push_back(cl::CommandQueue(tmp_context.context, CL_QUEUE_PROFILING_ENABLE)); + tmp_context.queues.push_back(cl::CommandQueue(tmp_context.context, CL_QUEUE_PROFILING_ENABLE)); //push second queue for async copy - tmp_context.queues.push_back(cl::CommandQueue(tmp_context.context, CL_QUEUE_PROFILING_ENABLE)); + tmp_context.queues.push_back(cl::CommandQueue(tmp_context.context, CL_QUEUE_PROFILING_ENABLE)); - con_list.push_back(tmp_context); + con_list.push_back(tmp_context); - return con_list.size(); + return con_list.size(); } cl::CommandQueue& ocl_dev_mgr::get_queue(cl_uint context_idx, cl_uint queue_idx) { - return con_list.at(context_idx).queues.at(queue_idx); + return con_list.at(context_idx).queues.at(queue_idx); } cl::Context& ocl_dev_mgr::get_context(cl_uint context_idx) { - return con_list.at(context_idx).context; + return con_list.at(context_idx).context; } cl_ulong ocl_dev_mgr::get_avail_dev_num() { - return num_available_devices; + return num_available_devices; } cl_ulong ocl_dev_mgr::get_context_num() { - return con_list.size(); + return con_list.size(); } bool ocl_dev_mgr::add_program_url(cl_uint context_idx, std::string prog_name, std::string const& url) { - if (!fileExists(url)) { - return false; - } + if (!fileExists(url)) { + return false; + } - return add_program_str(context_idx, prog_name, loadProgram(url)); + return add_program_str(context_idx, prog_name, loadProgram(url)); } bool ocl_dev_mgr::add_program_str(cl_uint context_idx, std::string prog_name, std::string kernel) { - con_list.at(context_idx).programs.push_back(cl::Program(con_list.at(context_idx).context, kernel)); - con_list.at(context_idx).prog_names.push_back(prog_name); - con_list.at(context_idx).kernels.resize(con_list.at(context_idx).kernels.size() + 1); - con_list.at(context_idx).kernel_names.resize(con_list.at(context_idx).kernel_names.size() + 1); - return true; + con_list.at(context_idx).programs.push_back(cl::Program(con_list.at(context_idx).context, kernel)); + con_list.at(context_idx).prog_names.push_back(prog_name); + con_list.at(context_idx).kernels.resize(con_list.at(context_idx).kernels.size() + 1); + con_list.at(context_idx).kernel_names.resize(con_list.at(context_idx).kernel_names.size() + 1); + return true; } cl::Program& ocl_dev_mgr::get_program(cl_uint context_idx, std::string const& prog_name) { - auto it_p = find(con_list.at(context_idx).prog_names.begin(), con_list.at(context_idx).prog_names.end(), prog_name); - if (it_p != con_list.at(context_idx).prog_names.end()) { - return con_list.at(context_idx).programs.at(distance(con_list.at(context_idx).prog_names.begin(), it_p)); - } - else { - std::cerr << ERROR_INFO << "Program '" << prog_name << "' not found." << std::endl; - //TODO: Exception? - return con_list.at(context_idx).programs.at(0); - } + auto it_p = find(con_list.at(context_idx).prog_names.begin(), con_list.at(context_idx).prog_names.end(), prog_name); + if (it_p != con_list.at(context_idx).prog_names.end()) { + return con_list.at(context_idx).programs.at(distance(con_list.at(context_idx).prog_names.begin(), it_p)); + } + else { + std::cerr << ERROR_INFO << "Program '" << prog_name << "' not found." << std::endl; + //TODO: Exception? + return con_list.at(context_idx).programs.at(0); + } } ocl_dev_mgr::ocl_device_info& ocl_dev_mgr::get_avail_dev_info(cl_uint avail_device_idx) { - return available_devices.at(avail_device_idx); + return available_devices.at(avail_device_idx); } ocl_dev_mgr::ocl_device_info& ocl_dev_mgr::get_context_dev_info(cl_uint context_idx, cl_uint device_idx) { - return con_list.at(context_idx).devices.at(device_idx); + return con_list.at(context_idx).devices.at(device_idx); } // return execution time in µs cl_ulong ocl_dev_mgr::execute_kernel(cl::Kernel& kernel, cl::CommandQueue& queue, - cl::NDRange global_range, cl::NDRange local_range, - std::vector& dev_Buffers) + cl::NDRange global_range, cl::NDRange local_range, + std::vector& dev_Buffers) { - cl::Event event; - cl_ulong time_start, time_end; - - try { - for (cl_uint i = 0; i < dev_Buffers.size(); i++) { - kernel.setArg(i, *dev_Buffers[i]); - } - - queue.enqueueNDRangeKernel(kernel, cl::NullRange, global_range, local_range, NULL, &event); - event.wait(); - event.getProfilingInfo(CL_PROFILING_COMMAND_END, &time_end); - event.getProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, &time_start); + cl::Event event; + cl_ulong time_start, time_end; + + try { + for (cl_uint i = 0; i < dev_Buffers.size(); i++) { + kernel.setArg(i, *dev_Buffers[i]); } - catch (cl::BuildError error) { - std::string log = error.getBuildLog()[0].second; - std::cerr << ERROR_INFO << "Build error:\n" << log << std::endl; + + queue.enqueueNDRangeKernel(kernel, cl::NullRange, global_range, local_range, NULL, &event); + event.wait(); + event.getProfilingInfo(CL_PROFILING_COMMAND_END, &time_end); + event.getProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, &time_start); + } + catch (cl::BuildError error) { + std::string log = error.getBuildLog()[0].second; + std::cerr << ERROR_INFO << "Build error:\n" << log << std::endl; } - catch (cl::Error err) { - std::cerr << ERROR_INFO << "Exception:" << err.what() << std::endl; - } + catch (cl::Error err) { + std::cerr << ERROR_INFO << "Exception:" << err.what() << std::endl; + } - return (time_end - time_start) / 1000; + return (time_end - time_start) / 1000; } @@ -276,130 +274,130 @@ cl_ulong ocl_dev_mgr::execute_kernel(cl::Kernel& kernel, cl::CommandQueue& queue cl_ulong ocl_dev_mgr::execute_kernelNA(cl::Kernel& kernel, cl::CommandQueue& queue, cl::NDRange range_start, cl::NDRange global_range, cl::NDRange local_range) { - cl::Event event; - cl_ulong time_start, time_end; - - try { - queue.enqueueNDRangeKernel(kernel, range_start, global_range, local_range, NULL, &event); - event.wait(); - event.getProfilingInfo(CL_PROFILING_COMMAND_END, &time_end); - event.getProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, &time_start); + cl::Event event; + cl_ulong time_start, time_end; + + try { + queue.enqueueNDRangeKernel(kernel, range_start, global_range, local_range, NULL, &event); + event.wait(); + event.getProfilingInfo(CL_PROFILING_COMMAND_END, &time_end); + event.getProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, &time_start); } - catch (cl::BuildError error) { - std::string log = error.getBuildLog()[0].second; - std::cerr << ERROR_INFO << "Build error:\n" << log << std::endl; - } - catch (cl::Error err) { - std::cerr << ERROR_INFO << "Exception:" << err.what() << std::endl; - } - - return (time_end - time_start) / 1000; + catch (cl::BuildError error) { + std::string log = error.getBuildLog()[0].second; + std::cerr << ERROR_INFO << "Build error:\n" << log << std::endl; + } + catch (cl::Error err) { + std::cerr << ERROR_INFO << "Exception:" << err.what() << std::endl; + } + + return (time_end - time_start) / 1000; } // don't return execution time in µs void ocl_dev_mgr::execute_kernel_async(cl::Kernel& kernel, cl::CommandQueue& queue, - cl::NDRange global_range, cl::NDRange local_range, - std::vector& dev_Buffers) + cl::NDRange global_range, cl::NDRange local_range, + std::vector& dev_Buffers) { - try { - for (cl_uint i = 0; i < dev_Buffers.size(); i++) { - kernel.setArg(i, *dev_Buffers[i]); - } - - queue.enqueueNDRangeKernel(kernel, cl::NullRange, global_range, local_range, NULL, NULL); - } - catch (cl::BuildError error) { - std::string log = error.getBuildLog()[0].second; - std::cerr << ERROR_INFO << "Build error:\n" << log << std::endl; - } - catch (cl::Error err) { - std::cerr << ERROR_INFO << "Exception:" << err.what() << std::endl; + try { + for (cl_uint i = 0; i < dev_Buffers.size(); i++) { + kernel.setArg(i, *dev_Buffers[i]); } + + queue.enqueueNDRangeKernel(kernel, cl::NullRange, global_range, local_range, NULL, NULL); + } + catch (cl::BuildError error) { + std::string log = error.getBuildLog()[0].second; + std::cerr << ERROR_INFO << "Build error:\n" << log << std::endl; + } + catch (cl::Error err) { + std::cerr << ERROR_INFO << "Exception:" << err.what() << std::endl; + } } // Compile kernels and return the number of compiled kernels. cl_ulong ocl_dev_mgr::compile_kernel(cl_uint context_idx, std::string const& prog_name, std::string const& options) { - std::string compile_options = std::string(" ") + options; + std::string compile_options = std::string(" ") + options; - auto it_p = find(con_list.at(context_idx).prog_names.begin(), con_list.at(context_idx).prog_names.end(), prog_name); - if (it_p == con_list.at(context_idx).prog_names.end()) { - std::cerr << ERROR_INFO << "Program '" << prog_name << "' not found." << std::endl; - //TODO: Exception? - return 0; - } + auto it_p = find(con_list.at(context_idx).prog_names.begin(), con_list.at(context_idx).prog_names.end(), prog_name); + if (it_p == con_list.at(context_idx).prog_names.end()) { + std::cerr << ERROR_INFO << "Program '" << prog_name << "' not found." << std::endl; + //TODO: Exception? + return 0; + } - int32_t idx = distance(con_list.at(context_idx).prog_names.begin(), it_p); + int32_t idx = distance(con_list.at(context_idx).prog_names.begin(), it_p); - try { - con_list.at(context_idx).programs.at(idx).build(compile_options.c_str()); - } - catch (cl::BuildError error) { - std::string log = error.getBuildLog()[0].second; - std::cerr << ERROR_INFO << "Build error:\n" << log << std::endl; - } - catch (cl::Error err) { - std::cerr << ERROR_INFO << "Exception:" << err.what() << std::endl; - } + try { + con_list.at(context_idx).programs.at(idx).build(compile_options.c_str()); + } + catch (cl::BuildError error) { + std::string log = error.getBuildLog()[0].second; + std::cerr << ERROR_INFO << "Build error:\n" << log << std::endl; + } + catch (cl::Error err) { + std::cerr << ERROR_INFO << "Exception:" << err.what() << std::endl; + } - con_list.at(context_idx).programs.at(idx).createKernels(&(con_list.at(context_idx).kernels.at(idx))); + con_list.at(context_idx).programs.at(idx).createKernels(&(con_list.at(context_idx).kernels.at(idx))); - con_list.at(context_idx).kernel_names.at(idx).clear(); //make sure to clear kernel_names list + con_list.at(context_idx).kernel_names.at(idx).clear(); //make sure to clear kernel_names list - for (uint32_t i = 0; i < con_list.at(context_idx).kernels.at(idx).size(); i++) { - con_list.at(context_idx).kernel_names.at(idx).push_back(con_list.at(context_idx).kernels.at(idx).at(i).getInfo()); - } + for (uint32_t i = 0; i < con_list.at(context_idx).kernels.at(idx).size(); i++) { + con_list.at(context_idx).kernel_names.at(idx).push_back(con_list.at(context_idx).kernels.at(idx).at(i).getInfo()); + } - return con_list.at(context_idx).kernels.at(idx).size(); + return con_list.at(context_idx).kernels.at(idx).size(); } cl_ulong ocl_dev_mgr::get_kernel_names(cl_uint context_idx, std::string const& prog_name, std::vector& found_kernels) { - auto it_p = find(con_list.at(context_idx).prog_names.begin(), con_list.at(context_idx).prog_names.end(), prog_name); - if (it_p == con_list.at(context_idx).prog_names.end()) { - std::cerr << ERROR_INFO << "Program '" << prog_name << "' not found." << std::endl; + auto it_p = find(con_list.at(context_idx).prog_names.begin(), con_list.at(context_idx).prog_names.end(), prog_name); + if (it_p == con_list.at(context_idx).prog_names.end()) { + std::cerr << ERROR_INFO << "Program '" << prog_name << "' not found." << std::endl; //TODO: Exception? - return 0; - } + return 0; + } - int32_t idx = distance(con_list.at(context_idx).prog_names.begin(), it_p); + int32_t idx = distance(con_list.at(context_idx).prog_names.begin(), it_p); - for (uint32_t kernel_id = 0; kernel_id < con_list.at(context_idx).kernel_names.at(idx).size(); kernel_id++) { - found_kernels.push_back(con_list.at(context_idx).kernel_names.at(idx).at(kernel_id)); - } + for (uint32_t kernel_id = 0; kernel_id < con_list.at(context_idx).kernel_names.at(idx).size(); kernel_id++) { + found_kernels.push_back(con_list.at(context_idx).kernel_names.at(idx).at(kernel_id)); + } - return con_list.at(context_idx).kernel_names.at(idx).size(); + return con_list.at(context_idx).kernel_names.at(idx).size(); } void ocl_dev_mgr::initialize() { - std::vector tmp_devices; - getDeviceList(tmp_devices); - num_available_devices = tmp_devices.size(); + std::vector tmp_devices; + getDeviceList(tmp_devices); + num_available_devices = tmp_devices.size(); - available_devices = std::vector(num_available_devices); + available_devices = std::vector(num_available_devices); for (size_t i = 0; i < tmp_devices.size(); i++) { - available_devices.at(i).device = tmp_devices.at(i); - std::vector tmp_size; - - available_devices.at(i).device.getInfo(CL_DEVICE_GLOBAL_MEM_SIZE, &available_devices.at(i).max_mem); - available_devices.at(i).device.getInfo(CL_DEVICE_MAX_MEM_ALLOC_SIZE, &available_devices.at(i).max_mem_alloc); - available_devices.at(i).device.getInfo(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, &available_devices.at(i).lw_dim); - available_devices.at(i).device.getInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE, &available_devices.at(i).wg_size); - available_devices.at(i).device.getInfo(CL_DEVICE_MAX_WORK_ITEM_SIZES, &tmp_size); - available_devices.at(i).lw_size = tmp_size.at(0); - available_devices.at(i).device.getInfo(CL_DEVICE_NAME, &available_devices.at(i).name); - available_devices.at(i).device.getInfo(CL_DEVICE_VERSION, &available_devices.at(i).ocl_version); - available_devices.at(i).device.getInfo(CL_DEVICE_TYPE, &available_devices.at(i).type); - available_devices.at(i).device.getInfo(CL_DEVICE_MAX_COMPUTE_UNITS, &available_devices.at(i).compute_units); - available_devices.at(i).device.getInfo(CL_DEVICE_PLATFORM, &available_devices.at(i).platform); - available_devices.at(i).device.getInfo(CL_DEVICE_VENDOR, &available_devices.at(i).vendor); - available_devices.at(i).platform.getInfo(CL_PLATFORM_NAME, &available_devices.at(i).platform_name); + available_devices.at(i).device = tmp_devices.at(i); + std::vector tmp_size; + + available_devices.at(i).device.getInfo(CL_DEVICE_GLOBAL_MEM_SIZE, &available_devices.at(i).max_mem); + available_devices.at(i).device.getInfo(CL_DEVICE_MAX_MEM_ALLOC_SIZE, &available_devices.at(i).max_mem_alloc); + available_devices.at(i).device.getInfo(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, &available_devices.at(i).lw_dim); + available_devices.at(i).device.getInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE, &available_devices.at(i).wg_size); + available_devices.at(i).device.getInfo(CL_DEVICE_MAX_WORK_ITEM_SIZES, &tmp_size); + available_devices.at(i).lw_size = tmp_size.at(0); + available_devices.at(i).device.getInfo(CL_DEVICE_NAME, &available_devices.at(i).name); + available_devices.at(i).device.getInfo(CL_DEVICE_VERSION, &available_devices.at(i).ocl_version); + available_devices.at(i).device.getInfo(CL_DEVICE_TYPE, &available_devices.at(i).type); + available_devices.at(i).device.getInfo(CL_DEVICE_MAX_COMPUTE_UNITS, &available_devices.at(i).compute_units); + available_devices.at(i).device.getInfo(CL_DEVICE_PLATFORM, &available_devices.at(i).platform); + available_devices.at(i).device.getInfo(CL_DEVICE_VENDOR, &available_devices.at(i).vendor); + available_devices.at(i).platform.getInfo(CL_PLATFORM_NAME, &available_devices.at(i).platform_name); } } @@ -408,5 +406,5 @@ void ocl_dev_mgr::deinitalize() { //Deinitialization should be performed automatically, but there seems to be segfaults //under certain conditions using Windows, hence the vetor is cleared manually - con_list.clear(); + con_list.clear(); }