Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PCIe topology detection #39

Merged
merged 7 commits into from
Mar 31, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions include/ocl_dev_mgr.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ class ocl_dev_mgr {
std::string name;
cl::Platform platform;
std::string platform_name;
std::string vendor;
cl_device_type type;
std::string ocl_version;
cl_ulong max_mem;
Expand All @@ -35,6 +36,7 @@ class ocl_dev_mgr {
cl_uint float_perf;
};

std::string getDevicePCIeID(cl_uint avail_device_idx);
cl_ulong init_device(cl_uint avail_device_idx);
cl::CommandQueue& get_queue(cl_uint context_idx, cl_uint queue_idx);
cl::Context& get_context(cl_uint context_idx);
Expand Down
165 changes: 95 additions & 70 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -215,8 +215,7 @@ void amd_log_power_func()
void amd_log_temp_func()
{

while (amd_log_temp == true)
{
while (amd_log_temp == true) {
timeval rawtime;

std::this_thread::sleep_for(std::chrono::milliseconds(amd_temp_rate));
Expand All @@ -228,8 +227,7 @@ void amd_log_temp_func()
gettimeofday(&rawtime, NULL);
hResult = AMDTPwrReadAllEnabledCounters(&nbrSamples, &pSampleData);

if ((nullptr != pSampleData) && (nbrSamples > 0))
{
if ((nullptr != pSampleData) && (nbrSamples > 0)) {
for (size_t i = 0; i < pSampleData[0].m_numOfCounter; i++) //hardcoded to use the first sample returned
{

Expand Down Expand Up @@ -267,8 +265,7 @@ std::vector<cl_ushort> intel_temp1;

Rapl *rapl;

void intel_log_power_func()
{
void intel_log_power_func() {
timeval rawtime;

if (intel_power_rate > 0)
Expand All @@ -288,8 +285,7 @@ void intel_log_power_func()
intel_power1[3].clear();
intel_power_time.clear();

while (intel_log_power == true)
{
while (intel_log_power == true) {
rapl->sample();
std::this_thread::sleep_for(std::chrono::milliseconds(intel_power_rate / 2));
gettimeofday(&rawtime, NULL);
Expand All @@ -302,8 +298,7 @@ void intel_log_power_func()
intel_power0[2].push_back(dram);
intel_power0[3].push_back(pp1);

if (rapl->detect_socket1() == true)
{
if (rapl->detect_socket1() == true) {
rapl->get_socket1_data(pkg, pp0, pp1, dram);
intel_power1[0].push_back(pkg);
intel_power1[1].push_back(pp0);
Expand All @@ -319,8 +314,7 @@ void intel_log_temp_func()
uint32_t temp0 = 0, temp1 = 0;
timeval rawtime;

if (intel_temp_rate > 0)
{
if (intel_temp_rate > 0) {
intel_temp0.clear();
intel_temp1.clear();
intel_temp_time.clear();
Expand Down Expand Up @@ -374,13 +368,11 @@ void intel_log_temp_func()
// int temp;
timeval rawtime;

if (intel_temp_rate > 0)
{
if (intel_temp_rate > 0) {
intel_temp.clear();
intel_temp_time.clear();

while (intel_log_temp == true)
{
while (intel_log_temp == true) {
std::this_thread::sleep_for(std::chrono::milliseconds(intel_temp_rate));
intel_temp.push_back(rapl->get_temp0());
gettimeofday(&rawtime, NULL);
Expand All @@ -403,8 +395,7 @@ void intel_log_power_func()
}
intel_power_time.clear();

while (intel_log_power == true)
{
while (intel_log_power == true) {
std::this_thread::sleep_for(std::chrono::milliseconds(intel_power_rate));
rapl->sample();
gettimeofday(&rawtime, NULL);
Expand All @@ -424,13 +415,14 @@ void intel_log_power_func()




#if defined(USENVML)
#include <nvml.h>
bool nvidia_log_power = false;
bool nvidia_log_temp = false;
cl_uint nvidia_power_rate = 0;
cl_uint nvidia_temp_rate = 0;

nvmlDevice_t device;
std::vector<cl_ushort> nvidia_temp;
std::vector<double> nvidia_temp_time;

Expand All @@ -439,59 +431,48 @@ std::vector<double> nvidia_power_time;

void nvidia_log_power_func()
{
if (nvidia_power_rate > 0)
{
unsigned int temp;
nvmlReturn_t result;
timeval rawtime;

nvidia_power.clear();
nvidia_power_time.clear();
if (nvidia_power_rate > 0) {
unsigned int temp;
timeval rawtime;

nvmlDevice_t device;
nvmlDeviceGetHandleByIndex(0, &device);
nvidia_power.clear();
nvidia_power_time.clear();

while (nvidia_log_power == true)
{
std::this_thread::sleep_for(std::chrono::milliseconds(nvidia_power_rate));
while (nvidia_log_power == true) {
std::this_thread::sleep_for(std::chrono::milliseconds(nvidia_power_rate));

nvmlDeviceGetPowerUsage(device, &temp);
gettimeofday(&rawtime, NULL);
nvidia_power_time.push_back(timeval2storage(rawtime));
// convert milliwatt to watt
nvidia_power.push_back(1.e-3f * (float)(temp));
}
nvmlDeviceGetPowerUsage(device, &temp);
gettimeofday(&rawtime, NULL);
nvidia_power_time.push_back(timeval2storage(rawtime));
// convert milliwatt to watt
nvidia_power.push_back(1.e-3f * (float)(temp));
}

nvmlShutdown();
}
nvmlShutdown();
}
}

void nvidia_log_temp_func()
{
if (nvidia_temp_rate > 0)
{
unsigned int temp;
nvmlReturn_t result;
timeval rawtime;

nvidia_temp.clear();
nvidia_temp_time.clear();
void nvidia_log_temp_func() {
if (nvidia_temp_rate > 0)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Didn't we want to use 2 spaces instead of 4?

{
unsigned int temp;
timeval rawtime;

nvmlDevice_t device;
nvmlDeviceGetHandleByIndex(0, &device);
nvidia_temp.clear();
nvidia_temp_time.clear();

while (nvidia_log_temp == true)
{
std::this_thread::sleep_for(std::chrono::milliseconds(nvidia_temp_rate));
result = nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &temp);
gettimeofday(&rawtime, NULL);
nvidia_temp_time.push_back(timeval2storage(rawtime));
nvidia_temp.push_back(temp);
}
while (nvidia_log_temp == true)
{
std::this_thread::sleep_for(std::chrono::milliseconds(nvidia_temp_rate));
result = nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &temp);
gettimeofday(&rawtime, NULL);
nvidia_temp_time.push_back(timeval2storage(rawtime));
nvidia_temp.push_back(temp);
}

nvmlShutdown();
nvmlShutdown();

}
}
}

#endif // USENVML
Expand Down Expand Up @@ -755,18 +736,24 @@ int main(int argc, char *argv[]) {


cout << dev_mgr.get_avail_dev_info(deviceIndex).name.c_str() << endl;
cout << "OpenCL version: " << dev_mgr.get_avail_dev_info(deviceIndex).ocl_version.c_str() << endl;
cout << "Memory limit: " << dev_mgr.get_avail_dev_info(deviceIndex).max_mem << endl;
cout << "WG limit: " << dev_mgr.get_avail_dev_info(deviceIndex).wg_size << endl << endl;
if (benchmark_mode == false) {
cout << "OpenCL version: " << dev_mgr.get_avail_dev_info(deviceIndex).ocl_version.c_str() << endl;
cout << "Memory limit: " << dev_mgr.get_avail_dev_info(deviceIndex).max_mem << endl;
cout << "WG limit: " << dev_mgr.get_avail_dev_info(deviceIndex).wg_size << endl << endl;
}
ranocha marked this conversation as resolved.
Show resolved Hide resolved
dev_mgr.init_device(deviceIndex);

string kernel_url;
if (h5_check_object(filename, "kernel_url") == true) {
h5_read_string(filename, "kernel_url", kernel_url);
cout << "Reading kernel from file: " << kernel_url << "... " << endl;
h5_read_string(filename, "kernel_url", kernel_url);
if (benchmark_mode == false) {
cout << "Reading kernel from file: " << kernel_url << "... " << endl;
}
}
else if (h5_check_object(filename, "kernel_source") == true) {
cout << "Reading kernel from HDF5 file... " << endl;
if (benchmark_mode == false) {
cout << "Reading kernel from HDF5 file... " << endl;
}
ranocha marked this conversation as resolved.
Show resolved Hide resolved
std::vector<std::string> kernel_source;
h5_read_strings(filename, "kernel_source", kernel_source);
ofstream tmp_clfile;
Expand Down Expand Up @@ -809,7 +796,9 @@ int main(int argc, char *argv[]) {

std::vector<std::string> found_kernels;
dev_mgr.get_kernel_names(0, "ocl_Kernel", found_kernels);
cout << "Found Kernels: " << found_kernels.size() << endl;
if (benchmark_mode == false) {
cout << "Found Kernels: " << found_kernels.size() << endl;
}
ranocha marked this conversation as resolved.
Show resolved Hide resolved
if (found_kernels.size() == 0) {
cerr << ERROR_INFO << "No valid kernels found." << endl;
return -1;
Expand Down Expand Up @@ -943,7 +932,9 @@ int main(int argc, char *argv[]) {

push_time = timer.getTimeMicroseconds() - push_time;

cout << "Setting range..." << endl;
if (benchmark_mode == false) {
cout << "Setting range..." << endl;
}
ranocha marked this conversation as resolved.
Show resolved Hide resolved

cl::NDRange range_start;
cl::NDRange global_range;
Expand Down Expand Up @@ -1023,6 +1014,40 @@ int main(int argc, char *argv[]) {
nvidia_log_power = false;
}
}
// Map the selected OpenCL device onto its NVML counterpart so that the NVIDIA
// power/temperature logger threads sample the right GPU. Only done when at
// least one of the NVIDIA loggers is enabled. On success the file-level
// `device` handle refers to the matching NVML device; if no NVML device
// matches, NVML is shut down and the program exits with EXIT_FAILURE.
if (nvidia_log_power || nvidia_log_temp)
{
  nvmlPciInfo_t nv_pciinfo;
  // Initialised so that a failing nvmlDeviceGetCount cannot leave the loop
  // bound indeterminate.
  cl_uint nvml_devnum = 0;
  // Index of the matching NVML device; stays negative until a match is found.
  cl_int nvml_devid = -1;

  // The PCIe ID of the OpenCL device does not change while searching, so it is
  // queried once instead of once per NVML device.
  const std::string ocl_pcie_id = dev_mgr.getDevicePCIeID(deviceIndex);

  if (nvmlDeviceGetCount(&nvml_devnum) != NVML_SUCCESS) {
    // Treat an enumeration failure as "no NVML devices"; the not-found branch
    // below then reports the problem and aborts.
    nvml_devnum = 0;
  }

  for (cl_uint i = 0; i < nvml_devnum; i++)
  {
    // Skip devices whose handle or PCI info cannot be read, rather than
    // comparing against an uninitialised nv_pciinfo.
    if (nvmlDeviceGetHandleByIndex(i, &device) != NVML_SUCCESS ||
        nvmlDeviceGetPciInfo(device, &nv_pciinfo) != NVML_SUCCESS) {
      continue;
    }

    // Build a "domain:bus:device" key from the NVML PCI info.
    // NOTE(review): the fields are streamed with default (decimal) formatting;
    // confirm this matches the format produced by getDevicePCIeID.
    std::ostringstream tmp_devid;
    tmp_devid << nv_pciinfo.domain << ":" << nv_pciinfo.bus << ":" << nv_pciinfo.device;
    //cout<< tmp_devid.str() <<endl;

    if (ocl_pcie_id.find(tmp_devid.str()) != std::string::npos) {
      nvml_devid = static_cast<cl_int>(i);
      if (benchmark_mode == false) {
        cout << "NVidia OpenCL device " << tmp_devid.str() << " found in NVML device list." << endl;
      }
      break;
    }
  }

  if (nvml_devid < 0)
  {
    cout << "NVidia OpenCL device " << ocl_pcie_id << " not found in NVML device list! Aborting!" << endl;
    nvmlShutdown();
    exit(EXIT_FAILURE);
  }

  // Re-acquire the handle of the matched device for the logger threads.
  nvmlDeviceGetHandleByIndex(nvml_devid, &device);
}
std::thread nvidia_log_power_thread(nvidia_log_power_func);
std::thread nvidia_log_temp_thread(nvidia_log_temp_func);
#endif
Expand Down
Loading