Commit 30c912c2 authored by ns

Small fixes

parent 747927ac
@@ -290,7 +290,7 @@ def get_powercap_power(pid_list, logger=None, **kwargs):
         ]
     )
-    if total_intel_power < total_attributable_power:
+    if total_intel_power*1.10 < total_attributable_power:  # 10% increase to prevent an error where both values are similar, e.g., if only one process is running on the machine
         raise ValueError(
             "For some reason the total intel estimated power is less than the attributable power. This "
             "means there is an error in computing the attribution. Please re-open "
@@ -550,7 +550,7 @@ def get_rapl_power(pid_list, logger=None, **kwargs):
         ]
     )
-    if total_intel_power*1.05 < total_attributable_power:  # 5% increase to prevent an error where both values are similar
+    if total_intel_power < total_attributable_power:
         raise ValueError(
             "For some reason the total intel estimated power is less than the attributable power. This "
             "means there is an error in computing the attribution. Please re-open "
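For readers skimming the diff: both hunks adjust the sanity check that compares the package-level power reading against the power attributed to the tracked processes, and the multiplier is simply a tolerance on that comparison. Below is a minimal sketch of the pattern; the helper name and its tolerance argument are illustrative only, not part of the project's API.

def check_power_attribution(total_intel_power, total_attributable_power, tolerance=1.10):
    # A small margin (here 10%) avoids spurious failures when one process
    # accounts for nearly all of the measured power and the two values are
    # effectively equal.
    if total_intel_power * tolerance < total_attributable_power:
        raise ValueError(
            "Attributable power exceeds the measured package power; "
            "the attribution is likely miscomputed."
        )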
@@ -164,7 +164,6 @@ def get_nvidia_gpu_power(pid_list, logger=None, **kwargs):
     per_gpu_relative_percent_usage = {}
     absolute_power = 0
     per_gpu_performance_states = {}
-    per_gpu_power_draw = {}
     for gpu_id, gpu in enumerate(xml.findall("gpu")):
         gpu_data = {}
@@ -190,10 +189,8 @@ def get_nvidia_gpu_power(pid_list, logger=None, **kwargs):
         gpu_data["utilization"] = {"gpu_util": gpu_util, "memory_util": memory_util}
         # get power
-        try:  # newer CUDA versions
+        try:  # For newer CUDA versions
             power_readings = gpu.findall("gpu_power_readings")[0]
-            print("Power readings")
-            print(power_readings)
         except:
             power_readings = gpu.findall("power_readings")[0]
         power_draw = power_readings.findall("power_draw")[0].text
@@ -212,6 +209,7 @@ def get_nvidia_gpu_power(pid_list, logger=None, **kwargs):
         # what's the total absolute SM for this gpu across all accessible processes
         percentage_of_gpu_used_by_all_processes = float(gpu_based_processes["sm"].sum())
+        per_gpu_power_draw = {}
         for info in processes.findall("process_info"):
             pid = info.findall("pid")[0].text
             process_name = info.findall("process_name")[0].text
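The GPU hunks touch the code that parses `nvidia-smi -q -x` output: newer drivers report power under a `gpu_power_readings` element while older ones use `power_readings`, hence the try/except fallback, and the leftover debug prints are dropped. A standalone sketch of that parsing step follows, assuming `nvidia-smi` is available; the helper name is illustrative and "N/A" readings are not handled.

import subprocess
from xml.etree import ElementTree

def read_gpu_power_draw():
    # Parse the XML report produced by `nvidia-smi -q -x`.
    xml = ElementTree.fromstring(
        subprocess.check_output(["nvidia-smi", "-q", "-x"])
    )
    draws = {}
    for gpu_id, gpu in enumerate(xml.findall("gpu")):
        # Newer drivers expose power under <gpu_power_readings>,
        # older ones under <power_readings>.
        readings = gpu.find("gpu_power_readings")
        if readings is None:
            readings = gpu.find("power_readings")
        power_draw = readings.find("power_draw").text  # e.g. "30.50 W"
        draws[gpu_id] = float(power_draw.replace("W", "").strip())
    return draws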