From 30c912c2f5f2655f3c4dc40ebd5f7ac67574d0a7 Mon Sep 17 00:00:00 2001 From: ns <ns> Date: Tue, 6 Feb 2024 14:34:56 +0100 Subject: [PATCH] Small fixes --- experiment_impact_tracker/cpu/intel.py | 4 ++-- experiment_impact_tracker/gpu/nvidia.py | 6 ++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/experiment_impact_tracker/cpu/intel.py b/experiment_impact_tracker/cpu/intel.py index 2400d07..bdd6a2f 100755 --- a/experiment_impact_tracker/cpu/intel.py +++ b/experiment_impact_tracker/cpu/intel.py @@ -290,7 +290,7 @@ def get_powercap_power(pid_list, logger=None, **kwargs): ] ) - if total_intel_power < total_attributable_power: + if total_intel_power*1.10 < total_attributable_power: # 10% increase to prevent an error where both values are similar, e.g., if only one process is running on the machine raise ValueError( "For some reason the total intel estimated power is less than the attributable power. This " "means there is an error in computing the attribution. Please re-open " @@ -550,7 +550,7 @@ def get_rapl_power(pid_list, logger=None, **kwargs): ] ) - if total_intel_power*1.05 < total_attributable_power: # 5% increase to prevent an error where both values are similar + if total_intel_power < total_attributable_power: raise ValueError( "For some reason the total intel estimated power is less than the attributable power. This " "means there is an error in computing the attribution. Please re-open " diff --git a/experiment_impact_tracker/gpu/nvidia.py b/experiment_impact_tracker/gpu/nvidia.py index c21641f..aa39d7f 100755 --- a/experiment_impact_tracker/gpu/nvidia.py +++ b/experiment_impact_tracker/gpu/nvidia.py @@ -164,7 +164,6 @@ def get_nvidia_gpu_power(pid_list, logger=None, **kwargs): per_gpu_relative_percent_usage = {} absolute_power = 0 per_gpu_performance_states = {} - per_gpu_power_draw = {} for gpu_id, gpu in enumerate(xml.findall("gpu")): gpu_data = {} @@ -190,10 +189,8 @@ def get_nvidia_gpu_power(pid_list, logger=None, **kwargs): gpu_data["utilization"] = {"gpu_util": gpu_util, "memory_util": memory_util} # get power - try: # newer CUDA versions + try: # For newer CUDA versions power_readings = gpu.findall("gpu_power_readings")[0] - print("Power readings") - print(power_readings) except: power_readings = gpu.findall("power_readings")[0] power_draw = power_readings.findall("power_draw")[0].text @@ -212,6 +209,7 @@ def get_nvidia_gpu_power(pid_list, logger=None, **kwargs): # what's the total absolute SM for this gpu across all accessible processes percentage_of_gpu_used_by_all_processes = float(gpu_based_processes["sm"].sum()) + per_gpu_power_draw = {} for info in processes.findall("process_info"): pid = info.findall("pid")[0].text process_name = info.findall("process_name")[0].text -- GitLab