diff --git a/experiment_impact_tracker/cpu/intel.py b/experiment_impact_tracker/cpu/intel.py index 2400d07be36a14314dc6ae2d1704cde6b8d3017e..bdd6a2f241b0cde7d1d64473c0b728a305c9fbbb 100755 --- a/experiment_impact_tracker/cpu/intel.py +++ b/experiment_impact_tracker/cpu/intel.py @@ -290,7 +290,7 @@ def get_powercap_power(pid_list, logger=None, **kwargs): ] ) - if total_intel_power < total_attributable_power: + if total_intel_power*1.10 < total_attributable_power: # 10% increase to prevent an error where both values are similar, e.g., if only one process is running on the machine raise ValueError( "For some reason the total intel estimated power is less than the attributable power. This " "means there is an error in computing the attribution. Please re-open " @@ -550,7 +550,7 @@ def get_rapl_power(pid_list, logger=None, **kwargs): ] ) - if total_intel_power*1.05 < total_attributable_power: # 5% increase to prevent an error where both values are similar + if total_intel_power < total_attributable_power: raise ValueError( "For some reason the total intel estimated power is less than the attributable power. This " "means there is an error in computing the attribution. Please re-open " diff --git a/experiment_impact_tracker/gpu/nvidia.py b/experiment_impact_tracker/gpu/nvidia.py index c21641f83495dbff2c66682c03c7830823519893..aa39d7fc3f6bae1849c74bcb6e33589fd0a8acb2 100755 --- a/experiment_impact_tracker/gpu/nvidia.py +++ b/experiment_impact_tracker/gpu/nvidia.py @@ -164,7 +164,6 @@ def get_nvidia_gpu_power(pid_list, logger=None, **kwargs): per_gpu_relative_percent_usage = {} absolute_power = 0 per_gpu_performance_states = {} - per_gpu_power_draw = {} for gpu_id, gpu in enumerate(xml.findall("gpu")): gpu_data = {} @@ -190,10 +189,8 @@ def get_nvidia_gpu_power(pid_list, logger=None, **kwargs): gpu_data["utilization"] = {"gpu_util": gpu_util, "memory_util": memory_util} # get power - try: # newer CUDA versions + try: # For newer CUDA versions power_readings = gpu.findall("gpu_power_readings")[0] - print("Power readings") - print(power_readings) except: power_readings = gpu.findall("power_readings")[0] power_draw = power_readings.findall("power_draw")[0].text @@ -212,6 +209,7 @@ def get_nvidia_gpu_power(pid_list, logger=None, **kwargs): # what's the total absolute SM for this gpu across all accessible processes percentage_of_gpu_used_by_all_processes = float(gpu_based_processes["sm"].sum()) + per_gpu_power_draw = {} for info in processes.findall("process_info"): pid = info.findall("pid")[0].text process_name = info.findall("process_name")[0].text