Skip to content
Snippets Groups Projects
Commit 82a0b7d8 authored by Jieru Hu
Browse files

Address comments

parent ea04ef3e
No related branches found
No related tags found
No related merge requests found
...@@ -103,7 +103,6 @@ def _get_cpu_hours_from_per_process_data(json_array): ...@@ -103,7 +103,6 @@ def _get_cpu_hours_from_per_process_data(json_array):
def gather_additional_info(info, logdir): def gather_additional_info(info, logdir):
df, json_array = load_data_into_frame(logdir) df, json_array = load_data_into_frame(logdir)
print("GATHER ADDITIONAL INFO")
cpu_seconds = _get_cpu_hours_from_per_process_data(json_array) cpu_seconds = _get_cpu_hours_from_per_process_data(json_array)
exp_len = datetime.timestamp(info["experiment_end"]) - datetime.timestamp( exp_len = datetime.timestamp(info["experiment_end"]) - datetime.timestamp(
info["experiment_start"] info["experiment_start"]
...@@ -133,6 +132,7 @@ def gather_additional_info(info, logdir): ...@@ -133,6 +132,7 @@ def gather_additional_info(info, logdir):
len(power_draw_rapl_kw) - 1 len(power_draw_rapl_kw) - 1
] ]
has_gpu = False has_gpu = False
if "gpu_info" in info.keys(): if "gpu_info" in info.keys():
has_gpu = True has_gpu = True
num_gpus = len(info["gpu_info"]) num_gpus = len(info["gpu_info"])
...@@ -151,16 +151,19 @@ def gather_additional_info(info, logdir): ...@@ -151,16 +151,19 @@ def gather_additional_info(info, logdir):
kw_hr_rapl = ( kw_hr_rapl = (
np.multiply(time_differences_in_hours, power_draw_rapl_kw) np.multiply(time_differences_in_hours, power_draw_rapl_kw)
if power_draw_rapl_kw if power_draw_rapl_kw
else np.array([0]) else None
) )
total_power_per_timestep = None
if has_gpu: if has_gpu:
total_power_per_timestep = PUE * (kw_hr_nvidia + kw_hr_rapl) total_power_per_timestep = PUE * (kw_hr_nvidia + kw_hr_rapl)
else: else:
if kw_hr_rapl:
total_power_per_timestep = PUE * (kw_hr_rapl) total_power_per_timestep = PUE * (kw_hr_rapl)
total_power = total_power_per_timestep.sum()
realtime_carbon = None realtime_carbon = None
total_power = None
estimated_carbon_impact_grams = None
if "realtime_carbon_intensity" in df: if "realtime_carbon_intensity" in df:
realtime_carbon = df["realtime_carbon_intensity"] realtime_carbon = df["realtime_carbon_intensity"]
realtime_carbon.loc[len(realtime_carbon)] = realtime_carbon.loc[ realtime_carbon.loc[len(realtime_carbon)] = realtime_carbon.loc[
...@@ -178,28 +181,34 @@ def gather_additional_info(info, logdir): ...@@ -178,28 +181,34 @@ def gather_additional_info(info, logdir):
try: try:
estimated_carbon_impact_grams_per_timestep = np.multiply( estimated_carbon_impact_grams_per_timestep = np.multiply(
total_power_per_timestep, realtime_carbon total_power_per_timestep, realtime_carbon
) ) if total_power_per_timestep else None
except: except:
import pdb import pdb
pdb.set_trace() pdb.set_trace()
estimated_carbon_impact_grams = estimated_carbon_impact_grams_per_timestep.sum() estimated_carbon_impact_grams = estimated_carbon_impact_grams_per_timestep.sum() if estimated_carbon_impact_grams_per_timestep else None
else: else:
if total_power_per_timestep:
total_power = total_power_per_timestep.sum()
estimated_carbon_impact_grams = ( estimated_carbon_impact_grams = (
total_power * info["region_carbon_intensity_estimate"]["carbonIntensity"] total_power * info["region_carbon_intensity_estimate"]["carbonIntensity"])
)
estimated_carbon_impact_kg = estimated_carbon_impact_grams / 1000.0 estimated_carbon_impact_kg = estimated_carbon_impact_grams / 1000.0 if estimated_carbon_impact_grams else None
cpu_hours = cpu_seconds / 3600.0 cpu_hours = cpu_seconds / 3600.0
data = { data = { }
"cpu_hours": cpu_hours,
"estimated_carbon_impact_kg": estimated_carbon_impact_kg, if cpu_hours:
"total_power": total_power, data["cpu_hours"] = cpu_hours
"kw_hr_cpu": kw_hr_rapl.sum(), if estimated_carbon_impact_kg:
"exp_len_hours": exp_len_hours, data["estimated_carbon_impact_kg"] = estimated_carbon_impact_kg
} if total_power:
data["total_power"] = total_power
if kw_hr_rapl:
data["kw_hr_cpu"] = kw_hr_rapl.sum()
if exp_len_hours:
data["exp_len_hours"] = exp_len_hours
if has_gpu: if has_gpu:
# GPU-hours percent utilization * length of time utilized (assumes absolute utilization) # GPU-hours percent utilization * length of time utilized (assumes absolute utilization)
......
...@@ -28,7 +28,7 @@ def train(d: str = "cpu", log_dir: str = tempfile.mkdtemp()): ...@@ -28,7 +28,7 @@ def train(d: str = "cpu", log_dir: str = tempfile.mkdtemp()):
w2 = torch.randn(H, D_out, device=device) w2 = torch.randn(H, D_out, device=device)
learning_rate = 1e-6 learning_rate = 1e-6
for t in range(5): for t in range(1000):
# Forward pass: compute predicted y # Forward pass: compute predicted y
h = x.mm(w1) h = x.mm(w1)
h_relu = h.clamp(min=0) h_relu = h.clamp(min=0)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment