diff --git a/.gitignore b/.gitignore index d6fc5341564e225c42664c23384aa4b5a4360437..729b6848085ae0bf766170324e92328a30b00a6d 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ **/analysis **/benchmark_models **/benchmark_trajectory_data +**/Pretrained_Model_Trajectory_Data **commit** commit diff --git a/catkin_ws/src/panda_autodynamics/scripts/panda_auto_dynamics_v1.py b/catkin_ws/src/panda_autodynamics/scripts/panda_auto_dynamics_v1.py index c38b68ef1f0b61f539cfafd12cf823f0599508f2..c4c33de3c5f4b40e04d389c2f1db7d375ce3f1f0 100755 --- a/catkin_ws/src/panda_autodynamics/scripts/panda_auto_dynamics_v1.py +++ b/catkin_ws/src/panda_autodynamics/scripts/panda_auto_dynamics_v1.py @@ -191,7 +191,7 @@ def main(): ##########Movement Iteration############## for iter_traj in range(0, args.count): - publish_error_recovery_message() + publish_error_recovery_message() #reset comunication errors with the controller. rospy.sleep(2) #######Set motion params############## diff --git a/docker-compose.yaml b/docker-compose.yaml index 0e6308a2b2148545ae916f42394ce515fea86563..0da578e8c08d35b62aa11e81c84cbe4b50584774 100755 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -168,3 +168,43 @@ services: SWEEP_ID: ${SWEEP_ID} CUDA_DEVICE: ${CUDA_DEVICE} NUM_MODELS: ${NUM_MODELS} + +#########################foundation model############################### + foundation_model2: + build: + context: . + dockerfile: ./dynamics_learning/Dockerfile.foundation_model + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ['0'] + capabilities: [gpu] + cpus: '10' + memory: 30G + ipc: host + ulimits: + memlock: + soft: -1 + hard: -1 + stack: + soft: 67108864 + hard: 67108864 + container_name: foundation_model2 # Sets a custom name for the container + stdin_open: true # Keeps the container's standard input open (similar to 'docker run -i') + tty: true # Allocates a pseudo-TTY (like 'docker run -t'), useful for interactive shells + network_mode: host + volumes: + - /home/lgorissen/git/iop/franka_wwl_demonstrator:/app + environment: + COSCINE_API_TOKEN: ${COSCINE_API_TOKEN} + DATASET_TYPE: ${DATASET_TYPE} + WANDB_API_TOKEN: ${WANDB_API_TOKEN} + WANDB_NOTES: ${WANDB_NOTES} + WANDB_ENTITY: ${WANDB_ENTITY} + WANDB_PROJECT: ${WANDB_PROJECT} + ROBOT_UUID: ${ROBOT_UUID} + SWEEP_ID: ${SWEEP_ID} + CUDA_DEVICE: ${CUDA_DEVICE} + NUM_MODELS: ${NUM_MODELS} diff --git a/dynamics_learning/analysis.py b/dynamics_learning/analysis.py new file mode 100644 index 0000000000000000000000000000000000000000..39a6f97caf3831d7ae0b630f14a5eb76f9d6cdce --- /dev/null +++ b/dynamics_learning/analysis.py @@ -0,0 +1,297 @@ +#%% +from pathlib import Path +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +from pritty_logger import RichLogger +from rich.progress import track +from scipy.optimize import curve_fit +from sklearn.metrics import r2_score + +# Physical joint limits +q_lim_max_phys = ( + np.array([2.7973, 1.6628, 2.7973, -0.1698, 2.7973, 3.6525, 2.7973]) + 0.1 +) +q_lim_min_phys = ( + np.array([-2.7973, -1.6628, -2.7973, -2.9718, -2.7973, 0.1175, -2.7973]) - 0.1 +) +qd_lim_max_phys = np.array([2.175, 2.175, 2.175, 2.175, 2.61, 2.61, 2.61]) +qd_lim_min_phys = -1 * qd_lim_max_phys +qdd_lim_max_phys = np.array([15, 7.5, 10, 12.5, 15, 20, 20]) +qdd_lim_min_phys = -1 * qdd_lim_max_phys +tau_lim_max_phys = np.array([87, 87, 87, 87, 12, 12, 12]) +tau_lim_min_phys = -1 * tau_lim_max_phys + +physical_limits = { + "q_lim_max_phys": q_lim_max_phys, + "q_lim_min_phys": q_lim_min_phys, + 
"qd_lim_max_phys": qd_lim_max_phys, + "qd_lim_min_phys": qd_lim_min_phys, + "qdd_lim_max_phys": qdd_lim_max_phys, + "qdd_lim_min_phys": qdd_lim_min_phys, + "tau_lim_max_phys": tau_lim_max_phys, + "tau_lim_min_phys": tau_lim_min_phys, +} + +# Moveit limits +# /opt/ros/noetic/share/franka_description/robots/panda/joint_limits.yaml +# /opt/ros/noetic/share/panda_moveit_config/config/joint_limits.yaml +q_lim_max_moveit = q_lim_max_phys - 0.1 +q_lim_min_moveit = q_lim_min_phys + 0.1 +qd_lim_max_moveit = qd_lim_max_phys +qd_lim_min_moveit = qd_lim_min_phys +qdd_lim_max_moveit = np.array([3.75, 1.875, 2.5, 3.125, 3.75, 5, 5]) +qdd_lim_min_moveit = -1 * qdd_lim_max_moveit +tau_lim_max_moveit = np.array([87, 87, 87, 87, 12, 12, 12]) +tau_lim_min_moveit = -1 * tau_lim_max_phys + +moveit_limits = { + "q_lim_max_moveit": q_lim_max_moveit, + "q_lim_min_moveit": q_lim_min_moveit, + "qd_lim_max_moveit": qd_lim_max_moveit, + "qd_lim_min_moveit": qd_lim_min_moveit, + "qdd_lim_max_moveit": qdd_lim_max_moveit, + "qdd_lim_min_moveit": qdd_lim_min_moveit, + "tau_lim_max_moveit": tau_lim_max_moveit, + "tau_lim_min_moveit": tau_lim_min_moveit, +} + +import csv +def has_more_than_one_line(file_path: str) -> bool: + """Check if the file has more than one line (excluding the header). + + Args: + file_path (str): The path to the file. + + Returns: + bool: True if the file has more than one line (excluding the header), False otherwise. + """ + # Check if the file has more than one line (excluding the header) + with open(file_path, 'r') as file: + reader = csv.reader(file) + # Skip the header + next(reader) + # Check if there are more lines after the header + line_count = sum(1 for _ in reader) + return line_count > 1 + +logger = RichLogger("dynamics_learning-dataset_analysis_logger") + + +def get_file_list(directory: Path, suffix: str) -> list: + """Retrieve a list of files in a directory with a given suffix.""" + return list(sorted([f for f in directory.iterdir() if f.suffix == ".csv" and suffix in f.name])) + + +def validate_and_read_file(file_path: Path) -> np.ndarray: + """Validate that a file has more than one line and read it into a NumPy array.""" + if not has_more_than_one_line(file_path): + logger.warn(f"Skipping {file_path.name} due to insufficient data.") + return np.empty((0,)) + return np.genfromtxt(file_path, dtype=float, delimiter=",") + + +def extract_measurement_data(data: np.ndarray) -> dict: + """Extract measurement-related data from the array.""" + if data.size == 0: + return {} + time_diffs = np.diff(data[1:, 0]) + attained_freq = np.mean(1 / time_diffs) + return { + "t_meas": data[1:, 0].reshape(-1, 1), + "freq": attained_freq, + "duration": data[-1, 0], + "q_meas": data[1:, 1:8], + "qd_meas": data[1:, 8:15], + "tau_meas": data[1:, 15:22], + } + + +def extract_command_data(data: np.ndarray) -> dict: + """Extract command-related data from the array.""" + if data.size == 0: + return {} + return { + "t_command": data[1:, 0:1], + "q_command": data[1:, 1:8], + "qd_command": data[1:, 8:15], + "qdd_command": data[1:, 15:22], + } + + +def analyze_file_pair(meas_file: Path, com_file: Path) -> dict: + """Analyze a pair of measurement and command files.""" + meas_data = validate_and_read_file(meas_file) + com_data = validate_and_read_file(com_file) + + if meas_data.size == 0 or com_data.size == 0: + return {} + + return { + **extract_measurement_data(meas_data), + **extract_command_data(com_data), + } + + +def analyze_trajectories(directory: Path) -> list: + """Analyze all file pairs in the directory.""" + 
file_list = get_file_list(directory, "meas") + logger.info(f"Found {len(file_list)} measurement files.") + + results = [] + for meas_file in track(file_list, description="Analyzing trajectories..."): + com_file = directory / meas_file.name.replace("meas", "com") + if not com_file.exists(): + logger.warn(f"Missing command file for {meas_file.name}. Skipping...") + continue + result = analyze_file_pair(meas_file, com_file) + if result: + results.append(result) + return results + + +def save_statistics(results: list, output_path: Path): + """Save statistical summaries of the results to a CSV file.""" + durations = [res["duration"] for res in results] + stats = { + "# trajectories": len(durations), + "Duration Sum [s]": np.sum(durations), + "Duration Min [s]": np.min(durations), + "Duration Max [s]": np.max(durations), + "Duration Mean [s]": np.mean(durations), + } + df = pd.DataFrame([stats]) + df.to_csv(output_path, float_format="%.3f") + logger.info(f"Statistics saved to {output_path}") + + +def plot_histogram(data: list, output_path: Path, title: str, xlabel: str): + """Plot and save a histogram for the given data.""" + plt.hist(data, bins=30, density=True, edgecolor="black") + plt.title(title) + plt.xlabel(xlabel) + plt.ylabel("Frequency") + plt.grid(True) + plt.savefig(output_path) + logger.info(f"Histogram saved to {output_path}") + plt.close() + + +def plot_frequency_histogram(results: list, output_path: Path): + """Plot histogram for attained frequencies.""" + frequencies = [res["freq"] for res in results] + plot_histogram(frequencies, output_path, "Frequency Histogram", "Frequency [Hz]") + + +def main(directory: str): + """Main function to run the analysis.""" + dir_path = Path(directory) + output_dir = dir_path / "analysis" + output_dir.mkdir(parents=True, exist_ok=True) + + logger.info(f"Analyzing directory: {dir_path}") + results = analyze_trajectories(dir_path) + + if not results: + logger.warn("No valid trajectories found.") + return + + save_statistics(results, output_dir / "training_trajectories_statistics.csv") + #plot_frequency_histogram(results, output_dir / "frequency_histogram.png") + #plot_histogram(results, output_dir / "training_trajectories_histogram.png", "TITLE", "x") + return results + +def get_qs(results): + qs = {} + for i in range(7): + axis_values = [] + length_per_trajectory = [] + for trajectory in results: + q_commands = np.array(trajectory["q_meas"]) + q_asa = q_commands[:,i] + length_per_trajectory.append(len(q_asa)) + axis_values.append(q_asa) + qs[str(i+1)] = np.hstack(axis_values) + avg_points_per_traj = np.mean(length_per_trajectory) + total_points_per_traj = sum(length_per_trajectory) + return qs, avg_points_per_traj, total_points_per_traj, len(results) + +if __name__ == "__main__": + print("Running analysis...") + #%% +results = main(directory="./f2e72889-c140-4397-809f-fba1b892f17a") +# %% +llt_qs, llt_avgs, llt_totals, llt_num_tras = get_qs(results) + +# %% +results = main(directory="./c9ff52e1-1733-4829-a209-ebd1586a8697") +ita_qs, ita_avgs, ita_totals, ita_num_tras = get_qs(results) +#%% +results = main(directory="./2e60a671-dcc3-4a36-9734-a239c899b57d") +wzl_qs, wzl_avgs, wzl_totals, wzl_num_tras = get_qs(results) + +# %% +print(f""" +\tLLT\tITA\tWZL\tSUM +#TRA\t{llt_num_tras}\t{ita_num_tras}\t{wzl_num_tras}\t{llt_num_tras+ita_num_tras+wzl_num_tras} +AVG\t{int(llt_avgs)}\t{int(ita_avgs)}\t{int(wzl_avgs)}\t{int((llt_avgs+ita_avgs+wzl_avgs)/3)} +TOTAL\t{llt_totals}\t{ita_totals}\t{wzl_totals}\t{llt_totals+ita_totals+wzl_totals} +""") + +# %% 
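# --- Editor's sketch (not part of this change): a quick cross-check, assuming the cells above
# --- have been run; it reports whether the pooled joint positions stay inside the physical
# --- limits q_lim_min_phys / q_lim_max_phys defined at the top of this file.
# for axis in range(7):
#     pooled = np.concatenate(
#         [llt_qs[str(axis + 1)], ita_qs[str(axis + 1)], wzl_qs[str(axis + 1)]]
#     )
#     print(
#         f"Axis {axis + 1}: min {pooled.min():.3f}, max {pooled.max():.3f}, "
#         f"within physical limits: {q_lim_min_phys[axis] <= pooled.min() and pooled.max() <= q_lim_max_phys[axis]}"
#     )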
+import matplotlib.pyplot as plt +import numpy as np + +plt.rcParams.update({ + 'font.size': 14, # Adjust font size globally + 'axes.titlesize': 14, # Title font size + 'axes.labelsize': 14, # X and Y label font size + 'xtick.labelsize': 14, # X-axis tick labels + 'ytick.labelsize': 14, # Y-axis tick labels + 'legend.fontsize': 14 # Legend font size +}) + +# Example dictionaries (replace with your actual data) +dataset1 = llt_qs +dataset2 = ita_qs +dataset3 = wzl_qs + +# Store datasets in a list +datasets = [dataset1, dataset2, dataset3] +labels = ["Dataset LLT", "Dataset ITA", "Dataset WZL"] +colors = ["blue", "orange", "green"] + +# Create the subplots: 3 rows, 3 columns +fig, axes = plt.subplots(3, 3, figsize=(15, 10)) + +# Plot histograms for each key in the dictionaries +for i in range(7): + row, col = divmod(i, 3) # Determine row and column + for dataset, label, color in zip(datasets, labels, colors): + axes[row, col].hist(dataset[str(i + 1)], bins=30, alpha=1, label=label, color=color, density=True, histtype="step",linewidth=2) + axes[row, col].set_title(f"Axis {i+1}") + axes[row, col].set_xlabel("Joint Position in rad") + axes[row, col].set_ylabel("Density") + axes[row, col].grid(True) + +# Remove the last two subplots (unused space) +for i in range(7, 9): + row, col = divmod(i, 3) + axes[row, col].axis("off") + +# Add the legend in the last subplot space +axes[2, 2].legend( + handles=[plt.Line2D([0], [0], color=color, lw=2) for color in colors], + labels=labels, + loc="center", + fontsize="large" +) +axes[2, 2].axis("off") # Remove axes for the legend space + +# Adjust spacing +fig.tight_layout() + +#plt.show() +plt.savefig("joint_positions_histogram.pdf", format="pdf") + +# %% diff --git a/dynamics_learning/benchmark_number_of_runs.py b/dynamics_learning/benchmark_number_of_runs.py index 3599715501168e0975082604a4f103f2c1ee797b..a55dce56c156b5d8662058ebcbd603995a0be08e 100644 --- a/dynamics_learning/benchmark_number_of_runs.py +++ b/dynamics_learning/benchmark_number_of_runs.py @@ -24,7 +24,7 @@ from keras.models import Sequential logger = RichLogger("dynamics_learning-benchmark_number_of_runs") -THRESHOLD = 50 +THRESHOLD = 0.15 # lower bound MAE due to sensory inaccuracies (Torque in Nm) LLT_ROBOT_UUID = "f2e72889-c140-4397-809f-fba1b892f17a" ITA_ROBOT_UUID = "c9ff52e1-1733-4829-a209-ebd1586a8697" WZL_ROBOT_UUID = "2e60a671-dcc3-4a36-9734-a239c899b57d" @@ -33,8 +33,8 @@ val_loss = 1000 download_file = False model1 = False model2 = False -model3 = True -model4 = True +model3 = False +model4 = False def prepare_data(directory: Path) -> tuple: @@ -70,20 +70,28 @@ def training_loop( model: Sequential = None, ): global val_loss, runs + val_losses = [] model, history, run, config = train( q_qd_qdd_interpolated_command_input, tau_attained_input, model=model ) - val_loss = float(history.history["val_loss"][-1]) + val_loss = float(min(history.history["val_loss"])) #use smallest validation loss achieved during training + val_losses.append(val_loss) runs += 1 # these values are logged, once a run i finished logger.info(f"runs: {runs}") logger.info(f"val_loss: {val_loss}") + # check if this is the best model so far + if val_loss <= min(val_losses): + best_model = model + + # Check if the threshold has been subceeded - if val_loss < THRESHOLD: + if (val_loss < THRESHOLD) or (runs >= 300): logger.info( - f"Stopping training as val_loss has subceeded the threshold: {THRESHOLD}. The current run will be finished." + f"Stopping training. 
Validation loss is {val_loss} and {runs} runs where conducted. The current run will be finished." ) + best_model.save(f"/app/dynamics_learning/models/{robot_uuid}/{sweep_id}/{run.id}_native-save.h5") wandb.finish() # Finish the W&B run cleanly # Stop the sweep entirely @@ -92,8 +100,8 @@ def training_loop( parents=True, exist_ok=True ) - file_path = f"/app/dynamics_learning/models/{robot_uuid}/{sweep_id}/{run.id}.h5" - save_model_to_binary_file(model, file_path) + file_path = f"/app/dynamics_learning/models/{robot_uuid}/{sweep_id}/{run.id}_custom-save.h5" + save_model_to_binary_file(best_model, file_path) # send SIGTERM to ensure no further runs are started os.kill(os.getpid(), signal.SIGINT) return runs, val_loss @@ -116,7 +124,7 @@ def train_until_threshold_val_loss( model=model, ) wandb.agent( - sweep_id, training, project=WANDB_PROJECT, entity=WANDB_ENTITY, notes=notes + sweep_id, training, project=WANDB_PROJECT, entity=WANDB_ENTITY ) logger.info(f"""Training concluded. Number of runs: {runs} @@ -126,229 +134,304 @@ Validation Loss: {val_loss} if __name__ == "__main__": - # Remember to set the sweep id as needed: - # g5qxvipa: LLT instance from scratch - # 5vlv6m3t: ITA instance from scratch (used to train LLT instance, not trained using this script) - # 7x5hkf35: Foundation model (used to train LLT instance) - # 42d8t40t: LLT instance based on ITA instance - # fe3gjovo: LLT instance based on ITA instance with known hyper parameters - # 7tglijx8: LLT instance based on foundation model - - # Download Training Data from the server - # if download_file: - # download_resource_content_into_uuid_folders() - # TODO implement max number of trajectories used for benchmark training wandb.login(key=WANDB_API_TOKEN, relogin=True) - ############################################### - ####################Model 1#################### - ############################################### - if model1: - # LLT instance model trained from scratch - robot_uuid = LLT_ROBOT_UUID - directory = Path( - f"/app/dynamics_learning/benchmark_trajectory_data/{robot_uuid}" - ) - # Interpolate Training Data in UUID folders - ( - attained_data, - command_data, - interpolated_command_data, - q_qd_qdd_interpolated_command_input, - tau_attained_input, - ) = prepare_data(directory) - - # ensure that the sweep id is set correctly: g5qxvipa - # assert SWEEP_ID == "g5qxvipa", "Sweep ID is not set correctly. Ensure that the sweep id is set to g5qxvipa" - assert ( - robot_uuid == LLT_ROBOT_UUID - ), "Robot UUID is not set correctly. Ensure that the robot uuid is set to LLT_ROBOT_UUID" - sweep_id, sweep_config = setup_sweep(create_sweep=True) - - # reset runs counter - runs = 0 - val_loss = 1000 - # Train the model until the threshold validation loss is reached - train_until_threshold_val_loss( - sweep_id=sweep_id, - robot_uuid=robot_uuid, - q_qd_qdd_interpolated_command_input=q_qd_qdd_interpolated_command_input, - tau_attained_input=tau_attained_input, - model=None, - notes="Sweep to train model from scratch. 100 Trajectories are avaiulable for training. 
Training ist stoped when the validation loss is below 50.", - ) - - runs_model1 = runs - val_loss_model1 = val_loss - sweep_id1 = sweep_id - - ############################################### - ####################Model 2#################### - ############################################### - if model2: - # LLT model based on ITA model without known hyperparameters - robot_uuid = LLT_ROBOT_UUID - directory = Path( - f"/app/dynamics_learning/benchmark_trajectory_data/{robot_uuid}" - ) - # Interpolate Training Data in UUID folders - ( - attained_data, - command_data, - interpolated_command_data, - q_qd_qdd_interpolated_command_input, - tau_attained_input, - ) = prepare_data(directory) - # assert SWEEP_ID == "42d8t40t", "Sweep ID is not set correctly. Ensure that the sweep id is set to 42d8t40t" - assert ( - robot_uuid == LLT_ROBOT_UUID - ), "Robot UUID is not set correctly. Ensure that the robot uuid is set to LLT_ROBOT_UUID" - - sweep_id, sweep_config = setup_sweep(create_sweep=True) - - # reset runs counter - runs = 0 - val_loss = 1000 - - # TODO load ITA model instead of dummy model - model = load_model_from_binary_file( - "/app/dynamics_learning/models/99.99706268310547.h5" - ) - - # Train the model until the threshold validation loss is reached - train_until_threshold_val_loss( - sweep_id=sweep_id, - robot_uuid=robot_uuid, - q_qd_qdd_interpolated_command_input=q_qd_qdd_interpolated_command_input, - tau_attained_input=tau_attained_input, - model=model, - notes="Sweep to train model based on ITA model. 100 Trajectories are avaiulable for training. Training is stoped when the validation loss is below 50.", - ) - - runs_model2 = runs - val_loss_model2 = val_loss - sweep_id2 = sweep_id - - ############################################### - ####################Model 3#################### - ############################################### - if model3: - # LLT model based on ITA model with known hyperparameters - robot_uuid = LLT_ROBOT_UUID - directory = Path(f"/app/dynamics_learning/Trajectory Data/train/{robot_uuid}") - # Interpolate Training Data in UUID folders - ( - attained_data, - command_data, - interpolated_command_data, - q_qd_qdd_interpolated_command_input, - tau_attained_input, - ) = prepare_data(directory) - # assert SWEEP_ID == "42d8t40t", "Sweep ID is not set correctly. Ensure that the sweep id is set to 42d8t40t" - assert ( - robot_uuid == LLT_ROBOT_UUID - ), "Robot UUID is not set correctly. Ensure that the robot uuid is set to LLT_ROBOT_UUID" - - config_data = load_config( - "/app/dynamics_learning/Foundation_Model/models/hyperparameters.json" - ) - - sweep_id, sweep_config = setup_sweep_from_hyperparameters( - config_data=config_data, create_sweep=True - ) - - # reset runs counter - runs = 0 - val_loss = 1000 - - # TODO load ITA model instead of dummy model - model = load_model_from_binary_file( - "/app/dynamics_learning/models/99.99706268310547.h5" - ) - - # Train the model until the threshold validation loss is reached - train_until_threshold_val_loss( - sweep_id=sweep_id, - robot_uuid=robot_uuid, - q_qd_qdd_interpolated_command_input=q_qd_qdd_interpolated_command_input, - tau_attained_input=tau_attained_input, - model=model, - notes="Sweep to train model based on ITA model with known hyperparameters. 50 Trajectories are avaiulable for training. Training ist stoped when the validation loss is below 50.", - ) - - runs_model3 = runs - val_loss_model3 = val_loss - sweep_id3 = sweep_id - - # assert ( - # SWEEP_ID == "fe3gjovo" - # ), "Sweep ID is not set correctly. 
Ensure that the sweep id is set to fe3gjovo" - # assert ( - # robot_uuid == LLT_ROBOT_UUID - # ), "Robot UUID is not set correctly. Ensure that the robot uuid is set to LLT_ROBOT" - - ############################################### - ####################Model 4#################### - ############################################### - if model4: - # LLT model based on foundation model - # assert ( - # SWEEP_ID == "7tglijx8" - # ), "Sweep ID is not set correctly. Ensure that the sweep id is set to 7tglijx8" - # assert ( - # robot_uuid == LLT_ROBOT_UUID - # ), "Robot UUID is not set correctly. Ensure that the robot uuid is set to LLT_ROBOT" - - robot_uuid = LLT_ROBOT_UUID - directory = Path(f"/app/dynamics_learning/Trajectory Data/train/{robot_uuid}") - # Interpolate Training Data in UUID folders - ( - attained_data, - command_data, - interpolated_command_data, - q_qd_qdd_interpolated_command_input, - tau_attained_input, - ) = prepare_data(directory) - # assert SWEEP_ID == "42d8t40t", "Sweep ID is not set correctly. Ensure that the sweep id is set to 42d8t40t" - assert ( - robot_uuid == LLT_ROBOT_UUID - ), "Robot UUID is not set correctly. Ensure that the robot uuid is set to LLT_ROBOT_UUID" - - config_data = load_config( - "/app/dynamics_learning/Foundation_Model/models/hyperparameters.json" - ) - - sweep_id, sweep_config = setup_sweep_from_hyperparameters( - config_data=config_data, create_sweep=True - ) - - # reset runs counter + models = ["LLT_model_scratch", "LLT_model_ITA", "LLT_model_ITA_hyper", "LLT_model_foundation"] + robot_uuid = LLT_ROBOT_UUID + directory = Path( + f"/app/dynamics_learning/Pretrained_Model_Trajectory_Data/train/{robot_uuid}" + + ) + ( + attained_data, + command_data, + interpolated_command_data, + q_qd_qdd_interpolated_command_input, + tau_attained_input, + ) = prepare_data(directory) + for count, value in enumerate(models): + logger.info(f"Training model {count}: {value}") runs = 0 val_loss = 1000 - - model = load_model_from_binary_file( - "/app/dynamics_learning/Foundation_Model/models/Foundation_model.h5" - ) - - # Train the model until the threshold validation loss is reached + if count == 0: + continue + model = None + sweep_id, sweep_config = setup_sweep(create_sweep=True) + elif (count == 1): + continue + model = load_model_from_binary_file( + "/app/dynamics_learning/Foundation_Model/models/Instance_model_ITA_2024-11-05_09-30-54_0.2551353871822357.h5" + ) + sweep_id, sweep_config = setup_sweep(create_sweep=True, from_model=True) + elif (count == 2): + model = load_model_from_binary_file( + "/app/dynamics_learning/Foundation_Model/models/Instance_model_ITA_2024-11-05_09-30-54_0.2551353871822357.h5" + ) + config_data = load_config( + "/app/dynamics_learning/Foundation_Model/hyperparameters/hyperparameters_Instance_model_ITA_2024-11-05_09-30-54_0.2551353871822357.json" + ) + logger.info(f"Setting model to \n{model}\nand config_data to \n{config_data}") + sweep_id, sweep_config = setup_sweep_from_hyperparameters( + config_data=config_data, create_sweep=True + ) + elif (count == 3): + model = load_model_from_binary_file( + "/app/dynamics_learning/Foundation_Model/models/Foundation_model_2024-11-04_01-12-00_2.492475748062134.h5" + ) + config_data = load_config( + "/app/dynamics_learning/Foundation_Model/hyperparameters/hyperparameters_Foundation_model_2024-11-04_01-12-00_2.492475748062134.json" + ) + sweep_id, sweep_config = setup_sweep_from_hyperparameters( + config_data=config_data, create_sweep=True + ) + logger.info(f"Setting model to \n{model}\nand sweep_id to 
\n{sweep_id}\nand sweep_config to \n{sweep_config}") + train_until_threshold_val_loss( sweep_id=sweep_id, robot_uuid=robot_uuid, q_qd_qdd_interpolated_command_input=q_qd_qdd_interpolated_command_input, tau_attained_input=tau_attained_input, - model=model, - notes="Sweep to train model based on foundation model with known hyperparameters. 50 Trajectories are avaiulable for training. Training ist stoped when the validation loss is below 50.", + model=model ) - - runs_model4 = runs - val_loss_model4 = val_loss - sweep_id4 = sweep_id - - logger.info(f"""Training concluded. -The first model using sweep {sweep_id1} was trained for {runs_model1} runs and reached a validation loss of {val_loss_model1}. -The second model using sweep {sweep_id2} was trained for {runs_model2} runs and reached a validation loss of {val_loss_model2}. -The third model using sweep {sweep_id3} was trained for {runs_model3} runs and reached a validation loss of {val_loss_model3}. -The fourth model using sweep {sweep_id4} was trained for {runs_model4} runs and reached a validation loss of {val_loss_model4}. -""") - wandb.finish() + logger.info(f"runs:\t{runs}\nval_loss:\t{val_loss}") + + if count==0: + continue + runs_model1 = runs + val_loss_model1 = val_loss + sweep_id1 = sweep_id + elif count==1: + continue + runs_model2 = runs + val_loss_model2 = val_loss + sweep_id2 = sweep_id + elif count==2: + runs_model3 = runs + val_loss_model3 = val_loss + sweep_id3 = sweep_id + elif count==3: + runs_model4 = runs + val_loss_model4 = val_loss + sweep_id4 = sweep_id + + logger.info(f"Benchmark concluded.") + logger.info(f"The first model using sweep {sweep_id1} was trained for {runs_model1} runs and reached a validation loss of {val_loss_model1}.") + logger.info(f"The second model using sweep {sweep_id2} was trained for {runs_model2} runs and reached a validation loss of {val_loss_model2}.") + logger.info(f"The third model using sweep {sweep_id3} was trained for {runs_model3} runs and reached a validation loss of {val_loss_model3}.") + logger.info(f"The fourth model using sweep {sweep_id4} was trained for {runs_model4} runs and reached a validation loss of {val_loss_model4}.") + wandb.finish() + + + + + + +# ############################################### +# ####################Model 1#################### +# ############################################### +# if model1: +# # LLT instance model trained from scratch +# robot_uuid = LLT_ROBOT_UUID +# directory = Path( +# f"/app/dynamics_learning/benchmark_trajectory_data/{robot_uuid}" +# ) +# # Interpolate Training Data in UUID folders +# ( +# attained_data, +# command_data, +# interpolated_command_data, +# q_qd_qdd_interpolated_command_input, +# tau_attained_input, +# ) = prepare_data(directory) + +# # ensure that the sweep id is set correctly: g5qxvipa +# # assert SWEEP_ID == "g5qxvipa", "Sweep ID is not set correctly. Ensure that the sweep id is set to g5qxvipa" +# assert ( +# robot_uuid == LLT_ROBOT_UUID +# ), "Robot UUID is not set correctly. Ensure that the robot uuid is set to LLT_ROBOT_UUID" +# sweep_id, sweep_config = setup_sweep(create_sweep=True) + +# # reset runs counter +# runs = 0 +# val_loss = 1000 +# # Train the model until the threshold validation loss is reached +# train_until_threshold_val_loss( +# sweep_id=sweep_id, +# robot_uuid=robot_uuid, +# q_qd_qdd_interpolated_command_input=q_qd_qdd_interpolated_command_input, +# tau_attained_input=tau_attained_input, +# model=None, +# notes="Sweep to train model from scratch. 100 Trajectories are avaiulable for training. 
Training ist stoped when the validation loss is below 50.", +# ) + +# runs_model1 = runs +# val_loss_model1 = val_loss +# sweep_id1 = sweep_id + +# ############################################### +# ####################Model 2#################### +# ############################################### +# if model2: +# # LLT model based on ITA model without known hyperparameters +# robot_uuid = LLT_ROBOT_UUID +# directory = Path( +# f"/app/dynamics_learning/benchmark_trajectory_data/{robot_uuid}" +# ) +# # Interpolate Training Data in UUID folders +# ( +# attained_data, +# command_data, +# interpolated_command_data, +# q_qd_qdd_interpolated_command_input, +# tau_attained_input, +# ) = prepare_data(directory) +# # assert SWEEP_ID == "42d8t40t", "Sweep ID is not set correctly. Ensure that the sweep id is set to 42d8t40t" +# assert ( +# robot_uuid == LLT_ROBOT_UUID +# ), "Robot UUID is not set correctly. Ensure that the robot uuid is set to LLT_ROBOT_UUID" + +# sweep_id, sweep_config = setup_sweep(create_sweep=True) + +# # reset runs counter +# runs = 0 +# val_loss = 1000 + +# model = load_model_from_binary_file( +# "/app/dynamics_learning/models/99.99706268310547.h5" +# ) + +# # Train the model until the threshold validation loss is reached +# train_until_threshold_val_loss( +# sweep_id=sweep_id, +# robot_uuid=robot_uuid, +# q_qd_qdd_interpolated_command_input=q_qd_qdd_interpolated_command_input, +# tau_attained_input=tau_attained_input, +# model=model, +# notes="Sweep to train model based on ITA model. 100 Trajectories are avaiulable for training. Training is stoped when the validation loss is below 50.", +# ) + +# runs_model2 = runs +# val_loss_model2 = val_loss +# sweep_id2 = sweep_id + +# ############################################### +# ####################Model 3#################### +# ############################################### +# if model3: +# # LLT model based on ITA model with known hyperparameters +# robot_uuid = LLT_ROBOT_UUID +# directory = Path(f"/app/dynamics_learning/Trajectory Data/train/{robot_uuid}") +# # Interpolate Training Data in UUID folders +# ( +# attained_data, +# command_data, +# interpolated_command_data, +# q_qd_qdd_interpolated_command_input, +# tau_attained_input, +# ) = prepare_data(directory) +# # assert SWEEP_ID == "42d8t40t", "Sweep ID is not set correctly. Ensure that the sweep id is set to 42d8t40t" +# assert ( +# robot_uuid == LLT_ROBOT_UUID +# ), "Robot UUID is not set correctly. Ensure that the robot uuid is set to LLT_ROBOT_UUID" + +# config_data = load_config( +# "/app/dynamics_learning/Foundation_Model/models/hyperparameters.json" +# ) + +# sweep_id, sweep_config = setup_sweep_from_hyperparameters( +# config_data=config_data, create_sweep=True +# ) + +# # reset runs counter +# runs = 0 +# val_loss = 1000 + +# model = load_model_from_binary_file( +# "/app/dynamics_learning/models/99.99706268310547.h5" +# ) + +# # Train the model until the threshold validation loss is reached +# train_until_threshold_val_loss( +# sweep_id=sweep_id, +# robot_uuid=robot_uuid, +# q_qd_qdd_interpolated_command_input=q_qd_qdd_interpolated_command_input, +# tau_attained_input=tau_attained_input, +# model=model, +# notes="Sweep to train model based on ITA model with known hyperparameters. 50 Trajectories are avaiulable for training. Training ist stoped when the validation loss is below 50.", +# ) + +# runs_model3 = runs +# val_loss_model3 = val_loss +# sweep_id3 = sweep_id + +# # assert ( +# # SWEEP_ID == "fe3gjovo" +# # ), "Sweep ID is not set correctly. 
Ensure that the sweep id is set to fe3gjovo" +# # assert ( +# # robot_uuid == LLT_ROBOT_UUID +# # ), "Robot UUID is not set correctly. Ensure that the robot uuid is set to LLT_ROBOT" + +# ############################################### +# ####################Model 4#################### +# ############################################### +# if model4: +# # LLT model based on foundation model +# # assert ( +# # SWEEP_ID == "7tglijx8" +# # ), "Sweep ID is not set correctly. Ensure that the sweep id is set to 7tglijx8" +# # assert ( +# # robot_uuid == LLT_ROBOT_UUID +# # ), "Robot UUID is not set correctly. Ensure that the robot uuid is set to LLT_ROBOT" + +# robot_uuid = LLT_ROBOT_UUID +# directory = Path(f"/app/dynamics_learning/Trajectory Data/train/{robot_uuid}") +# # Interpolate Training Data in UUID folders +# ( +# attained_data, +# command_data, +# interpolated_command_data, +# q_qd_qdd_interpolated_command_input, +# tau_attained_input, +# ) = prepare_data(directory) +# # assert SWEEP_ID == "42d8t40t", "Sweep ID is not set correctly. Ensure that the sweep id is set to 42d8t40t" +# assert ( +# robot_uuid == LLT_ROBOT_UUID +# ), "Robot UUID is not set correctly. Ensure that the robot uuid is set to LLT_ROBOT_UUID" + +# config_data = load_config( +# "/app/dynamics_learning/Foundation_Model/models/hyperparameters.json" +# ) + +# sweep_id, sweep_config = setup_sweep_from_hyperparameters( +# config_data=config_data, create_sweep=True +# ) + +# # reset runs counter +# runs = 0 +# val_loss = 1000 + +# model = load_model_from_binary_file( +# "/app/dynamics_learning/Foundation_Model/models/Foundation_model.h5" +# ) + +# # Train the model until the threshold validation loss is reached +# train_until_threshold_val_loss( +# sweep_id=sweep_id, +# robot_uuid=robot_uuid, +# q_qd_qdd_interpolated_command_input=q_qd_qdd_interpolated_command_input, +# tau_attained_input=tau_attained_input, +# model=model, +# notes="Sweep to train model based on foundation model with known hyperparameters. 50 Trajectories are avaiulable for training. Training ist stoped when the validation loss is below 50.", +# ) + +# runs_model4 = runs +# val_loss_model4 = val_loss +# sweep_id4 = sweep_id + +# logger.info(f"""Training concluded. +# The first model using sweep {sweep_id1} was trained for {runs_model1} runs and reached a validation loss of {val_loss_model1}. +# The second model using sweep {sweep_id2} was trained for {runs_model2} runs and reached a validation loss of {val_loss_model2}. +# The third model using sweep {sweep_id3} was trained for {runs_model3} runs and reached a validation loss of {val_loss_model3}. +# The fourth model using sweep {sweep_id4} was trained for {runs_model4} runs and reached a validation loss of {val_loss_model4}. 
+# """) diff --git a/dynamics_learning/dynamics_learning/environment.py b/dynamics_learning/dynamics_learning/environment.py index 8de427351e6ad4015460410cfdfdc7c0fd4cece7..3dc15513c5d10412d29f900abc2a1e8e025622a3 100644 --- a/dynamics_learning/dynamics_learning/environment.py +++ b/dynamics_learning/dynamics_learning/environment.py @@ -83,11 +83,11 @@ except (EnvironmentError, ValueError) as e: logger.log(e, "warning") logger.log(f"Setting NUM_MODELS to {NUM_MODELS}") -# Suppress CUDA and cuDNN logs +# # Suppress CUDA and cuDNN logs try: if "TF_CPP_MIN_LOG_LEVEL" not in os.environ: os.environ["TF_CPP_MIN_LOG_LEVEL"] = ( - "3" # Suppress TensorFlow logs (0 = all logs, 1 = INFO, 2 = WARNING, 3 = ERROR) + "2" # Suppress TensorFlow logs (0 = all logs, 1 = INFO, 2 = WARNING, 3 = ERROR) ) logger.info("Suppressing TensorFlow logs...") else: @@ -106,16 +106,16 @@ except Exception as e: logger.error(f"Error setting CUDA_DEVICE_ORDER: {e}") # Set CUDA visible devices to 1 as 0 will run continously in the beginning. -try: - if "CUDA_VISIBLE_DEVICES" not in os.environ: - os.environ["CUDA_VISIBLE_DEVICES"] = str( - CUDA_DEVICE - ) # Use only the first GPU, if you have multiple GPUs - logger.info(f"Setting CUDA_VISIBLE_DEVICES to {str(CUDA_DEVICE)}...") - else: - logger.debug("CUDA_VISIBLE_DEVICES is already set.") -except Exception as e: - logger.error(f"Error setting CUDA_VISIBLE_DEVICES: {e}") +# try: +# if "CUDA_VISIBLE_DEVICES" not in os.environ: +# os.environ["CUDA_VISIBLE_DEVICES"] = str( +# CUDA_DEVICE +# ) # Use only the first GPU, if you have multiple GPUs +# logger.info(f"Setting CUDA_VISIBLE_DEVICES to {str(CUDA_DEVICE)}...") +# else: +# logger.debug("CUDA_VISIBLE_DEVICES is already set.") +# except Exception as e: +# logger.error(f"Error setting CUDA_VISIBLE_DEVICES: {e}") # Allow GPU memory growth try: diff --git a/dynamics_learning/dynamics_learning/preprocessing/dataset_analysis.py b/dynamics_learning/dynamics_learning/preprocessing/dataset_analysis.py index 1c8775c1a5867086bfb83d97d7e583ff5c9079aa..830e064b0b5509a825233078239f34a71d0f24cd 100644 --- a/dynamics_learning/dynamics_learning/preprocessing/dataset_analysis.py +++ b/dynamics_learning/dynamics_learning/preprocessing/dataset_analysis.py @@ -98,7 +98,7 @@ def analyze(directory: str) -> Tuple[AttainedDataDict, CommandDataDict]: directory (str): The directory containing the dataset. Returns: - Tuple[AttainedDataDict, CommandDataDict]: A tuple containing the attained and command data dictionaries. + Tuple[AttainedDataDict, CommandDataDict]: A tuple containing the attained and command data dictionaries. Data is in unaltered SI Units. - AttainedDataDict: A dictionary containing the attained data: - t_attained (np.ndarray): The time data for the attained data. diff --git a/dynamics_learning/dynamics_learning/preprocessing/trajectory_interpolation.py b/dynamics_learning/dynamics_learning/preprocessing/trajectory_interpolation.py index 7fc0b73d4c7833a674701f4c35319ea5206dbe6d..4df7e37d6b7959294e52e9bbf939baedd3c7bc18 100644 --- a/dynamics_learning/dynamics_learning/preprocessing/trajectory_interpolation.py +++ b/dynamics_learning/dynamics_learning/preprocessing/trajectory_interpolation.py @@ -22,12 +22,19 @@ def normalize_data( """ Normalize the data using given min and max values for each parameter. + This function applies min-max normalization or scaling to the data, transforming it to a specified range (min to max, which defaults to -1 to 1). 
The terms "normalization" and "scaling" can be used interchangeably in this context, as it involves both: + + Normalization: It adjusts the data to fit within the new specified range. + Scaling: Specifically, it linearly scales the data points between min and max, as defined by the provided minimum (min_vals) and maximum (max_vals) values for each parameter. + + In summary, this function both normalizes and scales the data to the defined range. + Args: data (np.ndarray): The data to normalize. min_vals (List[float]): The minimum values for each parameter. max_vals (List[float]): The maximum values for each parameter. - min (float): The new minimum value for normalization. - max (float): The new maximum value for normalization. + min (float): The new minimum value for normalization. Defaults to -1. + max (float): The new maximum value for normalization. Defaults to -1. Returns: np.ndarray: The normalized data. @@ -147,7 +154,7 @@ def interpolate( not has_more_than_one_line(str(filename2))): logger.warn(f"Skipping {filename} and {filename2} because one or both have 1 or fewer lines (excluding header).") continue - interpolated_data = process_file(data_input_path, str(filename)) + interpolated_data = process_file(data_input_path, str(filename)) # Process the file and normalize it. t_interpolated_command = np.concatenate( (t_interpolated_command, interpolated_data[:, 0:1]) ) diff --git a/dynamics_learning/dynamics_learning/sweep/setup.py b/dynamics_learning/dynamics_learning/sweep/setup.py index 6336cb372b287ce9e353acf7819237f6d3736a23..be4f5097aec67033e76d8710c725fb138060406d 100644 --- a/dynamics_learning/dynamics_learning/sweep/setup.py +++ b/dynamics_learning/dynamics_learning/sweep/setup.py @@ -15,6 +15,7 @@ from dynamics_learning.environment import ( ) from dynamics_learning.sweep.sweep_config import ( sweep_config, + sweep_config_from_model, sweep_config_from_hyperparameters, ) @@ -22,7 +23,7 @@ from dynamics_learning.sweep.sweep_config import ( logger = RichLogger("dynamics_learning-sweep_setup_logger") -def setup_sweep(create_sweep: bool = False) -> tuple[str, dict[str, Any]]: +def setup_sweep(create_sweep: bool = False, from_model: bool = False) -> tuple[str, dict[str, Any]]: """Sets up a Sweep from the given sweep_config. 
Args: + @@ -34,16 +35,18 @@ def setup_sweep(create_sweep: bool = False) -> tuple[str, dict[str, Any]]: global SWEEP_ID if create_sweep: SWEEP_ID = None - if not SWEEP_ID: + if not SWEEP_ID and from_model: + logger.log("Running the sweep with this config:") + logger.log(sweep_config_from_model) + sweep_id = wandb.sweep(sweep_config_from_model, project=WANDB_PROJECT, entity=WANDB_ENTITY) + elif not SWEEP_ID: logger.log("Running the sweep with this config:") logger.log(sweep_config) sweep_id = wandb.sweep(sweep_config, project=WANDB_PROJECT, entity=WANDB_ENTITY) else: sweep_id = SWEEP_ID - sweep_config["sweep_id"] = sweep_id sweep_config["project"] = WANDB_PROJECT sweep_config["entity"] = WANDB_ENTITY - sweep_config["notes"] = WANDB_NOTES logger.log(f"Running sweep_id {sweep_id}") return sweep_id, sweep_config @@ -74,11 +77,9 @@ def setup_sweep_from_hyperparameters( "max": min(config_data["learning_rate"] * 1.5, 0.9), }, "window_size": {"value": config_data["window_size"]}, - "batch_size": {"value": config_data["batch_size"]}, + "batch_size": {"values": [1024, 2048, 4096]}, "units": { - "distribution": "int_uniform", - "min": max(1, config_data["units"] - 10), - "max": min(config_data["units"] + 10, 100), + "value": config_data["units"] }, "dropout": { "distribution": "log_uniform_values", @@ -86,11 +87,9 @@ "max": min(config_data["dropout"] * 1.5, 1), }, "layers": { - "distribution": "int_uniform", - "min": max(1, config_data["layers"] - 10), - "max": min(config_data["layers"] + 10, 100), + "value": config_data["layers"] }, # {"value": 10}, - "epochs": {"value": config_data["epochs"]}, + "epochs": {"value": 1000}, } sweep_config_from_hyperparameters["parameters"] = ( parameters_dict_from_hyperparameters @@ -108,9 +107,7 @@ ) else: sweep_id = SWEEP_ID - sweep_config_from_hyperparameters["sweep_id"] = sweep_id sweep_config_from_hyperparameters["project"] = WANDB_PROJECT sweep_config_from_hyperparameters["entity"] = WANDB_ENTITY - sweep_config_from_hyperparameters["notes"] = WANDB_NOTES logger.log(f"Running sweep_id {sweep_id}") return sweep_id, sweep_config_from_hyperparameters diff --git a/dynamics_learning/dynamics_learning/sweep/sweep_config.py b/dynamics_learning/dynamics_learning/sweep/sweep_config.py index b913243f20b3388f11ad60069e3ba69a79f533ed..b5e643aae85306cac067a9ecf34a7ed2a92f9c79 100644 --- a/dynamics_learning/dynamics_learning/sweep/sweep_config.py +++ b/dynamics_learning/dynamics_learning/sweep/sweep_config.py @@ -26,12 +26,31 @@ parameters_dict = { "batch_size": {"values": [1024, 2048, 4096]}, "units": {"distribution": "int_uniform", "min": 1, "max": 100}, "dropout": {"distribution": "log_uniform_values", "min": 1e-5, "max": 1}, - "layers": {"distribution": "int_uniform", "min": 1, "max": 100}, # {"value": 10}, - "epochs": {"value": 2}, + "layers": {"distribution": "int_uniform", "min": 1, "max": 10}, # {"value": 10}, + "epochs": {"value": 1000}, } sweep_config["parameters"] = parameters_dict + +sweep_config_from_model = {"method": "bayes"} +sweep_config_from_model["metric"] = metric +sweep_config_from_model["early_terminate"] = early_terminate + +# Parameters given a fixed "value" here are not actually tuned; they are determined by the architecture of the loaded model.
+parameters_dict = { + "optimizer": {"value": "adam"}, + "clipnorm": {"distribution": "log_uniform_values", "min": 1, "max": 10}, + "learning_rate": {"distribution": "log_uniform_values", "min": 1e-7, "max": 0.9}, + "window_size": {"value": 3}, + "batch_size": {"values": [1024, 2048, 4096]}, + "units": {"value": 100}, + "dropout": {"value": 1}, + "layers": {"value": 10}, + "epochs": {"value": 1000}, +} + +sweep_config_from_model["parameters"] = parameters_dict + sweep_config_from_hyperparameters = {"method": "bayes"} sweep_config_from_hyperparameters["metric"] = metric sweep_config_from_hyperparameters["early_terminate"] = early_terminate diff --git a/dynamics_learning/dynamics_learning/training/__init__.py b/dynamics_learning/dynamics_learning/training/__init__.py index f0286f9a3a0b00d35fa08121b317b5c50468c484..46c16671631b11a8e6af523a761a832c237d1baa 100644 --- a/dynamics_learning/dynamics_learning/training/__init__.py +++ b/dynamics_learning/dynamics_learning/training/__init__.py @@ -5,16 +5,17 @@ # import json from pathlib import Path from typing import Dict, Tuple +from datetime import datetime import tensorflow as tf from keras import optimizers from keras.callbacks import Callback from keras.layers import LSTM, Dense, Dropout -from keras.losses import MeanSquaredError -from keras.metrics import Accuracy, KLDivergence, MeanAbsolutePercentageError +from keras.losses import MeanAbsoluteError +#from keras.metrics import MeanAbsoluteError from keras.models import Sequential from pritty_logger import RichLogger -from tensorflow.keras.callbacks import History +from tensorflow.keras.callbacks import History, EarlyStopping from tensorflow.keras.models import Model # , Sequential import tensorflow_text as tf_text @@ -79,8 +80,8 @@ def make_model(config: dict, X_train: tf.Tensor, Y_train: tf.Tensor) -> Sequenti opt = optimizers.Adam(learning_rate=config.learning_rate, clipvalue=config.clipnorm) model.compile( optimizer=opt, - loss=MeanSquaredError(), - metrics=[Accuracy(), KLDivergence(), MeanAbsolutePercentageError()], + loss=MeanAbsoluteError(), + #metrics=[MeanAbsoluteError()], run_eagerly=True, ) model.build(X_train.shape) @@ -91,6 +92,14 @@ def make_model(config: dict, X_train: tf.Tensor, Y_train: tf.Tensor) -> Sequenti class Metrics(Callback): def on_epoch_end(self, batch, logs={}): return None + +early_stopping = EarlyStopping( + monitor="val_loss", # Metric to monitor + patience=15, # Number of epochs with no improvement after which training stops + min_delta=101.71/10000, # Minimum change to qualify as improvement: 1/100 percent of the maximum possible MAE + mode="min", # Mode for minimizing the monitored metric + verbose=1 +) def train( @@ -118,7 +127,6 @@ def train( # Take window size from the loaded model's input shape model_input_shape = model.layers[0].input_shape window_size = model_input_shape[1] - batch_size = model_input_shape[0] else: # Use the window size defined in config if no model is loaded model_input_shape = ( @@ -127,7 +135,7 @@ def train( q_qd_qdd_interpolated_command_input.shape[-1], ) window_size = config.window_size - batch_size = config.batch_size + batch_size = config.batch_size # Split Dataset to train and validation logger.info("Splitting the dataset into train and validation sets.") @@ -141,7 +149,7 @@ def train( x_train, x_val = tf.split( q_qd_qdd_interpolated_command_input, [train_size, val_size], axis=0 - ) + ) # this is still min-mex normalized logger.info(f"x_train shape: {x_train.shape}") logger.info(f"tau_attained_input shape: 
{tau_attained_input.shape}") _, y_train, _, y_val = tf.split( @@ -153,7 +161,7 @@ def train( val_size - window_size + 1, ], 0, - ) + ) # this is still non-normalized logger.info(f"y_train shape: {y_train.shape}") # Sliding Window for input interpolated command joint data @@ -164,7 +172,7 @@ def train( # if a model is passed, use it, else create a new model if model: - model = model + new_model = model logger.info(f"model shape is {model.layers[0].input_shape}") # Define how many layers you want to keep (e.g., exclude last 2 layers) # Get the number of layers in the model @@ -176,81 +184,134 @@ def train( # Define how many layers you want to remove LAYERS_TO_REMOVE = 5 - # Check if you can remove five layers; if not, adjust to delete fewer layers - layers_to_remove = min(LAYERS_TO_REMOVE, total_layers - 2) + if (len(model.layers)) > LAYERS_TO_REMOVE: + for layer in model.layers: + layer.trainable = False - # If it's a Sequential model - if isinstance(model, Sequential): - # Remove the specified number of layers - for _ in range(layers_to_remove): - model.pop() - new_model = model + # Unfreeze only the last 5 layers + for layer in model.layers[-5:]: + layer.trainable = True else: - # If it's a Functional model - # Truncate the model to remove the specified number of layers - new_model = Model( - inputs=model.input, - outputs=model.layers[-(layers_to_remove + 1)].output, - ) - - for layer in new_model.layers: - layer.trainable = False - - # Add three LSTM layers, a dropout layer, and a dense layer - model.add( - LSTM( - units=config.units, - return_sequences=True, - input_shape=(x_train.shape[1], x_train.shape[2]), - activation="tanh", - recurrent_activation="sigmoid", - recurrent_dropout=0, - unroll=False, - use_bias=True, - name="LSTM_1", - ) - ) - model.add( - LSTM( - units=config.units, - return_sequences=True, - input_shape=(x_train.shape[1], x_train.shape[2]), - activation="tanh", - recurrent_activation="sigmoid", - recurrent_dropout=0, - unroll=False, - use_bias=True, - name="LSTM_2", - ) - ) - model.add(LSTM(units=config.units, return_sequences=False, name="LSTM_3")) - model.add(Dropout(config.dropout)) - model.add(Dense(units=y_train.shape[1])) + # If there are 5 or fewer layers, make all layers trainable + for layer in model.layers: + layer.trainable = True + + # # Check if you can remove five layers; if not, adjust to delete fewer layers + # layers_to_remove = min(LAYERS_TO_REMOVE, total_layers - 2) + + # # If it's a Sequential model + # if isinstance(model, Sequential): + # # Remove the specified number of layers + # for _ in range(layers_to_remove): + # model.pop() + # new_model = model + # else: + # # If it's a Functional model + # # Truncate the model to remove the specified number of layers + # new_model = Model( + # inputs=model.input, + # outputs=model.layers[-(layers_to_remove + 1)].output, + # ) + + # for layer in new_model.layers: + # layer.trainable = True + + # if layers_to_remove == LAYERS_TO_REMOVE: + # # Add three LSTM layers, a dropout layer, and a dense layer + # new_model.add( + # LSTM( + # units=config.units, + # return_sequences=True, + # input_shape=(x_train.shape[1], x_train.shape[2]), + # activation="tanh", + # recurrent_activation="sigmoid", + # recurrent_dropout=0, + # unroll=False, + # use_bias=True, + # name="LSTM_1", + # ) + # ) + # new_model.add( + # LSTM( + # units=config.units, + # return_sequences=True, + # input_shape=(x_train.shape[1], x_train.shape[2]), + # activation="tanh", + # recurrent_activation="sigmoid", + # recurrent_dropout=0, + # 
unroll=False, + # use_bias=True, + # name="LSTM_2", + # ) + # ) + # new_model.add(LSTM(units=config.units, return_sequences=False, name="LSTM_3")) + # new_model.add(Dropout(config.dropout)) + # new_model.add(Dense(units=y_train.shape[1])) # Compile the model opt = optimizers.Adam( learning_rate=config.learning_rate, clipvalue=config.clipnorm ) - model.compile( + # Keras computes the Mean Absolute Error (MAE) loss over a batch of y and y_pred tensors (here 2000x7) in three steps: + # 1. Element-wise absolute difference: abs_error = |y - y_pred|, a 2000x7 tensor. + # 2. Mean across the 7 features: mean_error_per_sample[i] = (1/7) * sum_{j=1..7} abs_error[i, j]. + # 3. Mean across the 2000 samples: MAE = (1/2000) * sum_{i=1..2000} mean_error_per_sample[i]. + # The loss is therefore a single scalar: the mean absolute error over all samples and features. + + # Theoretical maximum MAE for these targets: if every prediction deviates from the true value by the maximum possible amount, the first four features (limits [-80, 80]) each contribute |80 - (-80)| = 160 and the last three features (limits [-12, 12]) each contribute |12 - (-12)| = 24, so + # max_error_per_sample = (4 * 160 + 3 * 24) / 7 = (640 + 72) / 7 = 712 / 7 = 101.71. + # Since the MAE is an average over samples, the theoretical maximum MAE over the whole dataset is also 101.71, a useful worst-case reference when interpreting MAE values.
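# --- Editor's illustration (not part of this change): a minimal numpy check of the MAE
# --- arithmetic above, assuming y and y_pred are (2000, 7) arrays within the stated limits.
# import numpy as np
# rng = np.random.default_rng(0)
# low, high = [-80] * 4 + [-12] * 3, [80] * 4 + [12] * 3
# y = rng.uniform(low, high, size=(2000, 7))
# y_pred = rng.uniform(low, high, size=(2000, 7))
# abs_error = np.abs(y - y_pred)            # step 1: element-wise |y - y_pred|
# mae = abs_error.mean(axis=1).mean()       # steps 2 and 3: mean over features, then samples
# assert np.isclose(mae, abs_error.mean())  # identical to the overall element-wise mean
# max_mae = (4 * 160 + 3 * 24) / 7          # worst case given the limits: 101.71...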
+ + new_model.compile( optimizer=opt, - loss=MeanSquaredError(), - metrics=[Accuracy(), KLDivergence(), MeanAbsolutePercentageError()], + loss=MeanAbsoluteError(), + #metrics=[MeanAbsoluteError()], run_eagerly=True, ) - model.build(x_train.shape) + new_model.build(x_train.shape) + logger.info(f"{new_model.summary()}") else: - model = make_model(config, x_train, y_train) + logger.info("Creating a new model.") + new_model = make_model(config, x_train, y_train) from wandb.integration.keras import WandbCallback - history = model.fit( + history = new_model.fit( x_train, y_train, epochs=config.epochs, shuffle=True, batch_size=batch_size, - verbose=2, + verbose=1, validation_data=(x_val, y_val), callbacks=[ Metrics(), @@ -260,18 +321,24 @@ def train( save_model=False, validation_data=(x_val, y_val), ), + early_stopping ], ) - return model, history, run, config + return new_model, history, run, config def upload(model: Sequential, history: History, config: Dict): - decision_metric = float(history.history["val_loss"][-1]) + decision_metric = float(min(history.history["val_loss"])) # save the model in a binary model so that it can be uploaded to coscine + now = datetime.now() save_model_to_binary_file( - model, "/app/dynamics_learning/Foundation_Model/models/Foundation_model.h5" + model, + f"/app/dynamics_learning/Foundation_Model/models/Foundation_model_{now.strftime('%Y-%m-%d_%H-%M-%S')}_{decision_metric}.h5", + ) + save_model_to_binary_file( + model, f"/app/dynamics_learning/Foundation_Model/models/Foundation_model.h5" ) # TODO implement another naming schema, as soon as a custom application profiles for the models is available in coscine. # get resource contents @@ -296,117 +363,279 @@ def upload(model: Sequential, history: History, config: Dict): "/app/dynamics_learning/Foundation_Model/hyperparameters/hyperparameters.json" ) save_config(config, config_local_file_name) + config_local_file_name = ( + f"/app/dynamics_learning/Foundation_Model/hyperparameters/hyperparameters_{now.strftime('%Y-%m-%d_%H-%M-%S')}_{decision_metric}.json" + ) + save_config(config, config_local_file_name) - logger.info("Checking if a model exists in the resource.") - # if no model exits, upload the current model to coscine - if (_resource_files == []) or (not any("models" in s for s in _resource_files)): - logger.info("The resource is empty.") - update_existing_file( - "/app/dynamics_learning/Foundation_Model/models/Foundation_model.h5", - metadataform, - resource, - ) - metadataform = resource.metadata_form() - metadataform["Title"] = "hyperparameters" - update_existing_file(config_local_file_name, metadataform, resource) - metadataform = resource.metadata_form() - for _path in Path( - "/app/dynamics_learning/Trajectory Data/train/analysis" - ).rglob("*"): - if _path.is_file(): - coscine_path = "train/analysis/" + _path.name - # logger.info("Deleting old analysis file.") - metadataform["Title"] = _path.name - # logger.warn(file) - # Check if the file already exists and delete it - for file in resource.files(recursive=True): - if _path.name in file.name: - logger.info( - f"File found. 
Deleting it:\nFile name\t{file.name}\ncoscine path\t{coscine_path}\n_path.name\t{_path.name}" - ) - resource.file(coscine_path).delete() - # upload a new file - with open(_path, "rb") as file: - logger.info("Uploading file") - resource.upload( - coscine_path, # The file path in the coscine resource - file.read(), # the binary file contents - metadataform, # the metadataobject - ) - logger.info("Done updating analysis file.") - return model - - # else check if the current model outperforms the existing model and if so upload the current model, return the model from the function - else: - substrings = [ - "Foundation_model", - "Foundation_model/models", - "Foundation_model/analysis", - "png", - "csv", - "json", - "hyperparam", - ] - lowercase_substrings = [substring.lower() for substring in substrings] - _resource_files, resource_objects = get_resource_content( - resource, path="models/" - ) - logger.log("A model previously existed") - for file in resource_objects: - if "analysis" in file.path or "hyperparameters" in file.path: - continue - title = file.metadata_form()["Title"][0] - lowercase_title = title.lower() - if any(substring in lowercase_title for substring in lowercase_substrings): - continue - metric_value, metric_name = file.metadata_form()["Title"][0].split("_", 1) - logger.log(f"Model in resource has {metric_value} {metric_name}.") - if float(metric_value) >= decision_metric: - logger.log( - "Currently trained model performed better and will be uploaded." - ) - update_existing_file( - "/app/dynamics_learning/Foundation_Model/models/Foundation_model.h5", - metadataform, - resource, - ) - metadataform = resource.metadata_form() - metadataform["Title"] = "hyperparameters" - update_existing_file(config_local_file_name, metadataform, resource) - resource = PROJECT.resource("Trajectory Data") - for _path in Path( - "/app/dynamics_learning/Trajectory Data/train/analysis" - ).rglob("*"): - if _path.is_file(): - # file = str(_path) - coscine_path = "train/analysis/" + _path.name - # logger.info("Deleting old analysis file.") - metadataform["Title"] = _path.name - logger.info( - f"seeing if file: {_path} is in resource and needs to be deleted" - ) - # Check if the file already exists and delete it - for file in resource.files(recursive=True): - if (file.name in _path.name) or (_path.name in file.name): - logger.info( - f"File found. Deleting it:\nFile name\t{file.name}\ncoscine path\t{coscine_path}\n_path.name\t{_path.name}" - ) - resource.file(coscine_path).delete() - # upload a new file - with open(_path, "rb") as file: - logger.info(f"updating file {_path}") - resource.upload( - coscine_path, # The file path in the coscine resource - file.read(), # the binary file contents - metadataform, # the metadataobject - ) - logger.info("Done updating analysis file.") - return model - else: - logger.log( - f"The current model has a {decision_metric} {metric_name}. 
Thus it performs worse and is not uploaded" - ) - return None + + # logger.info("Checking if a model exists in the resource.") + # # if no model exits, upload the current model to coscine + # if (_resource_files == []) or (not any("models" in s for s in _resource_files)): + # logger.info("The resource is empty.") + # update_existing_file( + # "/app/dynamics_learning/Foundation_Model/models/Foundation_model.h5", + # metadataform, + # resource, + # ) + # metadataform = resource.metadata_form() + # metadataform["Title"] = "hyperparameters" + # update_existing_file(config_local_file_name, metadataform, resource) + # metadataform = resource.metadata_form() + # for _path in Path( + # "/app/dynamics_learning/Trajectory Data/train/analysis" + # ).rglob("*"): + # if _path.is_file(): + # coscine_path = "train/analysis/" + _path.name + # # logger.info("Deleting old analysis file.") + # metadataform["Title"] = _path.name + # # logger.warn(file) + # # Check if the file already exists and delete it + # for file in resource.files(recursive=True): + # if _path.name in file.name: + # logger.info( + # f"File found. Deleting it:\nFile name\t{file.name}\ncoscine path\t{coscine_path}\n_path.name\t{_path.name}" + # ) + # resource.file(coscine_path).delete() + # # upload a new file + # with open(_path, "rb") as file: + # logger.info("Uploading file") + # resource.upload( + # coscine_path, # The file path in the coscine resource + # file.read(), # the binary file contents + # metadataform, # the metadataobject + # ) + # logger.info("Done updating analysis file.") + # return model + + # # else check if the current model outperforms the existing model and if so upload the current model, return the model from the function + # else: + # substrings = [ + # "Foundation_model", + # "Foundation_model/models", + # "Foundation_model/analysis", + # "png", + # "csv", + # "json", + # "hyperparam", + # ] + # lowercase_substrings = [substring.lower() for substring in substrings] + # _resource_files, resource_objects = get_resource_content( + # resource, path="models/" + # ) + # logger.log("A model previously existed") + # for file in resource_objects: + # if "analysis" in file.path or "hyperparameters" in file.path: + # continue + # title = file.metadata_form()["Title"][0] + # lowercase_title = title.lower() + # if any(substring in lowercase_title for substring in lowercase_substrings): + # continue + # metric_value, metric_name = file.metadata_form()["Title"][0].split("_", 1) + # logger.log(f"Model in resource has {metric_value} {metric_name}.") + # if float(metric_value) >= decision_metric: + # logger.log( + # "Currently trained model performed better and will be uploaded." 
+ # ) + # update_existing_file( + # "/app/dynamics_learning/Foundation_Model/models/Foundation_model.h5", + # metadataform, + # resource, + # ) + # metadataform = resource.metadata_form() + # metadataform["Title"] = "hyperparameters" + # update_existing_file(config_local_file_name, metadataform, resource) + # resource = PROJECT.resource("Trajectory Data") + # for _path in Path( + # "/app/dynamics_learning/Trajectory Data/train/analysis" + # ).rglob("*"): + # if _path.is_file(): + # # file = str(_path) + # coscine_path = "train/analysis/" + _path.name + # # logger.info("Deleting old analysis file.") + # metadataform["Title"] = _path.name + # logger.info( + # f"seeing if file: {_path} is in resource and needs to be deleted" + # ) + # # Check if the file already exists and delete it + # for file in resource.files(recursive=True): + # if (file.name in _path.name) or (_path.name in file.name): + # logger.info( + # f"File found. Deleting it:\nFile name\t{file.name}\ncoscine path\t{coscine_path}\n_path.name\t{_path.name}" + # ) + # resource.file(coscine_path).delete() + # # upload a new file + # with open(_path, "rb") as file: + # logger.info(f"updating file {_path}") + # resource.upload( + # coscine_path, # The file path in the coscine resource + # file.read(), # the binary file contents + # metadataform, # the metadataobject + # ) + # logger.info("Done updating analysis file.") + # return model + # else: + # logger.log( + # f"The current model has a {decision_metric} {metric_name}. Thus it performs worse and is not uploaded" + # ) + # return None + return None + + + +def instance_model(model: Sequential, history: History, config: Dict): + decision_metric = float(history.history["val_loss"][-1]) + + # save the model in a binary model so that it can be uploaded to coscine + now = datetime.now() + save_model_to_binary_file( + model, + f"/app/dynamics_learning/Foundation_Model/models/Instance_model_ITA_{now.strftime('%Y-%m-%d_%H-%M-%S')}_{decision_metric}.h5", + ) + save_model_to_binary_file( + model, f"/app/dynamics_learning/Foundation_Model/models/Instance_model_ITA.h5" + ) # TODO implement another naming schema, as soon as a custom application profiles for the models is available in coscine. 
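+    # Note on the checkpoint pair written above: the timestamped file keeps a per-run record
+    # whose name embeds the training time and the decision_metric (here the final-epoch
+    # validation loss, unlike upload(), which now takes the minimum over all epochs), while
+    # "Instance_model_ITA.h5" is simply overwritten with the most recent run.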
+ + # # get resource contents + # resource = PROJECT.resource("Foundation_Model") + # metadataform = resource.metadata_form() + # metadataform["Title"] = ( + # f"{decision_metric}_val_loss" # TODO implement custom coscine application profile for the models based on MITM base profile + # ) + # logger.info(f"The following meta data is saved\n{metadataform}") + # _resource_files, resource_objects = get_resource_content(resource, path="models/") + + # Set model name + _model_name = "models/Foundation_model.h5" # f"Foundation-Model_test_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}_{decision_metric}.h5" + Path("/app/dynamics_learning/Foundation_Model/models").mkdir( + parents=True, exist_ok=True + ) + _config_coscine_file_name = "hyperparameters/hyperparameters.json" + Path("/app/dynamics_learning/Foundation_Model/hyperparameters").mkdir( + parents=True, exist_ok=True + ) + config_local_file_name = ( + "/app/dynamics_learning/Foundation_Model/hyperparameters/hyperparameters_Instance_model_ITA.json" + ) + save_config(config, config_local_file_name) + + config_local_file_name = ( + f"/app/dynamics_learning/Foundation_Model/hyperparameters/hyperparameters_Instance_model_ITA_{now.strftime('%Y-%m-%d_%H-%M-%S')}_{decision_metric}.json" + ) + save_config(config, config_local_file_name) + + return model + + # logger.info("Checking if a model exists in the resource.") + # # if no model exits, upload the current model to coscine + # if (_resource_files == []) or (not any("models" in s for s in _resource_files)): + # logger.info("The resource is empty.") + # update_existing_file( + # "/app/dynamics_learning/Foundation_Model/models/Foundation_model.h5", + # metadataform, + # resource, + # ) + # metadataform = resource.metadata_form() + # metadataform["Title"] = "hyperparameters" + # update_existing_file(config_local_file_name, metadataform, resource) + # metadataform = resource.metadata_form() + # for _path in Path( + # "/app/dynamics_learning/Trajectory Data/train/analysis" + # ).rglob("*"): + # if _path.is_file(): + # coscine_path = "train/analysis/" + _path.name + # # logger.info("Deleting old analysis file.") + # metadataform["Title"] = _path.name + # # logger.warn(file) + # # Check if the file already exists and delete it + # for file in resource.files(recursive=True): + # if _path.name in file.name: + # logger.info( + # f"File found. 
Deleting it:\nFile name\t{file.name}\ncoscine path\t{coscine_path}\n_path.name\t{_path.name}" + # ) + # resource.file(coscine_path).delete() + # # upload a new file + # with open(_path, "rb") as file: + # logger.info("Uploading file") + # resource.upload( + # coscine_path, # The file path in the coscine resource + # file.read(), # the binary file contents + # metadataform, # the metadataobject + # ) + # logger.info("Done updating analysis file.") + # return model + + # # else check if the current model outperforms the existing model and if so upload the current model, return the model from the function + # else: + # substrings = [ + # "Foundation_model", + # "Foundation_model/models", + # "Foundation_model/analysis", + # "png", + # "csv", + # "json", + # "hyperparam", + # ] + # lowercase_substrings = [substring.lower() for substring in substrings] + # _resource_files, resource_objects = get_resource_content( + # resource, path="models/" + # ) + # logger.log("A model previously existed") + # for file in resource_objects: + # if "analysis" in file.path or "hyperparameters" in file.path: + # continue + # title = file.metadata_form()["Title"][0] + # lowercase_title = title.lower() + # if any(substring in lowercase_title for substring in lowercase_substrings): + # continue + # metric_value, metric_name = file.metadata_form()["Title"][0].split("_", 1) + # logger.log(f"Model in resource has {metric_value} {metric_name}.") + # if float(metric_value) >= decision_metric: + # logger.log( + # "Currently trained model performed better and will be uploaded." + # ) + # update_existing_file( + # "/app/dynamics_learning/Foundation_Model/models/Foundation_model.h5", + # metadataform, + # resource, + # ) + # metadataform = resource.metadata_form() + # metadataform["Title"] = "hyperparameters" + # update_existing_file(config_local_file_name, metadataform, resource) + # resource = PROJECT.resource("Trajectory Data") + # for _path in Path( + # "/app/dynamics_learning/Trajectory Data/train/analysis" + # ).rglob("*"): + # if _path.is_file(): + # # file = str(_path) + # coscine_path = "train/analysis/" + _path.name + # # logger.info("Deleting old analysis file.") + # metadataform["Title"] = _path.name + # logger.info( + # f"seeing if file: {_path} is in resource and needs to be deleted" + # ) + # # Check if the file already exists and delete it + # for file in resource.files(recursive=True): + # if (file.name in _path.name) or (_path.name in file.name): + # logger.info( + # f"File found. Deleting it:\nFile name\t{file.name}\ncoscine path\t{coscine_path}\n_path.name\t{_path.name}" + # ) + # resource.file(coscine_path).delete() + # # upload a new file + # with open(_path, "rb") as file: + # logger.info(f"updating file {_path}") + # resource.upload( + # coscine_path, # The file path in the coscine resource + # file.read(), # the binary file contents + # metadataform, # the metadataobject + # ) + # logger.info("Done updating analysis file.") + # return model + # else: + # logger.log( + # f"The current model has a {decision_metric} {metric_name}. 
Thus it performs worse and is not uploaded" + # ) + # return None if __name__ == "__main__": diff --git a/dynamics_learning/foundation_model.py b/dynamics_learning/foundation_model.py index 9603c04b22fdad75b57f396bd7e52aa2f0cd5761..d6596bb6e96c2b7b46d9318c6e0166a372161620 100644 --- a/dynamics_learning/foundation_model.py +++ b/dynamics_learning/foundation_model.py @@ -55,7 +55,7 @@ if __name__ == "__main__": wandb.login(key=WANDB_API_TOKEN, relogin=True) # check if sweep_id is set, if not create a new sweep - sweep_id, sweep_config = setup_sweep() + sweep_id, sweep_config = setup_sweep(create_sweep=False) def train_save_upload( q_qd_qdd_interpolated_command_input: tf.Tensor, tau_attained_input: tf.Tensor @@ -93,5 +93,5 @@ if __name__ == "__main__": train_save_upload_with_args, project=WANDB_PROJECT, entity=WANDB_ENTITY, - # count=NUM_MODELS, + count=100, ) diff --git a/dynamics_learning/test.py b/dynamics_learning/test.py index 729d9cc9b5238b0492d08dfa86dc0849182f5a7e..eaaf30fafa7efd60b6d5931ef95386a62f6ed45d 100644 --- a/dynamics_learning/test.py +++ b/dynamics_learning/test.py @@ -37,9 +37,13 @@ def download_resource_content_into_uuid_folders(): if file.is_folder: continue logger.info(f"File: {file.name}") - robot_uuid = file.metadata_form()["Robot UUID"][0] - Path(f"./Trajectory Data/train/{robot_uuid}").mkdir(parents=True, exist_ok=True) - file.download(f"./Trajectory Data/train/{robot_uuid}/{file.name}") + try: + robot_uuid = file.metadata_form()["Robot UUID"][0] + Path(f"/app/dynamics_learning/Pretrained_Model_Trajectory_Data/train/{robot_uuid}").mkdir(parents=True, exist_ok=True) + file.download(f"/app/dynamics_learning/Pretrained_Model_Trajectory_Data/train/{robot_uuid}/{file.name}") + except IndexError: + logger.info(f"No Robot UUID found for file {file.name}.") + continue # logger.info(f"Keeping only 50 trajectories per robot.") # delete_files(50, robot_uuid) @@ -83,136 +87,3 @@ def delete_files(num_trajectories_to_keep: int, robot_uuid: str) -> None: if __name__ == "__main__": download_resource_content_into_uuid_folders() - - # %% - num_trajectories_to_keep = 10 - LLT_ROBOT_UUID = "f2e72889-c140-4397-809f-fba1b892f17a" - robot_uuid = LLT_ROBOT_UUID - # %% - delete_files(num_trajectories_to_keep, robot_uuid) - - -# %% -from datetime import datetime, timedelta - -root = Path("/app/dynamics_learning/dummy") - - -def create_txt_files(directory: Path = root) -> None: - current_time = datetime.now() - - for index in range(1001): - adjusted_time = current_time + timedelta(seconds=index) - file_name = f"{adjusted_time.strftime('%Y%m%d_%H%M%S')}.txt" - file_path = directory / file_name - with file_path.open("w") as file: - file.write(str(index)) - - -# Run the function to create the files -create_txt_files(root) -# %% -files = [ - str(file) - for file in Path("/app/dynamics_learning/dummy").iterdir() - if file.is_file() and str(file).endswith(".txt") -] -files.sort(reverse=True) -files -# %% -for file in files[10:]: - Path(file).unlink() -files = [ - str(file) - for file in Path("/app/dynamics_learning/dummy").iterdir() - if file.is_file() and str(file).endswith(".txt") -] -files.sort(reverse=True) -files - - -# %% -# %% -def delete_files( - num_trajectories_to_keep: int, - robot_uuid: str = "c9ff52e1-1733-4829-a209-ebd1586a8697", -) -> None: - """Delete files from the training data directory. - - Files are sorted by date and the newest files are kept. 
- robot_uuid = "c9ff52e1-1733-4829-a209-ebd1586a8697" for ITA - robot_uuid = "f2e72889-c140-4397-809f-fba1b892f17a" for LLT - robot_uuid = "2e60a671-dcc3-4a36-9734-a239c899b57d" for WZL - - Args: - num_trajectories_to_keep (int): Number of trajectories to keep. - robot_uuid (str): Robot UUID. Defaults to ITA. - - Returns: - None: This function does not return anything. - """ - files = [ - str(file) - for file in Path( - f"/app/dynamics_learning/Trajectory Data/train/{robot_uuid}" - ).iterdir() - if file.is_file() and str(file).endswith("meas.csv") - ] - files.sort(reverse=True) - for file in files[num_trajectories_to_keep:]: - Path(file).unlink() - try: - file = file.replace("meas.csv", "com.csv") - Path(file).unlink() - except FileNotFoundError: - # logger.info("No com.csv file found.") - pass - try: - file = file.replace("com.csv", "interp_com.csv") - Path(file).unlink() - except FileNotFoundError: - # logger.info("No interp_com.csv file found.") - pass - return None - - -delete_files(100, "c9ff52e1-1733-4829-a209-ebd1586a8697") # ITA -delete_files(100, "f2e72889-c140-4397-809f-fba1b892f17a") # LLT -# %% -import os - - -def check_file_pairs(directory): - # Get all files in the directory - files = os.listdir(directory) - - # Split files into two groups based on their suffix - com_files = set(f[:-8] for f in files if f.endswith("_com.csv")) - meas_files = set(f[:-9] for f in files if f.endswith("_meas.csv")) - - print(len(com_files)) - print(com_files) - print(meas_files) - - # Find unmatched files - unmatched_com = com_files - meas_files - unmatched_meas = meas_files - com_files - - # Report results - if unmatched_com or unmatched_meas: - print("Unmatched files found:") - for name in unmatched_com: - print(f"No matching _meas.csv file for: {name}_com.csv") - for name in unmatched_meas: - print(f"No matching _com.csv file for: {name}_meas.csv") - else: - print("All files are properly paired.") - - -# Example usage -LLT_ROBOT_UUID = "f2e72889-c140-4397-809f-fba1b892f17a" -ITA_ROBOT_UUID = "c9ff52e1-1733-4829-a209-ebd1586a8697" -directory = f"/app/dynamics_learning/Trajectory Data/train/{ITA_ROBOT_UUID}" -check_file_pairs(directory) - -# %% diff --git a/dynamics_learning/train_instance.py b/dynamics_learning/train_instance.py index 32cf2883c736c5ff9d27dc863ab000212df27502..fe6f87c6004fc609d2812352eb40f5c26e96edf6 100644 --- a/dynamics_learning/train_instance.py +++ b/dynamics_learning/train_instance.py @@ -24,7 +24,7 @@ from dynamics_learning.preprocessing.trajectory_interpolation import interpolate from dynamics_learning.sweep.setup import setup_sweep # from dynamics_learning.data_retrieval import download_resource_content -from dynamics_learning.training import train +from dynamics_learning.training import train, instance_model # Suppress FutureWarning for the specific deprecation warning in pandas warnings.simplefilter(action="ignore", category=FutureWarning) @@ -35,33 +35,44 @@ logger = RichLogger("dynamics_learning-instance_model") if __name__ == "__main__": # download not existing data # local_resource_path = download_resource_content() - # TODO download data from ita / use data from ita - local_resource_path = Path("/app/dynamics_learning/Trajectory Data") + # ITA data: c9ff52e1-1733-4829-a209-ebd1586a8697 + local_resource_path = Path("/app/dynamics_learning/Pretrained_Model_Trajectory_Data/train/c9ff52e1-1733-4829-a209-ebd1586a8697") # preprocess data - attained_data, command_data = analyze(local_resource_path / "train") - interpolated_command_data = interpolate(local_resource_path / 
"train") + attained_data, command_data = analyze(local_resource_path) + norm_interpolated_command_data = interpolate(local_resource_path) # build input and cross validation tensors interpolated_command_input = np.column_stack( ( - interpolated_command_data["q_interpolated_command"], - interpolated_command_data["qd_interpolated_command"], - interpolated_command_data["qdd_interpolated_command"], + norm_interpolated_command_data["q_interpolated_command"], + norm_interpolated_command_data["qd_interpolated_command"], + norm_interpolated_command_data["qdd_interpolated_command"], ) ) q_qd_qdd_interpolated_command_input = tf.convert_to_tensor( interpolated_command_input ) + # min-max normalized q_qd_qdd_interpolated_command_input. This is done, as the joint limits are not zero centered. + tau_attained_input = tf.convert_to_tensor(attained_data["tau_attained"]) + # not normalized tau_attained_input + # not normalized so that the actual values are being predicted and thus can be compared to conventional inverse dynamics computation. + # we could scale it back using panda limits, if we want to compute a normalized loss rather than a non-normalized loss. wandb.login(key=WANDB_API_TOKEN, relogin=True) # check if sweep_id is set, if not create a new sweep - sweep_id, sweep_config = setup_sweep() + sweep_id, sweep_config = setup_sweep(create_sweep=False) def train_save_upload( q_qd_qdd_interpolated_command_input: tf.Tensor, tau_attained_input: tf.Tensor ): + """_summary_ + + Args: + q_qd_qdd_interpolated_command_input (tf.Tensor): normalized interpolated command joint positions and derivates. + tau_attained_input (tf.Tensor): Non normalized attained torques. + """ model, history, _run, config = train( q_qd_qdd_interpolated_command_input, tau_attained_input ) @@ -69,11 +80,11 @@ if __name__ == "__main__": "\n=====================================\nModel trained\n=====================================\n" ) # TODO save model and hyperparameters locally - # model = upload(model, history, config) - # logger.info( - # "\n=====================================\nModel uploaded\n=====================================\n" - # ) - # # if a model was return it is the best performing model + model = instance_model(model, history, config) + logger.info( + "\n=====================================\nModel uploaded\n=====================================\n" + ) + # if a model was return it is the best performing model # if model: # logger.info("Evaluating current model with test data.") # test_dataset = Dataset(type="test") @@ -83,7 +94,7 @@ if __name__ == "__main__": # "\n=====================================\nModel evaluated\n=====================================\n" # else: # logger.info("Model is not being evaluated.") - return None + # return None train_save_upload_with_args = partial( train_save_upload, @@ -96,6 +107,5 @@ if __name__ == "__main__": train_save_upload_with_args, project=WANDB_PROJECT, entity=WANDB_ENTITY, - # count=NUM_MODELS, - notes="Instance model training using ita data.", + count=51, )