diff --git a/.gitignore b/.gitignore
index acbf51292469935bbd05cf70cc53c90b18eb6181..3f347a65cf8e2c12a45294705ce3cd6e51973f9f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
 **/test
 **/wandb
 **/Trajectory Data
+**/Trajectory_Data
 **/Foundation_Model
 **/models
 **/analysis
diff --git a/coscine_watchdog/coscine_app_profile.ttl b/coscine_watchdog/coscine_app_profile-Trajectory-data.ttl
similarity index 100%
rename from coscine_watchdog/coscine_app_profile.ttl
rename to coscine_watchdog/coscine_app_profile-Trajectory-data.ttl
diff --git a/coscine_watchdog/requirements.txt b/coscine_watchdog/requirements.txt
index b8158ff26ddac1b773f6b0b2286f2d062ae9e4f1..0b79f27058e56dfbcf59d0927f60789811ae3750 100644
--- a/coscine_watchdog/requirements.txt
+++ b/coscine_watchdog/requirements.txt
@@ -12,3 +12,4 @@ furo==2024.5.6 # sphinx documentation theme
 myst-parser==3.0.1 # sphinx plugin for markdown support
 sphinx-copybutton==0.5.2 # inserts buttons to copy sourcecode in docs
 flask==3.0.3
+coscine>=0.11.5 # coscine library; versions lower than 0.11.5 only download 1000 files
\ No newline at end of file
diff --git a/dynamics_learning/Dockerfile.foundation_model b/dynamics_learning/Dockerfile.foundation_model
index fa2479404af6bb8221c4047a0c63233439f2d4d0..a4d2c6e571e0731f5da196ef3e60603dccd21d00 100644
--- a/dynamics_learning/Dockerfile.foundation_model
+++ b/dynamics_learning/Dockerfile.foundation_model
@@ -1,5 +1,25 @@
 FROM nvcr.io/nvidia/tensorflow:23.05-tf2-py3
 
+# Install required packages for setting up the locale and timezone
+RUN apt-get update && apt-get install -y \
+    locales \
+    tzdata
+
+# Set the timezone to Europe/Berlin
+RUN ln -fs /usr/share/zoneinfo/Europe/Berlin /etc/localtime && \
+    dpkg-reconfigure -f noninteractive tzdata
+
+# Set the locale to en_US.UTF-8
+RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && \
+    locale-gen en_US.UTF-8 && \
+    update-locale LANG=en_US.UTF-8
+
+# Set environment variables for timezone and locale
+ENV LANG=en_US.UTF-8 \
+    LANGUAGE=en_US:en \
+    LC_ALL=en_US.UTF-8 \
+    TZ=Europe/Berlin
+
 WORKDIR /app
 
 COPY . .
diff --git a/dynamics_learning/Dockerfile.foundation_model_cron_job b/dynamics_learning/Dockerfile.foundation_model_cron_job
index 23236cf1960bd3ad3bcc61892777629b60fc1cef..0e5d7ff65288e895b220bd6ed44d3ff4681c0736 100644
--- a/dynamics_learning/Dockerfile.foundation_model_cron_job
+++ b/dynamics_learning/Dockerfile.foundation_model_cron_job
@@ -1,5 +1,25 @@
 FROM nvcr.io/nvidia/tensorflow:23.05-tf2-py3
 
+# Install required packages for setting up the locale and timezone
+RUN apt-get update && apt-get install -y \
+    locales \
+    tzdata
+
+# Set the timezone to Europe/Berlin
+RUN ln -fs /usr/share/zoneinfo/Europe/Berlin /etc/localtime && \
+    dpkg-reconfigure -f noninteractive tzdata
+
+# Set the locale to en_US.UTF-8
+RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && \
+    locale-gen en_US.UTF-8 && \
+    update-locale LANG=en_US.UTF-8
+
+# Set environment variables for timezone and locale
+ENV LANG=en_US.UTF-8 \
+    LANGUAGE=en_US:en \
+    LC_ALL=en_US.UTF-8 \
+    TZ=Europe/Berlin
+
 WORKDIR /app
 
 COPY . .
diff --git a/dynamics_learning/benchmark_model_accuracy.py b/dynamics_learning/benchmark_model_accuracy.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..d4e9016ea4837671541a6378e99ae3f4919e893c 100644
--- a/dynamics_learning/benchmark_model_accuracy.py
+++ b/dynamics_learning/benchmark_model_accuracy.py
@@ -0,0 +1,47 @@
+from pathlib import Path
+from dynamics_learning.data_retrieval import download_resource_content_into_uuid_folders
+
+
+def delete_files(num_trajectories_to_keep: int, robot_uuid: str) -> None:
+    """Delete files from the training data directory.
+
+    Files are sorted by date and only the newest files are kept.
+
+    Args:
+        num_trajectories_to_keep (int): Number of trajectories to keep.
+        robot_uuid (str): Robot UUID.
+
+    Returns:
+        None: This function does not return anything.
+    """
+    files = [
+        str(file)
+        for file in Path(
+            f"/app/dynamics_learning/Trajectory Data/train/{robot_uuid}"
+        ).iterdir()
+        if file.is_file() and str(file).endswith("meas.csv")
+    ]
+    files.sort(reverse=True)
+    for file in files[num_trajectories_to_keep:]:
+        Path(file).unlink()
+        try:
+            file = file.replace("meas.csv", "com.csv")
+            Path(file).unlink()
+        except FileNotFoundError:
+            # logger.info("No com.csv file found.")
+            pass
+        try:
+            file = file.replace("com.csv", "interp_com.csv")
+            Path(file).unlink()
+        except FileNotFoundError:
+            # logger.info("No interp_com.csv file found.")
+            pass
+    return None
+
+
+if __name__ == "__main__":
+    download_resource_content_into_uuid_folders()
+    num_trajectories_to_keep = 10
+    LLT_ROBOT_UUID = "f2e72889-c140-4397-809f-fba1b892f17a"
+    robot_uuid = LLT_ROBOT_UUID
+    delete_files(num_trajectories_to_keep, robot_uuid)
diff --git a/dynamics_learning/benchmark_number_of_runs.py b/dynamics_learning/benchmark_number_of_runs.py
index 950a7ac41309396d95b0397206f7e753aefd9144..8fd86a35d11336cf4038fa68f61d42fe735faad5 100644
--- a/dynamics_learning/benchmark_number_of_runs.py
+++ b/dynamics_learning/benchmark_number_of_runs.py
@@ -3,15 +3,15 @@ from dynamics_learning.preprocessing.dataset_analysis import analyze
 from dynamics_learning.preprocessing.trajectory_interpolation import interpolate
 from dynamics_learning.environment import (
     WANDB_API_TOKEN,
-    SWEEP_ID,
     WANDB_PROJECT,
     WANDB_ENTITY,
 )
-from dynamics_learning.sweep.setup import setup_sweep
+from dynamics_learning.sweep.setup import setup_sweep, setup_sweep_from_hyperparameters
 from dynamics_learning.training import train
 from dynamics_learning.model_io import (
     save_model_to_binary_file,
     load_model_from_binary_file,
+    load_config,
 )
 from pathlib import Path
 from pritty_logger import RichLogger
@@ -25,8 +25,7 @@ from keras.models import Sequential
 
 logger = RichLogger("dynamics_learning-benchmark_number_of_runs")
 
-THRESHOLD = 160
-NUMBER_OF_TRAJECTORIES = 100
+THRESHOLD = 50
 LLT_ROBOT_UUID = "f2e72889-c140-4397-809f-fba1b892f17a"
 ITA_ROBOT_UUID = "c9ff52e1-1733-4829-a209-ebd1586a8697"
 WZL_ROBOT_UUID = "2e60a671-dcc3-4a36-9734-a239c899b57d"
@@ -34,9 +33,9 @@ runs = 0
 val_loss = 1000
 download_file = False
 model1 = False
-model2 = True
-model3 = False
-model4 = False
+model2 = False
+model3 = True
+model4 = True
 
 
 def prepare_data(directory: Path) -> tuple:
@@ -107,6 +106,7 @@ def train_until_threshold_val_loss(
     q_qd_qdd_interpolated_command_input: tf.Tensor,
     tau_attained_input: tf.Tensor,
     model: Sequential = None,
+    notes: str = "",
 ):
     training = partial(
         training_loop,
@@ -117,10 +117,7 @@
         model=model,
     )
     wandb.agent(
-        sweep_id,
-        training,
-        project=WANDB_PROJECT,
-        entity=WANDB_ENTITY,
+        sweep_id, training, project=WANDB_PROJECT, entity=WANDB_ENTITY, notes=notes
     )
     logger.info(f"""Training concluded.
 Number of runs: {runs}
@@ -141,6 +138,7 @@ if __name__ == "__main__":
     # Download Training Data from the server
     if download_file:
         download_resource_content_into_uuid_folders()
+        # TODO implement max number of trajectories used for benchmark training
 
     wandb.login(key=WANDB_API_TOKEN, relogin=True)
 
@@ -177,10 +175,12 @@
         q_qd_qdd_interpolated_command_input=q_qd_qdd_interpolated_command_input,
         tau_attained_input=tau_attained_input,
         model=None,
+        notes="Sweep to train a model from scratch. 50 trajectories are available for training. Training is stopped when the validation loss is below 50.",
     )
     runs_model1 = runs
     val_loss_model1 = val_loss
+    sweep_id1 = sweep_id
 
     ###############################################
     ####################Model 2####################
     ###############################################
@@ -220,39 +220,132 @@
         q_qd_qdd_interpolated_command_input=q_qd_qdd_interpolated_command_input,
         tau_attained_input=tau_attained_input,
         model=model,
+        notes="Sweep to train a model based on the ITA model. 50 trajectories are available for training. Training is stopped when the validation loss is below 50.",
     )
     runs_model2 = runs
     val_loss_model2 = val_loss
+    sweep_id2 = sweep_id
 
     ###############################################
     ####################Model 3####################
     ###############################################
     if model3:  # LLT model based on ITA model with known hyperparameters
-        assert (
-            SWEEP_ID == "fe3gjovo"
-        ), "Sweep ID is not set correctly. Ensure that the sweep id is set to fe3gjovo"
+        robot_uuid = LLT_ROBOT_UUID
+        directory = Path(f"/app/dynamics_learning/Trajectory Data/train/{robot_uuid}")
+        # Interpolate Training Data in UUID folders
+        (
+            attained_data,
+            command_data,
+            interpolated_command_data,
+            q_qd_qdd_interpolated_command_input,
+            tau_attained_input,
+        ) = prepare_data(directory)
+        # assert SWEEP_ID == "42d8t40t", "Sweep ID is not set correctly. Ensure that the sweep id is set to 42d8t40t"
         assert (
             robot_uuid == LLT_ROBOT_UUID
-        ), "Robot UUID is not set correctly. Ensure that the robot uuid is set to LLT_ROBOT"
+        ), "Robot UUID is not set correctly. Ensure that the robot uuid is set to LLT_ROBOT_UUID"
+
+        config_data = load_config(
+            "/app/dynamics_learning/Foundation_Model/models/hyperparameters.json"
+        )
+
+        sweep_id, sweep_config = setup_sweep_from_hyperparameters(
+            config_data=config_data, create_sweep=True
+        )
+
+        # reset runs counter
+        runs = 0
+        val_loss = 1000
+
+        # TODO load ITA model instead of dummy model
+        model = load_model_from_binary_file(
+            "/app/dynamics_learning/models/99.99706268310547.h5"
+        )
+
+        # Train the model until the threshold validation loss is reached
+        train_until_threshold_val_loss(
+            sweep_id=sweep_id,
+            robot_uuid=robot_uuid,
+            q_qd_qdd_interpolated_command_input=q_qd_qdd_interpolated_command_input,
+            tau_attained_input=tau_attained_input,
+            model=model,
+            notes="Sweep to train a model based on the ITA model with known hyperparameters. 50 trajectories are available for training. Training is stopped when the validation loss is below 50.",
+        )
+
+        runs_model3 = runs
+        val_loss_model3 = val_loss
+        sweep_id3 = sweep_id
 
-        # TODO load LLT Data
-        # TODO load ITA model
-        # TODO set hyperparameters for LLT model based on ITA model with known hyperparameters
+        # assert (
+        #     SWEEP_ID == "fe3gjovo"
Ensure that the sweep id is set to fe3gjovo" + # assert ( + # robot_uuid == LLT_ROBOT_UUID + # ), "Robot UUID is not set correctly. Ensure that the robot uuid is set to LLT_ROBOT" ############################################### ####################Model 4#################### ############################################### if model4: # LLT model based on foundation model - assert ( - SWEEP_ID == "7tglijx8" - ), "Sweep ID is not set correctly. Ensure that the sweep id is set to 7tglijx8" + # assert ( + # SWEEP_ID == "7tglijx8" + # ), "Sweep ID is not set correctly. Ensure that the sweep id is set to 7tglijx8" + # assert ( + # robot_uuid == LLT_ROBOT_UUID + # ), "Robot UUID is not set correctly. Ensure that the robot uuid is set to LLT_ROBOT" + + robot_uuid = LLT_ROBOT_UUID + directory = Path(f"/app/dynamics_learning/Trajectory Data/train/{robot_uuid}") + # Interpolate Training Data in UUID folders + ( + attained_data, + command_data, + interpolated_command_data, + q_qd_qdd_interpolated_command_input, + tau_attained_input, + ) = prepare_data(directory) + # assert SWEEP_ID == "42d8t40t", "Sweep ID is not set correctly. Ensure that the sweep id is set to 42d8t40t" assert ( robot_uuid == LLT_ROBOT_UUID - ), "Robot UUID is not set correctly. Ensure that the robot uuid is set to LLT_ROBOT" + ), "Robot UUID is not set correctly. Ensure that the robot uuid is set to LLT_ROBOT_UUID" + + config_data = load_config( + "/app/dynamics_learning/Foundation_Model/models/hyperparameters.json" + ) + + sweep_id, sweep_config = setup_sweep_from_hyperparameters( + config_data=config_data, create_sweep=True + ) - # TODO load LLT Data - # TODO load foundation model - # TODO set hyperparameters for LLT model based on foundation model + # reset runs counter + runs = 0 + val_loss = 1000 + + model = load_model_from_binary_file( + "/app/dynamics_learning/Foundation_Model/models/Foundation_model.h5" + ) + + # Train the model until the threshold validation loss is reached + train_until_threshold_val_loss( + sweep_id=sweep_id, + robot_uuid=robot_uuid, + q_qd_qdd_interpolated_command_input=q_qd_qdd_interpolated_command_input, + tau_attained_input=tau_attained_input, + model=model, + notes="Sweep to train model based on foundation model with known hyperparameters. 50 Trajectories are avaiulable for training. Training ist stoped when the validation loss is below 50.", + ) + + runs_model4 = runs + val_loss_model4 = val_loss + sweep_id4 = sweep_id + + logger.info(f"""Training concluded. +The first model using sweep {sweep_id1} was trained for {runs_model1} runs and reached a validation loss of {val_loss_model1}. +The second model using sweep {sweep_id2} was trained for {runs_model2} runs and reached a validation loss of {val_loss_model2}. +The third model using sweep {sweep_id3} was trained for {runs_model3} runs and reached a validation loss of {val_loss_model3}. +The fourth model using sweep {sweep_id4} was trained for {runs_model4} runs and reached a validation loss of {val_loss_model4}. +""") + wandb.finish() diff --git a/dynamics_learning/dynamics_learning/data_retrieval/__init__.py b/dynamics_learning/dynamics_learning/data_retrieval/__init__.py index 3cf38bfdff501772e99815bb6955f882c7f3a597..6e4571d6e2d0d07bb86f323020e0818cddb66cfb 100644 --- a/dynamics_learning/dynamics_learning/data_retrieval/__init__.py +++ b/dynamics_learning/dynamics_learning/data_retrieval/__init__.py @@ -4,7 +4,7 @@ # Released under MIT License # Contact us for other licensing options. 
-import sys
+
 from pathlib import Path
 from typing import List, Tuple
 
@@ -13,11 +13,14 @@ import coscine.resource
 from pritty_logger import RichLogger
 from rich.progress import track
+
 from dynamics_learning.environment import COSCINE_API_TOKEN
 
 logger = RichLogger("dynamics_learning-coscine_data_retrieval")
 
-CLIENT = coscine.ApiClient(COSCINE_API_TOKEN, timeout=120, retries=5)
+CLIENT = coscine.ApiClient(
+    COSCINE_API_TOKEN, timeout=120, retries=5, verbose=False, enable_caching=True
+)
 PROJECT = CLIENT.project(
     "IoP Ws A.III FER WWL Demo"
 )  # Connect to the specified Coscine project
@@ -29,8 +32,45 @@ RESOURCE = PROJECT.resource(
 )  # Address the specific resource in the project
 
 
+def delete_files(num_trajectories_to_keep: int, robot_uuid: str) -> None:
+    """Delete files from the training data directory.
+
+    Files are sorted by date and only the newest files are kept.
+
+    Args:
+        num_trajectories_to_keep (int): Number of trajectories to keep.
+        robot_uuid (str): Robot UUID.
+
+    Returns:
+        None: This function does not return anything.
+    """
+    files = [
+        str(file)
+        for file in Path(
+            f"/app/dynamics_learning/Trajectory Data/train/{robot_uuid}"
+        ).iterdir()
+        if file.is_file() and str(file).endswith("meas.csv")
+    ]
+    files.sort(reverse=True)
+    for file in files[num_trajectories_to_keep:]:
+        Path(file).unlink()
+        try:
+            file = file.replace("meas.csv", "com.csv")
+            Path(file).unlink()
+        except FileNotFoundError:
+            logger.info("No com.csv file found.")
+        try:
+            file = file.replace("com.csv", "interp_com.csv")
+            Path(file).unlink()
+        except FileNotFoundError:
+            logger.info("No interp_com.csv file found.")
+    return None
+
+
 def download_resource_content_into_uuid_folders():
+    """Download the resource content into folders named after the robot UUID. Keeps only the 50 newest trajectories per robot."""
     files = RESOURCE.files(path="train", recursive=True, with_metadata=True)
+    logger.info(f"Attempting to download {len(files)} files:")  # \n{files}
     for file in track(files):
         if file.is_folder:
             continue
@@ -38,45 +78,86 @@ def download_resource_content_into_uuid_folders():
         robot_uuid = file.metadata_form()["Robot UUID"][0]
         Path(f"./Trajectory Data/train/{robot_uuid}").mkdir(parents=True, exist_ok=True)
         file.download(f"./Trajectory Data/train/{robot_uuid}/{file.name}")
+        # logger.info(f"Keeping only 50 trajectories per robot.")
+        # delete_files(50, robot_uuid)
 
 
 def download_resource_content(resource: coscine.resource.Resource = RESOURCE) -> Path:
-    """Downloads none-existend contents of the resource.
This may take a while.\n{resource}") + resource.download() + # logger.info(f"Downloading files from resource\n{resource}") + # files = resource.files(path="train",recursive=True, with_metadata=True) + # logger.info(f"Attempting to download {len(files)} files.") + # current_iteration = 0 + # num_currently_downloaded_files = 0 + # robots = {} + # for file in track(files): + # logger.info(f"File: {file.name}") + # logger.info(f"Robot: {file.metadata_form()['Robot UUID'][0]}") + # try: + # robots[f"{file.metadata_form()['Robot UUID'][0]}"] = robots[f"{file.metadata_form()['Robot UUID'][0]}"] + 1 + # except KeyError: + # robots[f"{file.metadata_form()['Robot UUID'][0]}"] = 1 + # if file.is_folder: + # logger.info("Skipping folder.") + # current_iteration += 1 + # continue + # file.download(f"./{resource.name}/{file.directory}{file.name}") + # logger.info(f"Downloaded file to: ./{resource.name}/{file.directory}{file.name}") + # current_iteration += 1 + # if current_iteration % 490 == 0: + # logger.info("Sleeping for 60 seconds to not interfere with rate limit.") + # logger.info(f"Current iteration: {current_iteration}") + # time.sleep(60) + # logger.info((robots)) + # return Path(f"./{resource.name}") - Args: - resource (coscine.resource.Resource, optional): The resource to download from. Defaults to RESOURCE. - """ - path = Path(f"/app/dynamics_learning/{RESOURCE.name}") - resource_files, resource_objects = get_resource_content(resource) - resource_files = set(resource_files) - existing_files = set(get_downloaded_files()) - unique_elements = resource_files.symmetric_difference(existing_files) - missing_files = list(unique_elements) - if missing_files: - logger.log( - f"Files not yet downloaded are {missing_files}. These files will now be downloaded." - ) - for file in track(missing_files): - file_path = path / file - try: - resource.file(file).download( - path=f"/app/dynamics_learning/{RESOURCE.name}/{file}" - ) - except Exception as e: - logger.log(f"{e}", "warning") - logger.log( - "Likely you have a local file that is not in coscine resource." - ) - logger.log("Checking if file exists in local directory.") - logger.log(f"File: {file_path}") - logger.log(f"Exists: {file_path.exists()}") - if file_path.exists(): - continue - else: - sys.exit(1) - else: - logger.log("All files are already downloaded.") - return path + +# def download_resource_content(resource: coscine.resource.Resource = RESOURCE) -> Path: +# """Downloads none-existent contents of the resource. + +# Args: +# resource (coscine.resource.Resource, optional): The resource to download from. Defaults to RESOURCE. +# """ +# path = Path(f"/app/dynamics_learning/{RESOURCE.name}") +# path.mkdir(parents=True, exist_ok=True) +# path = path / "train" +# path.mkdir(parents=True, exist_ok=True) +# path = path / "test" +# path.mkdir(parents=True, exist_ok=True) +# path = Path(f"/app/dynamics_learning/{RESOURCE.name}") +# resource_files, resource_objects = get_resource_content(resource) +# resource_files = set(resource_files) +# existing_files = set(get_downloaded_files()) +# unique_elements = resource_files.symmetric_difference(existing_files) +# missing_files = list(unique_elements) +# if missing_files: +# logger.log( +# f"Files not yet downloaded are {missing_files}. These files will now be downloaded." 
+#         )
+#         for file in track(missing_files):
+#             file_path = path / file
+#             try:
+#                 if resource.file(file).is_folder():
+#                     continue
+#                 resource.file(file).download(
+#                     path=f"/app/dynamics_learning/{RESOURCE.name}/{file}"
+#                 )
+#             except Exception as e:
+#                 logger.log(f"{e}", "warning")
+#                 logger.log(
+#                     "Likely you have a local file that is not in coscine resource."
+#                 )
+#                 logger.log("Checking if file exists in local directory.")
+#                 logger.log(f"File: {file_path}")
+#                 logger.log(f"Exists: {file_path.exists()}")
+#                 if file_path.exists():
+#                     continue
+#                 else:
+#                     sys.exit(1)
+#     else:
+#         logger.log("All files are already downloaded.")
+#     return path
 
 
 def get_resource_content(
diff --git a/dynamics_learning/dynamics_learning/sweep/setup.py b/dynamics_learning/dynamics_learning/sweep/setup.py
index a72c7ad8a4cb78793319bcefc797780ff4774a69..6db09d052d7321f4855623ad63e3c177b950381b 100644
--- a/dynamics_learning/dynamics_learning/sweep/setup.py
+++ b/dynamics_learning/dynamics_learning/sweep/setup.py
@@ -48,7 +48,7 @@ def setup_sweep(create_sweep: bool = False) -> tuple[str, dict[str, Any]]:
 
 
 def setup_sweep_from_hyperparameters(
-    config_data: Any, create_sweep: bool = False
+    config_data: dict, create_sweep: bool = False
 ) -> tuple[str, dict[str, Any]]:
     """Sets up a Sweep from given hyperparameters within a sain range.
 
@@ -61,35 +61,35 @@
     """
     global SWEEP_ID
     parameters_dict_from_hyperparameters = {
-        "optimizer": config_data.optimizer,
+        "optimizer": {"value": config_data["optimizer"]},
         "clipnorm": {
             "distribution": "log_uniform_values",
-            "min": config_data.clipnorm * 0.5,
-            "max": config_data.clipnorm * 1.5,
+            "min": config_data["clipnorm"] * 0.5,
+            "max": config_data["clipnorm"] * 1.5,
         },
         "learning_rate": {
             "distribution": "log_uniform_values",
-            "min": config_data.learning_rate * 0.5,
-            "max": min(config_data.learning_rate * 1.5, 0.9),
+            "min": config_data["learning_rate"] * 0.5,
+            "max": min(config_data["learning_rate"] * 1.5, 0.9),
         },
-        "window_size": {"value": config_data.window_size},
-        "batch_size": {"value": config_data.batch_size},
+        "window_size": {"value": config_data["window_size"]},
+        "batch_size": {"value": config_data["batch_size"]},
         "units": {
             "distribution": "int_uniform",
-            "min": max(1, config_data.units - 10),
-            "max": min(config_data.units + 10, 100),
+            "min": max(1, config_data["units"] - 10),
+            "max": min(config_data["units"] + 10, 100),
         },
         "dropout": {
             "distribution": "log_uniform_values",
-            "min": config_data.dropout * 0.5,
-            "max": min(config_data.dropout * 1.5, 1),
+            "min": config_data["dropout"] * 0.5,
+            "max": min(config_data["dropout"] * 1.5, 1),
         },
         "layers": {
             "distribution": "int_uniform",
-            "min": max(1, config_data.layers - 10),
-            "max": min(config_data.layers + 10, 100),
+            "min": max(1, config_data["layers"] - 10),
+            "max": min(config_data["layers"] + 10, 100),
         },  # {"value": 10},
-        "epochs": {"value": config_data.epochs},
+        "epochs": {"value": config_data["epochs"]},
     }
     sweep_config_from_hyperparameters["parameters"] = (
         parameters_dict_from_hyperparameters
diff --git a/dynamics_learning/dynamics_learning/training/__init__.py b/dynamics_learning/dynamics_learning/training/__init__.py
index 18505eb88b520125e82889840175e30b46a50d02..335551b39bd8ac21c8f9d2d49c0ecb6a7a3e2058 100644
--- a/dynamics_learning/dynamics_learning/training/__init__.py
+++ b/dynamics_learning/dynamics_learning/training/__init__.py
@@ -164,10 +164,10 @@ def train(
     logger.info(f"Total layers in the model: {total_layers}")
 
     # Define how many layers you want to remove
-    layers_to_remove = 5
+    LAYERS_TO_REMOVE = 5
 
     # Check if you can remove five layers; if not, adjust to delete fewer layers
-    layers_to_remove = min(layers_to_remove, total_layers - 2)
+    layers_to_remove = min(LAYERS_TO_REMOVE, total_layers - 2)
 
     # If it's a Sequential model
     if isinstance(model, Sequential):
diff --git a/dynamics_learning/requirements.txt b/dynamics_learning/requirements.txt
index 201d0c1505efc2c32b1f17cedb3e1fbc1fb2f860..6454a395d31bb40fea5d70f6d379f7ff79cbfc5e 100644
--- a/dynamics_learning/requirements.txt
+++ b/dynamics_learning/requirements.txt
@@ -1,4 +1,4 @@
-coscine==0.10.7
+coscine>=0.11.5
 wandb==0.17.3
 tensorflow_text==2.12.0
 tf_keras==2.14.1
diff --git a/dynamics_learning/train_instance.py b/dynamics_learning/train_instance.py
index 38fb8ccaa481e7019ed1fa082a353ffe8caa2a39..32cf2883c736c5ff9d27dc863ab000212df27502 100644
--- a/dynamics_learning/train_instance.py
+++ b/dynamics_learning/train_instance.py
@@ -1 +1,101 @@
-# 5vlv6m3t: ITA instance from scratch (used to train LLT instance)
+# download data from ITA
+# train model like foundation model, but with other data
+# save model locally
+
+
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright Leon Gorissen
+# Released under MIT License
+import warnings
+from functools import partial
+from pathlib import Path
+from dynamics_learning.environment import WANDB_API_TOKEN, WANDB_ENTITY, WANDB_PROJECT
+
+import numpy as np
+import tensorflow as tf
+from pritty_logger import RichLogger
+
+# import env variables and set tensorflow variables
+import wandb
+
+from dynamics_learning.preprocessing.dataset_analysis import analyze
+from dynamics_learning.preprocessing.trajectory_interpolation import interpolate
+from dynamics_learning.sweep.setup import setup_sweep
+
+# from dynamics_learning.data_retrieval import download_resource_content
+from dynamics_learning.training import train
+
+# Suppress FutureWarning for the specific deprecation warning in pandas
+warnings.simplefilter(action="ignore", category=FutureWarning)
+
+logger = RichLogger("dynamics_learning-instance_model")
+
+
+if __name__ == "__main__":
+    # download not existing data
+    # local_resource_path = download_resource_content()
+    # TODO download data from ITA / use data from ITA
+    local_resource_path = Path("/app/dynamics_learning/Trajectory Data")
+
+    # preprocess data
+    attained_data, command_data = analyze(local_resource_path / "train")
+    interpolated_command_data = interpolate(local_resource_path / "train")
+
+    # build input and cross validation tensors
+    interpolated_command_input = np.column_stack(
+        (
+            interpolated_command_data["q_interpolated_command"],
+            interpolated_command_data["qd_interpolated_command"],
+            interpolated_command_data["qdd_interpolated_command"],
+        )
+    )
+    q_qd_qdd_interpolated_command_input = tf.convert_to_tensor(
+        interpolated_command_input
+    )
+    tau_attained_input = tf.convert_to_tensor(attained_data["tau_attained"])
+
+    wandb.login(key=WANDB_API_TOKEN, relogin=True)
+    # check if sweep_id is set, if not create a new sweep
+    sweep_id, sweep_config = setup_sweep()
+
+    def train_save_upload(
+        q_qd_qdd_interpolated_command_input: tf.Tensor, tau_attained_input: tf.Tensor
+    ):
+        model, history, _run, config = train(
+            q_qd_qdd_interpolated_command_input, tau_attained_input
+        )
+        logger.info(
+            "\n=====================================\nModel trained\n=====================================\n"
+        )
+        # TODO save model and hyperparameters locally
+        # model = upload(model, history, config)
+        # logger.info(
"\n=====================================\nModel uploaded\n=====================================\n" + # ) + # # if a model was return it is the best performing model + # if model: + # logger.info("Evaluating current model with test data.") + # test_dataset = Dataset(type="test") + # test_dataset.download() + # interpolate("/app/dynamics_learning/Trajectory Data/test") + # model_analysis(test_dataset, model) + # "\n=====================================\nModel evaluated\n=====================================\n" + # else: + # logger.info("Model is not being evaluated.") + return None + + train_save_upload_with_args = partial( + train_save_upload, + q_qd_qdd_interpolated_command_input=q_qd_qdd_interpolated_command_input, + tau_attained_input=tau_attained_input, + ) + + wandb.agent( + sweep_id, + train_save_upload_with_args, + project=WANDB_PROJECT, + entity=WANDB_ENTITY, + # count=NUM_MODELS, + notes="Instance model training using ita data.", + )