From a81cc8cae60492e94227a987981f45043857623c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Leon=20Michel=20Gori=C3=9Fen?= <leon.gorissen@llt.rwth-aachen.de>
Date: Fri, 25 Oct 2024 14:19:55 +0200
Subject: [PATCH] Update .gitignore, Dockerfiles, and data retrieval logic;
 refactor benchmarks

- Added new paths (**/benchmark_models, **/benchmark_trajectory_data) to .gitignore
- Updated Dockerfiles for foundation model and cron job:
  - Set DEBIAN_FRONTEND to non-interactive to avoid tzdata prompts during installation
  - Preconfigured tzdata settings for consistent timezone configuration
- Modified docker-compose.yaml:
  - Renamed and added sections for foundation model services
- Deleted obsolete benchmark_model_accuracy.py script
- Refactored benchmark_number_of_runs.py:
  - Adjusted paths to benchmark data directories
  - Updated training notes and increased trajectory limits
- Improved data_retrieval module:
  - Added default robot UUID for delete_files function
  - Refined download logging messages
  - Commented out unused logger statements
- Minor adjustments to foundation_model.py:
  - Replaced dynamic download paths with static path reference
---
 .gitignore                                    |   2 +
 docker-compose.yaml                           |   3 +-
 dynamics_learning/Dockerfile.foundation_model |   9 +-
 .../Dockerfile.foundation_model_cron_job      |  10 +-
 dynamics_learning/benchmark_model_accuracy.py |  47 ----
 dynamics_learning/benchmark_number_of_runs.py |  19 +-
 .../data_retrieval/__init__.py                |  22 +-
 dynamics_learning/foundation_model.py         |   7 +-
 dynamics_learning/playground.py               | 127 +++++++++++
 dynamics_learning/test.py                     | 212 ++++++++++++++++++
 10 files changed, 389 insertions(+), 69 deletions(-)
 delete mode 100644 dynamics_learning/benchmark_model_accuracy.py
 create mode 100644 dynamics_learning/playground.py
 create mode 100644 dynamics_learning/test.py

diff --git a/.gitignore b/.gitignore
index 3f347a6..d6fc534 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,8 @@
 **/Foundation_Model
 **/models
 **/analysis
+**/benchmark_models
+**/benchmark_trajectory_data
 **commit**
 commit

diff --git a/docker-compose.yaml b/docker-compose.yaml
index 13ac5bf..01f0f7c 100755
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -91,7 +91,7 @@ services:
     network_mode: host # network_mode: pandavlan # Uses the host's network stack, not creating a separate network namespace

-#########################foundation_model##############################
+#########################foundation model cron##########################
   foundation_model_cron_job:
     build:
       context: .
@@ -129,6 +129,7 @@ services:
     volumes:
       - /home/lgorissen/git/iop/franka_wwl_demonstrator:/app

+#########################foundation model###############################
   foundation_model:
     build:
       context: .
diff --git a/dynamics_learning/Dockerfile.foundation_model b/dynamics_learning/Dockerfile.foundation_model
index a4d2c6e..ecc5c40 100644
--- a/dynamics_learning/Dockerfile.foundation_model
+++ b/dynamics_learning/Dockerfile.foundation_model
@@ -1,7 +1,12 @@
 FROM nvcr.io/nvidia/tensorflow:23.05-tf2-py3

-# Install required packages for setting up the locale and timezone
-RUN apt-get update && apt-get install -y \
+# Set non-interactive frontend and preconfigure tzdata
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Preconfigure tzdata to avoid prompts
+RUN echo "tzdata tzdata/Areas select Europe" | debconf-set-selections && \
+    echo "tzdata tzdata/Zones/Europe select Berlin" | debconf-set-selections && \
+    apt-get update && apt-get install -y \
     locales \
     tzdata

diff --git a/dynamics_learning/Dockerfile.foundation_model_cron_job b/dynamics_learning/Dockerfile.foundation_model_cron_job
index 0e5d7ff..e793b26 100644
--- a/dynamics_learning/Dockerfile.foundation_model_cron_job
+++ b/dynamics_learning/Dockerfile.foundation_model_cron_job
@@ -1,7 +1,12 @@
 FROM nvcr.io/nvidia/tensorflow:23.05-tf2-py3

-# Install required packages for setting up the locale and timezone
-RUN apt-get update && apt-get install -y \
+# Set non-interactive frontend and preconfigure tzdata
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Preconfigure tzdata to avoid prompts
+RUN echo "tzdata tzdata/Areas select Europe" | debconf-set-selections && \
+    echo "tzdata tzdata/Zones/Europe select Berlin" | debconf-set-selections && \
+    apt-get update && apt-get install -y \
     locales \
     tzdata

@@ -20,6 +25,7 @@ ENV LANG=en_US.UTF-8 \
     LC_ALL=en_US.UTF-8 \
     TZ=Europe/Berlin

+
 WORKDIR /app

 COPY . .
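Both images now preseed debconf before installing tzdata, so the build never blocks on the interactive timezone dialog. A minimal runtime sanity check of the resulting configuration, run from Python inside the built image (the zone comes from the Dockerfiles above; the check script itself is hypothetical and not part of this patch):

from datetime import datetime
from zoneinfo import ZoneInfo  # standard library since Python 3.9

# TZ=Europe/Berlin is exported by the image; the zone database installed by
# tzdata must resolve it without any interactive configuration step.
print(datetime.now(ZoneInfo("Europe/Berlin")).isoformat())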
- """ - files = [ - str(file) - for file in Path( - f"/app/dynamics_learning/Trajectory Data/train/{robot_uuid}" - ).iterdir() - if file.is_file() and str(file).endswith("meas.csv") - ] - files.sort(reverse=True) - for file in files[num_trajectories_to_keep:]: - Path(file).unlink() - try: - file = file.replace("meas.csv", "com.csv") - Path(file).unlink() - except FileNotFoundError: - # logger.info("No com.csv file found.") - pass - try: - file = file.replace("com.csv", "interp_com.csv") - Path(file).unlink() - except FileNotFoundError: - # logger.info("No interp_com.csv file found.") - pass - return None - - -if __name__ == "__main__": - download_resource_content_into_uuid_folders() - num_trajectories_to_keep = 10 - LLT_ROBOT_UUID = "f2e72889-c140-4397-809f-fba1b892f17a" - robot_uuid = LLT_ROBOT_UUID - delete_files(num_trajectories_to_keep, robot_uuid) diff --git a/dynamics_learning/benchmark_number_of_runs.py b/dynamics_learning/benchmark_number_of_runs.py index 8fd86a3..3599715 100644 --- a/dynamics_learning/benchmark_number_of_runs.py +++ b/dynamics_learning/benchmark_number_of_runs.py @@ -1,4 +1,3 @@ -from dynamics_learning.data_retrieval import download_resource_content_into_uuid_folders from dynamics_learning.preprocessing.dataset_analysis import analyze from dynamics_learning.preprocessing.trajectory_interpolation import interpolate from dynamics_learning.environment import ( @@ -136,9 +135,9 @@ if __name__ == "__main__": # 7tglijx8: LLT instance based on foundation model # Download Training Data from the server - if download_file: - download_resource_content_into_uuid_folders() - # TODO implement max number of trajectories used for benchmark training + # if download_file: + # download_resource_content_into_uuid_folders() + # TODO implement max number of trajectories used for benchmark training wandb.login(key=WANDB_API_TOKEN, relogin=True) @@ -148,7 +147,9 @@ if __name__ == "__main__": if model1: # LLT instance model trained from scratch robot_uuid = LLT_ROBOT_UUID - directory = Path(f"/app/dynamics_learning/Trajectory Data/train/{robot_uuid}") + directory = Path( + f"/app/dynamics_learning/benchmark_trajectory_data/{robot_uuid}" + ) # Interpolate Training Data in UUID folders ( attained_data, @@ -175,7 +176,7 @@ if __name__ == "__main__": q_qd_qdd_interpolated_command_input=q_qd_qdd_interpolated_command_input, tau_attained_input=tau_attained_input, model=None, - notes="Sweep to train model from scratch. 50 Trajectories are avaiulable for training. Training ist stoped when the validation loss is below 50.", + notes="Sweep to train model from scratch. 100 Trajectories are avaiulable for training. Training ist stoped when the validation loss is below 50.", ) runs_model1 = runs @@ -188,7 +189,9 @@ if __name__ == "__main__": if model2: # LLT model based on ITA model without known hyperparameters robot_uuid = LLT_ROBOT_UUID - directory = Path(f"/app/dynamics_learning/Trajectory Data/train/{robot_uuid}") + directory = Path( + f"/app/dynamics_learning/benchmark_trajectory_data/{robot_uuid}" + ) # Interpolate Training Data in UUID folders ( attained_data, @@ -220,7 +223,7 @@ if __name__ == "__main__": q_qd_qdd_interpolated_command_input=q_qd_qdd_interpolated_command_input, tau_attained_input=tau_attained_input, model=model, - notes="Sweep to train model based on ITA model. 50 Trajectories are avaiulable for training. Training is stoped when the validation loss is below 50.", + notes="Sweep to train model based on ITA model. 100 Trajectories are avaiulable for training. 
diff --git a/dynamics_learning/dynamics_learning/data_retrieval/__init__.py b/dynamics_learning/dynamics_learning/data_retrieval/__init__.py
index 6e4571d..9838f98 100644
--- a/dynamics_learning/dynamics_learning/data_retrieval/__init__.py
+++ b/dynamics_learning/dynamics_learning/data_retrieval/__init__.py
@@ -4,7 +4,7 @@
 # Released under MIT License
 # Contact us for other licensing options.

-
+# %%
 from pathlib import Path
 from typing import List, Tuple

@@ -32,14 +32,20 @@ RESOURCE = PROJECT.resource(
 )  # Address the specific resource in the project


-def delete_files(num_trajectories_to_keep: int, robot_uuid: str) -> None:
+def delete_files(
+    num_trajectories_to_keep: int,
+    robot_uuid: str = "c9ff52e1-1733-4829-a209-ebd1586a8697",
+) -> None:
     """Delete files from the training data directory.

     Files are sorted by date and the newest files are kept.
+    robot_uuid = "c9ff52e1-1733-4829-a209-ebd1586a8697" for ITA
+    robot_uuid = "f2e72889-c140-4397-809f-fba1b892f17a" for LLT
+    robot_uuid = "2e60a671-dcc3-4a36-9734-a239c899b57d" for WZL

     Args:
         num_trajectories_to_keep (int): Number of trajectories to keep.
-        robot_uuid (str): Robot UUID.
+        robot_uuid (str): Robot UUID. Defaults to ITA.

     Returns:
         None: This function does not return anything.
@@ -58,12 +64,14 @@ def delete_files(num_trajectories_to_keep: int, robot_uuid: str) -> None:
             file = file.replace("meas.csv", "com.csv")
             Path(file).unlink()
         except FileNotFoundError:
-            logger.info("No com.csv file found.")
+            # logger.info("No com.csv file found.")
+            pass
         try:
             file = file.replace("com.csv", "interp_com.csv")
             Path(file).unlink()
         except FileNotFoundError:
-            logger.info("No interp_com.csv file found.")
+            # logger.info("No interp_com.csv file found.")
+            pass
     return None


@@ -83,7 +91,7 @@ def download_resource_content_into_uuid_folders():


 def download_resource_content(resource: coscine.resource.Resource = RESOURCE) -> Path:
-    logger.info(f"Downloading files from resource. This may take a while.\n{resource}")
+    logger.info("Downloading files from resource. This may take a while.")
     resource.download()
     # logger.info(f"Downloading files from resource\n{resource}")
     # files = resource.files(path="train",recursive=True, with_metadata=True)
@@ -291,3 +299,5 @@ if __name__ == "__main__":
     download_resource_content()
     # get_resource_content()
     # get_downloaded_files()
+
+# %%
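With the new default argument, callers that clean up ITA data no longer need to spell out the UUID; a usage sketch (the keep-counts are arbitrary):

from dynamics_learning.data_retrieval import delete_files

delete_files(50)  # ITA robot, via the new default robot_uuid
delete_files(50, robot_uuid="f2e72889-c140-4397-809f-fba1b892f17a")  # LLT robot, explicit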
diff --git a/dynamics_learning/foundation_model.py b/dynamics_learning/foundation_model.py
index ddc7859..dc83a1d 100644
--- a/dynamics_learning/foundation_model.py
+++ b/dynamics_learning/foundation_model.py
@@ -4,6 +4,7 @@
 # Released under MIT License
 import warnings
 from functools import partial
+from pathlib import Path

 from dynamics_learning.environment import WANDB_API_TOKEN, WANDB_ENTITY, WANDB_PROJECT

@@ -14,7 +15,7 @@ from pritty_logger import RichLogger

 # import env variables and set tensorflow variables
 import wandb
-from dynamics_learning.data_retrieval import download_resource_content
+# from dynamics_learning.data_retrieval import download_resource_content
 from dynamics_learning.preprocessing.dataset_analysis import analyze
 from dynamics_learning.preprocessing.trajectory_interpolation import interpolate
 from dynamics_learning.sweep.setup import setup_sweep
@@ -31,8 +32,8 @@ logger = RichLogger("dynamics_learning-foundation_model")

 if __name__ == "__main__":
     # download not existing data
-    local_resource_path = download_resource_content()
-    # local_resource_path = Path("/app/dynamics_learning/Trajectory Data")
+    # local_resource_path = download_resource_content()
+    local_resource_path = Path("/app/dynamics_learning/Trajectory Data")

     # preprocess data
     attained_data, command_data = analyze(local_resource_path / "train")
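Replacing the download call with a static path assumes the trajectory data is already mounted into the container. If that is not guaranteed, a guarded variant (a sketch, not part of this patch) would fall back to downloading:

from pathlib import Path

local_resource_path = Path("/app/dynamics_learning/Trajectory Data")
if not local_resource_path.exists():
    # Fall back to fetching from Coscine when the volume is absent.
    from dynamics_learning.data_retrieval import download_resource_content
    local_resource_path = download_resource_content()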
diff --git a/dynamics_learning/playground.py b/dynamics_learning/playground.py
new file mode 100644
index 0000000..8cdf22e
--- /dev/null
+++ b/dynamics_learning/playground.py
@@ -0,0 +1,127 @@
+# %%
+from pathlib import Path
+from typing import List, Tuple
+
+import coscine
+import coscine.resource
+from pritty_logger import RichLogger
+from rich.progress import track
+
+from dynamics_learning.environment import COSCINE_API_TOKEN
+
+logger = RichLogger("dynamics_learning-coscine_data_retrieval")
+
+CLIENT = coscine.ApiClient(
+    COSCINE_API_TOKEN, timeout=120, retries=5, verbose=False, enable_caching=True
+)
+
+projects = CLIENT.projects()
+for project in projects:
+    print(project.name)
+
+PROJECT = CLIENT.project(
+    "IoP Ws A.III Franka Emika Robot World Wide Lab Demonstrator", coscine.Project.name
+)
+logger.info(str(PROJECT))
+resources = PROJECT.resources()
+for resource in resources:
+    logger.info(resource.name)
+
+RESOURCE = PROJECT.resource("Trajectory Data")
+metadata = RESOURCE.metadata_form()
+# metadata["Title"] = "Bla bla bla"
+# metadata["Creator"] = "Me"
+# RESOURCE.upload("app/MyTest.txt", "Linked Data Content as string or bytes", metadata)
+
+QUERY = """
+SELECT ?path WHERE {
+    ?path dcterms:creator ?value .
+} LIMIT 10
+"""
+logger.info(QUERY)
+try:
+    files = RESOURCE.query(QUERY)
+    for file in files:
+        print(file.path)
+except Exception as e:
+    print(f"Error: {e}")
+
+# %%
+QUERY = """
+SELECT ?path WHERE {
+    ?path dcterms:created ?value .
+} ORDER BY DESC(?value)
+"""
+logger.info(QUERY)
+try:
+    files = RESOURCE.query(QUERY)
+    for file in files:
+        print(file.path)
+except Exception as e:
+    print(f"Error: {e}")
+
+# %%
+import time
+
+# ITA ROBOT DATA query
+ITA_QUERY = """
+PREFIX fwwl: <https://franka-wwl-demonstrator-iop-workstreams-ws-a3-55c043b308e72e51f.pages.git-ce.rwth-aachen.de/metadata.html#>
+PREFIX dcterms: <http://purl.org/dc/terms/>
+SELECT ?path WHERE {
+    ?path dcterms:created ?date ;
+          fwwl:robot-uuid "c9ff52e1-1733-4829-a209-ebd1586a8697" .
+    FILTER (CONTAINS(STR(?path), "train"))
+} ORDER BY DESC(?date)
+LIMIT 100
+"""
+
+# Note: this rebinding overwrites the UUID-restricted query above; it keeps the
+# date ordering but matches every robot and excludes the analysis folders.
+ITA_QUERY = """
+PREFIX fwwl: <https://franka-wwl-demonstrator-iop-workstreams-ws-a3-55c043b308e72e51f.pages.git-ce.rwth-aachen.de/metadata.html#>
+PREFIX dcterms: <http://purl.org/dc/terms/>
+SELECT ?path WHERE {
+    ?path dcterms:created ?date .
+    FILTER (CONTAINS(STR(?path), "train") && !CONTAINS(STR(?path), "analysis"))
+} ORDER BY DESC(?date)
+LIMIT 100
+"""
+
+# LLT ROBOT DATA query
+LLT_QUERY = """
+PREFIX fwwl: <https://franka-wwl-demonstrator-iop-workstreams-ws-a3-55c043b308e72e51f.pages.git-ce.rwth-aachen.de/metadata.html#>
+PREFIX dcterms: <http://purl.org/dc/terms/>
+SELECT ?path WHERE {
+    ?path dcterms:created ?date ;
+          fwwl:robot-uuid "f2e72889-c140-4397-809f-fba1b892f17a" .
+    FILTER (CONTAINS(STR(?path), "train") && !CONTAINS(STR(?path), "analysis"))
+} ORDER BY DESC(?date)
+LIMIT 100
+"""
+
+# Querying ITA ROBOT DATA
+logger.info(ITA_QUERY)
+t_start = time.time()
+ita_files = RESOURCE.query(ITA_QUERY)
+t_end = time.time()
+logger.info(f"Time taken: {t_end - t_start}")
+for file in ita_files:
+    logger.info(f"ITA File Path: {file.path}")
+
+# Querying LLT ROBOT DATA
+# logger.info(LLT_QUERY)
+# t_start = time.time()
+# llt_files = RESOURCE.query(LLT_QUERY)
+# t_end = time.time()
+# logger.info(f"Time taken: {t_end - t_start}")
+# for file in llt_files:
+#     logger.info(f"LLT File Path: {file.path}")
+
+# %%
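The playground queries only print paths. Assuming RESOURCE.query() returns the same file objects as RESOURCE.files() (whose download API the test.py below already uses), the query result could drive a targeted fetch; a sketch with a hypothetical target folder:

from pathlib import Path

ITA_ROBOT_UUID = "c9ff52e1-1733-4829-a209-ebd1586a8697"
target = Path(f"./Trajectory Data/train/{ITA_ROBOT_UUID}")
target.mkdir(parents=True, exist_ok=True)
for file in ita_files:  # result of RESOURCE.query(ITA_QUERY) above
    if not file.is_folder:
        file.download(str(target / file.name))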
diff --git a/dynamics_learning/test.py b/dynamics_learning/test.py
new file mode 100644
index 0000000..9115dff
--- /dev/null
+++ b/dynamics_learning/test.py
@@ -0,0 +1,212 @@
+# %%
+# from dynamics_learning.data_retrieval import download_resource_content_into_uuid_folders
+from pathlib import Path
+from typing import List, Tuple
+
+import coscine
+import coscine.resource
+from pritty_logger import RichLogger
+from rich.progress import track
+
+from dynamics_learning.environment import COSCINE_API_TOKEN
+
+logger = RichLogger("dynamics_learning-coscine_data_retrieval")
+
+CLIENT = coscine.ApiClient(
+    COSCINE_API_TOKEN, timeout=120, retries=5, verbose=False, enable_caching=True
+)
+PROJECT = CLIENT.project(
+    "IoP Ws A.III FER WWL Demo"
+)  # Connect to the specified Coscine project
+# print(PROJECT)
+
+# logger.log(
+#     f"Connecting to Coscine Project\n{PROJECT}"
+# )  # Log the connection to the project
+RESOURCE = PROJECT.resource(
+    "Trajectory Data"
+)  # Address the specific resource in the project
+
+
+def download_resource_content_into_uuid_folders():
+    """Download the resource content into folders named after the robot UUID.
+
+    Keeps only the 50 newest trajectories per robot.
+    """
+    files = RESOURCE.files(path="train", recursive=True, with_metadata=True)
+    logger.info(f"Attempting to download {len(files)} files:")  # \n{files}
+    for file in track(files):
+        if file.is_folder:
+            continue
+        logger.info(f"File: {file.name}")
+        robot_uuid = file.metadata_form()["Robot UUID"][0]
+        Path(f"./Trajectory Data/train/{robot_uuid}").mkdir(
+            parents=True, exist_ok=True
+        )
+        file.download(f"./Trajectory Data/train/{robot_uuid}/{file.name}")
+        # logger.info("Keeping only 50 trajectories per robot.")
+        # delete_files(50, robot_uuid)
+
+
+def delete_files(num_trajectories_to_keep: int, robot_uuid: str) -> None:
+    """Delete files from the training data directory.
+
+    Files are sorted by date and the newest files are kept.
+
+    Args:
+        num_trajectories_to_keep (int): Number of trajectories to keep.
+        robot_uuid (str): Robot UUID.
+
+    Returns:
+        None: This function does not return anything.
+    """
+    files = [
+        str(file)
+        for file in Path(
+            f"/app/dynamics_learning/Trajectory Data/train/{robot_uuid}"
+        ).iterdir()
+        if file.is_file() and str(file).endswith("meas.csv")
+    ]
+    files.sort(reverse=True)
+    for file in files[num_trajectories_to_keep:]:
+        Path(file).unlink()
+        try:
+            file = file.replace("meas.csv", "com.csv")
+            Path(file).unlink()
+        except FileNotFoundError:
+            # logger.info("No com.csv file found.")
+            pass
+        try:
+            file = file.replace("com.csv", "interp_com.csv")
+            Path(file).unlink()
+        except FileNotFoundError:
+            # logger.info("No interp_com.csv file found.")
+            pass
+    return None
+
+
+if __name__ == "__main__":
+    download_resource_content_into_uuid_folders()
+
+    # %%
+    num_trajectories_to_keep = 10
+    LLT_ROBOT_UUID = "f2e72889-c140-4397-809f-fba1b892f17a"
+    robot_uuid = LLT_ROBOT_UUID
+    # %%
+    delete_files(num_trajectories_to_keep, robot_uuid)
+
+
+# %%
+from datetime import datetime, timedelta
+
+root = Path("/app/dynamics_learning/dummy")
+
+
+def create_txt_files(directory: Path = root) -> None:
+    current_time = datetime.now()
+
+    for index in range(1001):
+        adjusted_time = current_time + timedelta(seconds=index)
+        file_name = f"{adjusted_time.strftime('%Y%m%d_%H%M%S')}.txt"
+        file_path = directory / file_name
+        with file_path.open("w") as file:
+            file.write(str(index))
+
+
+# Run the function to create the files
+create_txt_files(root)
+# %%
+files = [
+    str(file)
+    for file in Path("/app/dynamics_learning/dummy").iterdir()
+    if file.is_file() and str(file).endswith(".txt")
+]
+files.sort(reverse=True)
+files
+# %%
+for file in files[10:]:
+    Path(file).unlink()
+files = [
+    str(file)
+    for file in Path("/app/dynamics_learning/dummy").iterdir()
+    if file.is_file() and str(file).endswith(".txt")
+]
+files.sort(reverse=True)
+files
+# %%
+ """ + files = [ + str(file) + for file in Path( + f"/app/dynamics_learning/Trajectory Data/train/{robot_uuid}" + ).iterdir() + if file.is_file() and str(file).endswith("meas.csv") + ] + files.sort(reverse=True) + for file in files[num_trajectories_to_keep:]: + Path(file).unlink() + try: + file = file.replace("meas.csv", "com.csv") + Path(file).unlink() + except FileNotFoundError: + # logger.info("No com.csv file found.") + pass + try: + file = file.replace("com.csv", "interp_com.csv") + Path(file).unlink() + except FileNotFoundError: + # logger.info("No interp_com.csv file found.") + pass + return None + +delete_files(100, "c9ff52e1-1733-4829-a209-ebd1586a8697") # ITA +delete_files(100, "f2e72889-c140-4397-809f-fba1b892f17a") # LLT +# %% +import os + +def check_file_pairs(directory): + # Get all files in the directory + files = os.listdir(directory) + + # Split files into two groups based on their suffix + com_files = set(f[:-8] for f in files if f.endswith('_com.csv')) + meas_files = set(f[:-9] for f in files if f.endswith('_meas.csv')) + + print(len(com_files)) + print(com_files) + print(meas_files) + + # Find unmatched files + unmatched_com = com_files - meas_files + unmatched_meas = meas_files - com_files + + # Report results + if unmatched_com or unmatched_meas: + print("Unmatched files found:") + for name in unmatched_com: + print(f"No matching _meas.csv file for: {name}_com.csv") + for name in unmatched_meas: + print(f"No matching _com.csv file for: {name}_meas.csv") + else: + print("All files are properly paired.") + +# Example usage +LLT_ROBOT_UUID = "f2e72889-c140-4397-809f-fba1b892f17a" +ITA_ROBOT_UUID = "c9ff52e1-1733-4829-a209-ebd1586a8697" +directory = f"/app/dynamics_learning/Trajectory Data/train/{ITA_ROBOT_UUID}" +check_file_pairs(directory) + +# %% -- GitLab