Skip to content
Snippets Groups Projects
Commit a81cc8ca authored by Leon Michel Gorißen's avatar Leon Michel Gorißen
Browse files

Update .gitignore, Dockerfiles, and data retrieval logic; refactor benchmarks

- Added new paths (**/benchmark_models, **/benchmark_trajectory_data) to .gitignore
- Updated Dockerfiles for foundation model and cron job:
  - Set DEBIAN_FRONTEND to non-interactive to avoid tzdata prompts during installation
  - Preconfigured tzdata settings for consistent timezone configuration
- Modified docker-compose.yaml:
  - Renamed and added sections for foundation model services
- Deleted obsolete benchmark_model_accuracy.py script
- Refactored benchmark_number_of_runs.py:
  - Adjusted paths to benchmark data directories
  - Updated training notes and increased trajectory limits
- Improved data_retrieval module:
  - Added default robot UUID for delete_files function
  - Refined download logging messages
  - Commented out unused logger statements
- Minor adjustments to foundation_model.py:
  - Replaced dynamic download paths with static path reference
parent 5a51c7ce
No related branches found
No related tags found
No related merge requests found
......@@ -6,6 +6,8 @@
**/Foundation_Model
**/models
**/analysis
**/benchmark_models
**/benchmark_trajectory_data
**commit**
commit
......
......@@ -91,7 +91,7 @@ services:
network_mode: host
# network_mode: pandavlan # Uses the host's network stack, not creating a separate network namespace
#########################foundation_model##############################
#########################foundation model cron##########################
foundation_model_cron_job:
build:
context: .
......@@ -129,6 +129,7 @@ services:
volumes:
- /home/lgorissen/git/iop/franka_wwl_demonstrator:/app
#########################foundation model###############################
foundation_model:
build:
context: .
......
FROM nvcr.io/nvidia/tensorflow:23.05-tf2-py3
# Install required packages for setting up the locale and timezone
RUN apt-get update && apt-get install -y \
# Set non-interactive frontend and preconfigure tzdata
ENV DEBIAN_FRONTEND=noninteractive
# Preconfigure tzdata to avoid prompts
RUN echo "tzdata tzdata/Areas select Europe" | debconf-set-selections && \
echo "tzdata tzdata/Zones/Europe select Berlin" | debconf-set-selections && \
apt-get update && apt-get install -y \
locales \
tzdata
......
FROM nvcr.io/nvidia/tensorflow:23.05-tf2-py3
# Install required packages for setting up the locale and timezone
RUN apt-get update && apt-get install -y \
# Set non-interactive frontend and preconfigure tzdata
ENV DEBIAN_FRONTEND=noninteractive
# Preconfigure tzdata to avoid prompts
RUN echo "tzdata tzdata/Areas select Europe" | debconf-set-selections && \
echo "tzdata tzdata/Zones/Europe select Berlin" | debconf-set-selections && \
apt-get update && apt-get install -y \
locales \
tzdata
......@@ -20,6 +25,7 @@ ENV LANG=en_US.UTF-8 \
LC_ALL=en_US.UTF-8 \
TZ=Europe/Berlin
WORKDIR /app
COPY . .
......
from pathlib import Path
from dynamics_learning.data_retrieval import download_resource_content_into_uuid_folders
def delete_files(
    num_trajectories_to_keep: int,
    robot_uuid: str,
    base_dir: str = "/app/dynamics_learning/Trajectory Data/train",
) -> None:
    """Delete old trajectory files from the training data directory.

    Measurement files (``*meas.csv``) are sorted by file name in descending
    order — names start with a timestamp, so this is newest first — and only
    the newest ``num_trajectories_to_keep`` are kept.  For every deleted
    measurement file the matching ``*com.csv`` and ``*interp_com.csv``
    companion files are removed as well, if present.

    Args:
        num_trajectories_to_keep (int): Number of trajectories to keep.
        robot_uuid (str): Robot UUID (name of the per-robot subfolder).
        base_dir (str): Parent directory holding the per-robot folders.
            Defaults to the container's training data path.

    Returns:
        None: This function does not return anything.
    """
    files = [
        str(file)
        for file in Path(f"{base_dir}/{robot_uuid}").iterdir()
        if file.is_file() and str(file).endswith("meas.csv")
    ]
    files.sort(reverse=True)  # newest first thanks to the timestamp prefix
    for file in files[num_trajectories_to_keep:]:
        Path(file).unlink()
        # Remove the companion command file, if it exists.
        try:
            file = file.replace("meas.csv", "com.csv")
            Path(file).unlink()
        except FileNotFoundError:
            pass
        # Remove the interpolated command file, if it exists.
        try:
            file = file.replace("com.csv", "interp_com.csv")
            Path(file).unlink()
        except FileNotFoundError:
            pass
    return None
if __name__ == "__main__":
    # Pull the latest trajectory data from Coscine into per-robot folders.
    download_resource_content_into_uuid_folders()
    # Keep only the 10 newest trajectories for the LLT robot; older
    # measurement files and their companions are deleted.
    num_trajectories_to_keep = 10
    LLT_ROBOT_UUID = "f2e72889-c140-4397-809f-fba1b892f17a"
    robot_uuid = LLT_ROBOT_UUID
    delete_files(num_trajectories_to_keep, robot_uuid)
from dynamics_learning.data_retrieval import download_resource_content_into_uuid_folders
from dynamics_learning.preprocessing.dataset_analysis import analyze
from dynamics_learning.preprocessing.trajectory_interpolation import interpolate
from dynamics_learning.environment import (
......@@ -136,8 +135,8 @@ if __name__ == "__main__":
# 7tglijx8: LLT instance based on foundation model
# Download Training Data from the server
if download_file:
download_resource_content_into_uuid_folders()
# if download_file:
# download_resource_content_into_uuid_folders()
# TODO implement max number of trajectories used for benchmark training
wandb.login(key=WANDB_API_TOKEN, relogin=True)
......@@ -148,7 +147,9 @@ if __name__ == "__main__":
if model1:
# LLT instance model trained from scratch
robot_uuid = LLT_ROBOT_UUID
directory = Path(f"/app/dynamics_learning/Trajectory Data/train/{robot_uuid}")
directory = Path(
f"/app/dynamics_learning/benchmark_trajectory_data/{robot_uuid}"
)
# Interpolate Training Data in UUID folders
(
attained_data,
......@@ -175,7 +176,7 @@ if __name__ == "__main__":
q_qd_qdd_interpolated_command_input=q_qd_qdd_interpolated_command_input,
tau_attained_input=tau_attained_input,
model=None,
notes="Sweep to train model from scratch. 50 Trajectories are avaiulable for training. Training ist stoped when the validation loss is below 50.",
notes="Sweep to train model from scratch. 100 Trajectories are available for training. Training is stopped when the validation loss is below 50.",
)
runs_model1 = runs
......@@ -188,7 +189,9 @@ if __name__ == "__main__":
if model2:
# LLT model based on ITA model without known hyperparameters
robot_uuid = LLT_ROBOT_UUID
directory = Path(f"/app/dynamics_learning/Trajectory Data/train/{robot_uuid}")
directory = Path(
f"/app/dynamics_learning/benchmark_trajectory_data/{robot_uuid}"
)
# Interpolate Training Data in UUID folders
(
attained_data,
......@@ -220,7 +223,7 @@ if __name__ == "__main__":
q_qd_qdd_interpolated_command_input=q_qd_qdd_interpolated_command_input,
tau_attained_input=tau_attained_input,
model=model,
notes="Sweep to train model based on ITA model. 50 Trajectories are avaiulable for training. Training is stoped when the validation loss is below 50.",
notes="Sweep to train model based on ITA model. 100 Trajectories are available for training. Training is stopped when the validation loss is below 50.",
)
runs_model2 = runs
......
......@@ -4,7 +4,7 @@
# Released under MIT License
# Contact us for other licensing options.
# %%
from pathlib import Path
from typing import List, Tuple
......@@ -32,14 +32,20 @@ RESOURCE = PROJECT.resource(
) # Address the specific resource in the project
def delete_files(num_trajectories_to_keep: int, robot_uuid: str) -> None:
def delete_files(
num_trajectories_to_keep: int,
robot_uuid: str = "c9ff52e1-1733-4829-a209-ebd1586a8697",
) -> None:
"""Delete files from the training data directory.
Files are sorted by date and the newest files are kept.
robot_uuid = "c9ff52e1-1733-4829-a209-ebd1586a8697" for ITA
robot_uuid = "f2e72889-c140-4397-809f-fba1b892f17a" for LLT
robot_uuid = "2e60a671-dcc3-4a36-9734-a239c899b57d" for WZL
Args:
num_trajectories_to_keep (int): Number of trajectories to keep.
robot_uuid (str): Robot UUID.
robot_uuid (str): Robot UUID. Defaults to ITA.
Returns:
None: This function does not return anything.
......@@ -58,12 +64,14 @@ def delete_files(num_trajectories_to_keep: int, robot_uuid: str) -> None:
file = file.replace("meas.csv", "com.csv")
Path(file).unlink()
except FileNotFoundError:
logger.info("No com.csv file found.")
# logger.info("No com.csv file found.")
pass
try:
file = file.replace("com.csv", "interp_com.csv")
Path(file).unlink()
except FileNotFoundError:
logger.info("No interp_com.csv file found.")
# logger.info("No interp_com.csv file found.")
pass
return None
......@@ -83,7 +91,7 @@ def download_resource_content_into_uuid_folders():
def download_resource_content(resource: coscine.resource.Resource = RESOURCE) -> Path:
logger.info(f"Downloading files from resource. This may take a while.\n{resource}")
logger.info("Downloading files from resource. This may take a while.")
resource.download()
# logger.info(f"Downloading files from resource\n{resource}")
# files = resource.files(path="train",recursive=True, with_metadata=True)
......@@ -291,3 +299,5 @@ if __name__ == "__main__":
download_resource_content()
# get_resource_content()
# get_downloaded_files()
# %%
......@@ -4,6 +4,7 @@
# Released under MIT License
import warnings
from functools import partial
from pathlib import Path
from dynamics_learning.environment import WANDB_API_TOKEN, WANDB_ENTITY, WANDB_PROJECT
......@@ -14,7 +15,7 @@ from pritty_logger import RichLogger
# import env variables and set tensorflow variables
import wandb
from dynamics_learning.data_retrieval import download_resource_content
# from dynamics_learning.data_retrieval import download_resource_content
from dynamics_learning.preprocessing.dataset_analysis import analyze
from dynamics_learning.preprocessing.trajectory_interpolation import interpolate
from dynamics_learning.sweep.setup import setup_sweep
......@@ -31,8 +32,8 @@ logger = RichLogger("dynamics_learning-foundation_model")
if __name__ == "__main__":
# download not existing data
local_resource_path = download_resource_content()
# local_resource_path = Path("/app/dynamics_learning/Trajectory Data")
# local_resource_path = download_resource_content()
local_resource_path = Path("/app/dynamics_learning/Trajectory Data")
# preprocess data
attained_data, command_data = analyze(local_resource_path / "train")
......
#%%
from pathlib import Path
from typing import List, Tuple
import coscine
import coscine.resource
from pritty_logger import RichLogger
from rich.progress import track
from dynamics_learning.environment import COSCINE_API_TOKEN
logger = RichLogger("dynamics_learning-coscine_data_retrieval")
# Authenticated Coscine API client; caching avoids re-fetching metadata and
# retries guard against transient server errors.
CLIENT = coscine.ApiClient(
    COSCINE_API_TOKEN, timeout=120, retries=5, verbose=False, enable_caching=True
)
# Exploratory cell: list every project visible to this token.
projects = CLIENT.projects()
for project in projects:
    print(project.name)
# Open the demonstrator project by its display name.
PROJECT = CLIENT.project("IoP Ws A.III Franka Emika Robot World Wide Lab Demonstrator", coscine.Project.name)
logger.info(str(PROJECT))
# Log the names of all resources in the project.
resources = PROJECT.resources()
for resource in resources:
    logger.info(resource.name)
# Address the resource that stores the recorded robot trajectories.
RESOURCE = PROJECT.resource("Trajectory Data")
metadata = RESOURCE.metadata_form()
# Example of how metadata could be set and a file uploaded (kept for reference):
#metadata["Title"] = "Bla bla bla"
#metadata["Creator"] = "Me"
#RESOURCE.upload("app/MyTest.txt", "Linked Data Content as string or bytes", metadata)
# Replace 'YOUR_SPECIFIC_PATH' with the actual path you want to filter for
# SPARQL probe: list up to 10 file paths that carry a dcterms:creator value.
QUERY = (
    """
SELECT ?path WHERE {
?path dcterms:creator ?value .
} LIMIT 10
"""
)
logger.info(QUERY)
try:
    files = RESOURCE.query(QUERY)
    for file in files:
        print(file.path)
except Exception as e:
    # Broad catch is acceptable in this exploratory cell; just surface the error.
    print(f"Error: {e}")
# %%
# Second probe: all file paths ordered by creation date, newest first.
QUERY = (
    """
SELECT ?path WHERE {
?path dcterms:created ?value .
} ORDER BY DESC(?value)
"""
)
logger.info(QUERY)
try:
    files = RESOURCE.query(QUERY)
    for file in files:
        print(file.path)
except Exception as e:
    print(f"Error: {e}")
# %%
import time

# ITA ROBOT DATA query: newest 100 training files tagged with the ITA robot UUID.
ITA_QUERY = (
    """
PREFIX fwwl: <https://franka-wwl-demonstrator-iop-workstreams-ws-a3-55c043b308e72e51f.pages.git-ce.rwth-aachen.de/metadata.html#>
PREFIX dcterms: <http://purl.org/dc/terms/>
SELECT ?path WHERE {
?path dcterms:created ?date ;
fwwl:robot-uuid "c9ff52e1-1733-4829-a209-ebd1586a8697" .
FILTER (CONTAINS(STR(?path), "train"))
} ORDER BY DESC(?date)
LIMIT 100
"""
)
# NOTE(review): the assignment below immediately overwrites the UUID-filtered
# query above, dropping the robot-uuid restriction — confirm which variant is
# intended before relying on the results.
ITA_QUERY = (
    """
PREFIX fwwl: <https://franka-wwl-demonstrator-iop-workstreams-ws-a3-55c043b308e72e51f.pages.git-ce.rwth-aachen.de/metadata.html#>
PREFIX dcterms: <http://purl.org/dc/terms/>
SELECT ?path WHERE {
?path dcterms:created ?date .
FILTER (CONTAINS(STR(?path), "train") && !CONTAINS(STR(?path), "analysis"))
} ORDER BY DESC(?date)
LIMIT 100
"""
)
# LLT ROBOT DATA query: same shape, filtered to the LLT robot UUID.
LLT_QUERY = (
    """
PREFIX fwwl: <https://franka-wwl-demonstrator-iop-workstreams-ws-a3-55c043b308e72e51f.pages.git-ce.rwth-aachen.de/metadata.html#>
PREFIX dcterms: <http://purl.org/dc/terms/>
SELECT ?path WHERE {
?path dcterms:created ?date ;
fwwl:robot-uuid "f2e72889-c140-4397-809f-fba1b892f17a" .
FILTER (CONTAINS(STR(?path), "train") && !CONTAINS(STR(?path), "analysis"))
} ORDER BY DESC(?date)
LIMIT 100
"""
)
# Querying ITA ROBOT DATA (with a rough wall-clock timing of the query).
logger.info(ITA_QUERY)
t_start = time.time()
# NOTE(review): `resource` is the loop variable left over from the resource
# listing above, not the module-level RESOURCE — confirm this is intentional.
ita_files = resource.query(ITA_QUERY)
t_end = time.time()
logger.info(f"Time taken: {t_end - t_start}")
for file in ita_files:
    logger.info(f"ITA File Path: {file.path}")
# Querying LLT ROBOT DATA (currently disabled).
# logger.info(LLT_QUERY)
# t_start = time.time()
# llt_files = resource.query(LLT_QUERY)
# t_end = time.time()
# logger.info(f"Time taken: {t_end - t_start}")
# for file in llt_files:
# logger.info("LLT File Path:", file.path)
# %%
#%%
from pathlib import Path
#from dynamics_learning.data_retrieval import download_resource_content_into_uuid_folders
from pathlib import Path
from typing import List, Tuple
import coscine
import coscine.resource
from pritty_logger import RichLogger
from rich.progress import track
from dynamics_learning.environment import COSCINE_API_TOKEN
logger = RichLogger("dynamics_learning-coscine_data_retrieval")

# Coscine API client; caching and retries make repeated metadata reads robust.
CLIENT = coscine.ApiClient(
    COSCINE_API_TOKEN, timeout=120, retries=5, verbose=False, enable_caching=True
)
PROJECT = CLIENT.project(
    "IoP Ws A.III FER WWL Demo"
)  # Connect to the specified Coscine project
#print(PROJECT)
# logger.log(
#     f"Connecting to Coscine Project\n{PROJECT}"
# ) # Log the connection to the project
RESOURCE = PROJECT.resource(
    "Trajectory Data"
)  # Address the specific resource in the project
def download_resource_content_into_uuid_folders():
    """Download all training files into folders named after the robot UUID.

    Every file under ``train`` in the Coscine resource is saved beneath
    ``./Trajectory Data/train/<robot_uuid>/``, where the UUID is read from
    the file's Coscine metadata ("Robot UUID" field).

    NOTE(review): the pruning to the 50 newest trajectories per robot is
    commented out below, so no files are deleted — the previous docstring
    claimed otherwise.
    """
    files = RESOURCE.files(path="train", recursive=True, with_metadata=True)
    logger.info(f"Attempting to download {len(files)} files:")  # \n{files}
    for file in track(files):
        # The listing includes folder entries; only download real files.
        if file.is_folder:
            continue
        logger.info(f"File: {file.name}")
        robot_uuid = file.metadata_form()["Robot UUID"][0]
        Path(f"./Trajectory Data/train/{robot_uuid}").mkdir(parents=True, exist_ok=True)
        file.download(f"./Trajectory Data/train/{robot_uuid}/{file.name}")
        # logger.info(f"Keeping only 50 trajectories per robot.")
        # delete_files(50, robot_uuid)
def delete_files(num_trajectories_to_keep: int, robot_uuid: str) -> None:
    """Keep only the newest trajectories for one robot.

    Measurement files are sorted by name in descending order (the timestamp
    prefix makes this newest first); everything beyond the first
    ``num_trajectories_to_keep`` entries is deleted together with any
    matching ``com.csv`` / ``interp_com.csv`` companion files.

    Args:
        num_trajectories_to_keep (int): Number of trajectories to keep.
        robot_uuid (str): Robot UUID.

    Returns:
        None: This function does not return anything.
    """
    train_dir = Path(f"/app/dynamics_learning/Trajectory Data/train/{robot_uuid}")
    meas_files = sorted(
        (
            str(entry)
            for entry in train_dir.iterdir()
            if entry.is_file() and str(entry).endswith("meas.csv")
        ),
        reverse=True,
    )
    for stale in meas_files[num_trajectories_to_keep:]:
        Path(stale).unlink()
        # Companion command file may be absent; ignore that case.
        companion = stale.replace("meas.csv", "com.csv")
        try:
            Path(companion).unlink()
        except FileNotFoundError:
            pass
        # Same for the interpolated command file.
        interpolated = companion.replace("com.csv", "interp_com.csv")
        try:
            Path(interpolated).unlink()
        except FileNotFoundError:
            pass
    return None
if __name__ == "__main__":
    # Cell 1: fetch the current training data from Coscine.
    download_resource_content_into_uuid_folders()
    #%%
    # Cell 2: choose which robot to prune and how many trajectories to keep.
    num_trajectories_to_keep = 10
    LLT_ROBOT_UUID = "f2e72889-c140-4397-809f-fba1b892f17a"
    robot_uuid = LLT_ROBOT_UUID
    #%%
    # Cell 3: drop everything but the newest trajectories.
    delete_files(num_trajectories_to_keep, robot_uuid)
#%%
from pathlib import Path
from datetime import datetime, timedelta
root = Path("/app/dynamics_learning/dummy")


def create_txt_files(directory: Path = root, count: int = 1001) -> None:
    """Create *count* dummy ``.txt`` files named by ascending timestamps.

    Files are named ``YYYYmmdd_HHMMSS.txt``, one second apart, and each
    contains its creation index.  Used to exercise the newest-N pruning
    logic without real trajectory data.

    Args:
        directory (Path): Target directory (must already exist). Defaults
            to the dummy folder under ``/app``.
        count (int): Number of files to create. Defaults to 1001, matching
            the original hard-coded behaviour.

    Returns:
        None: This function does not return anything.
    """
    start = datetime.now()
    for index in range(count):
        stamp = (start + timedelta(seconds=index)).strftime("%Y%m%d_%H%M%S")
        # One file per second of offset; content is just the index.
        (directory / f"{stamp}.txt").write_text(str(index))
# Run the function to create the files
create_txt_files(root)
#%%
# Collect the dummy .txt files; the timestamp names sort chronologically,
# so reverse order puts the newest first.
files = [
    str(file)
    for file in Path(
        f"/app/dynamics_learning/dummy"
    ).iterdir()
    if file.is_file() and str(file).endswith(".txt")
]
files.sort(reverse=True)
files  # notebook-style display of the sorted listing
#%%
# Delete all but the 10 newest dummy files, then re-list to verify.
for file in files[10:]:
    Path(file).unlink()
files = [
    str(file)
    for file in Path(
        f"/app/dynamics_learning/dummy"
    ).iterdir()
    if file.is_file() and str(file).endswith(".txt")
]
files.sort(reverse=True)
files  # should now contain exactly the 10 newest files
# %%
# %%
def delete_files(
    num_trajectories_to_keep: int,
    robot_uuid: str = "c9ff52e1-1733-4829-a209-ebd1586a8697",
) -> None:
    """Keep only the newest trajectories in a robot's training folder.

    Measurement files sort newest-first by their timestamp prefix; every
    file beyond the first ``num_trajectories_to_keep`` is removed along
    with any matching ``com.csv`` / ``interp_com.csv`` companions.

    Known robot UUIDs:
        "c9ff52e1-1733-4829-a209-ebd1586a8697" for ITA
        "f2e72889-c140-4397-809f-fba1b892f17a" for LLT
        "2e60a671-dcc3-4a36-9734-a239c899b57d" for WZL

    Args:
        num_trajectories_to_keep (int): Number of trajectories to keep.
        robot_uuid (str): Robot UUID. Defaults to ITA.

    Returns:
        None: This function does not return anything.
    """
    train_dir = Path(f"/app/dynamics_learning/Trajectory Data/train/{robot_uuid}")
    meas_files = sorted(
        (
            str(entry)
            for entry in train_dir.iterdir()
            if entry.is_file() and str(entry).endswith("meas.csv")
        ),
        reverse=True,
    )
    for stale in meas_files[num_trajectories_to_keep:]:
        Path(stale).unlink()
        # Companion files may not exist for every measurement; skip quietly.
        companion = stale.replace("meas.csv", "com.csv")
        try:
            Path(companion).unlink()
        except FileNotFoundError:
            pass
        interpolated = companion.replace("com.csv", "interp_com.csv")
        try:
            Path(interpolated).unlink()
        except FileNotFoundError:
            pass
    return None
# Prune each robot's training folder down to the 100 newest trajectories.
delete_files(100, "c9ff52e1-1733-4829-a209-ebd1586a8697") # ITA
delete_files(100, "f2e72889-c140-4397-809f-fba1b892f17a") # LLT
# %%
import os
def check_file_pairs(directory):
    """Report whether every ``*_com.csv`` has a matching ``*_meas.csv`` and vice versa.

    Prints the set sizes/contents for inspection, then either lists each
    unmatched file or confirms that all files are properly paired.

    Args:
        directory: Path of the folder whose files are checked.
    """
    entries = os.listdir(directory)
    # Strip the suffixes so the trajectory base names can be compared.
    com_files = {name[: -len("_com.csv")] for name in entries if name.endswith("_com.csv")}
    meas_files = {name[: -len("_meas.csv")] for name in entries if name.endswith("_meas.csv")}
    print(len(com_files))
    print(com_files)
    print(meas_files)
    unmatched_com = com_files - meas_files
    unmatched_meas = meas_files - com_files
    if not unmatched_com and not unmatched_meas:
        print("All files are properly paired.")
        return
    print("Unmatched files found:")
    for base in unmatched_com:
        print(f"No matching _meas.csv file for: {base}_com.csv")
    for base in unmatched_meas:
        print(f"No matching _com.csv file for: {base}_meas.csv")
# Example usage
LLT_ROBOT_UUID = "f2e72889-c140-4397-809f-fba1b892f17a"
ITA_ROBOT_UUID = "c9ff52e1-1733-4829-a209-ebd1586a8697"
# Check the ITA robot's training folder for unpaired com/meas files.
directory = f"/app/dynamics_learning/Trajectory Data/train/{ITA_ROBOT_UUID}"
check_file_pairs(directory)
# %%
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment