Skip to content
Snippets Groups Projects
Commit a81cc8ca authored by Leon Michel Gorißen's avatar Leon Michel Gorißen
Browse files

Update .gitignore, Dockerfiles, and data retrieval logic; refactor benchmarks

- Added new paths (**/benchmark_models, **/benchmark_trajectory_data) to .gitignore
- Updated Dockerfiles for foundation model and cron job:
  - Set DEBIAN_FRONTEND to non-interactive to avoid tzdata prompts during installation
  - Preconfigured tzdata settings for consistent timezone configuration
- Modified docker-compose.yaml:
  - Renamed and added sections for foundation model services
- Deleted obsolete benchmark_model_accuracy.py script
- Refactored benchmark_number_of_runs.py:
  - Adjusted paths to benchmark data directories
  - Updated training notes and increased trajectory limits
- Improved data_retrieval module:
  - Added default robot UUID for delete_files function
  - Refined download logging messages
  - Commented out unused logger statements
- Minor adjustments to foundation_model.py:
  - Replaced dynamic download paths with static path reference
parent 5a51c7ce
No related branches found
No related tags found
No related merge requests found
......@@ -6,6 +6,8 @@
**/Foundation_Model
**/models
**/analysis
**/benchmark_models
**/benchmark_trajectory_data
**commit**
commit
......
......@@ -91,7 +91,7 @@ services:
network_mode: host
# network_mode: pandavlan # Uses the host's network stack, not creating a separate network namespace
#########################foundation_model##############################
#########################foundation model cron##########################
foundation_model_cron_job:
build:
context: .
......@@ -129,6 +129,7 @@ services:
volumes:
- /home/lgorissen/git/iop/franka_wwl_demonstrator:/app
#########################foundation model###############################
foundation_model:
build:
context: .
......
FROM nvcr.io/nvidia/tensorflow:23.05-tf2-py3
# Install required packages for setting up the locale and timezone
RUN apt-get update && apt-get install -y \
# Set non-interactive frontend and preconfigure tzdata
ENV DEBIAN_FRONTEND=noninteractive
# Preconfigure tzdata to avoid prompts
RUN echo "tzdata tzdata/Areas select Europe" | debconf-set-selections && \
echo "tzdata tzdata/Zones/Europe select Berlin" | debconf-set-selections && \
apt-get update && apt-get install -y \
locales \
tzdata
......
FROM nvcr.io/nvidia/tensorflow:23.05-tf2-py3
# Install required packages for setting up the locale and timezone
RUN apt-get update && apt-get install -y \
# Set non-interactive frontend and preconfigure tzdata
ENV DEBIAN_FRONTEND=noninteractive
# Preconfigure tzdata to avoid prompts
RUN echo "tzdata tzdata/Areas select Europe" | debconf-set-selections && \
echo "tzdata tzdata/Zones/Europe select Berlin" | debconf-set-selections && \
apt-get update && apt-get install -y \
locales \
tzdata
......@@ -20,6 +25,7 @@ ENV LANG=en_US.UTF-8 \
LC_ALL=en_US.UTF-8 \
TZ=Europe/Berlin
WORKDIR /app
COPY . .
......
from pathlib import Path
from dynamics_learning.data_retrieval import download_resource_content_into_uuid_folders
def delete_files(
    num_trajectories_to_keep: int,
    robot_uuid: str,
    base_dir: str = "/app/dynamics_learning/Trajectory Data/train",
) -> None:
    """Delete old trajectory files from the training data directory.

    Measurement files (``*meas.csv``) are sorted by file name in descending
    order — names start with a timestamp, so this is newest first — and only
    the newest ``num_trajectories_to_keep`` are kept.  For every deleted
    measurement file the matching ``*com.csv`` and ``*interp_com.csv``
    companion files are removed as well, if present.

    Args:
        num_trajectories_to_keep (int): Number of trajectories to keep.
        robot_uuid (str): Robot UUID (name of the per-robot subfolder).
        base_dir (str): Parent directory holding the per-robot folders.
            Defaults to the container's training data path.

    Returns:
        None: This function does not return anything.
    """
    files = [
        str(file)
        for file in Path(f"{base_dir}/{robot_uuid}").iterdir()
        if file.is_file() and str(file).endswith("meas.csv")
    ]
    files.sort(reverse=True)  # newest first thanks to the timestamp prefix
    for file in files[num_trajectories_to_keep:]:
        Path(file).unlink()
        # Remove the companion command file, if it exists.
        try:
            file = file.replace("meas.csv", "com.csv")
            Path(file).unlink()
        except FileNotFoundError:
            pass
        # Remove the interpolated command file, if it exists.
        try:
            file = file.replace("com.csv", "interp_com.csv")
            Path(file).unlink()
        except FileNotFoundError:
            pass
    return None
if __name__ == "__main__":
    # Pull the latest trajectory data from Coscine into per-robot folders.
    download_resource_content_into_uuid_folders()
    # Keep only the 10 newest trajectories for the LLT robot; older
    # measurement files and their companions are deleted.
    num_trajectories_to_keep = 10
    LLT_ROBOT_UUID = "f2e72889-c140-4397-809f-fba1b892f17a"
    robot_uuid = LLT_ROBOT_UUID
    delete_files(num_trajectories_to_keep, robot_uuid)
from dynamics_learning.data_retrieval import download_resource_content_into_uuid_folders
from dynamics_learning.preprocessing.dataset_analysis import analyze
from dynamics_learning.preprocessing.trajectory_interpolation import interpolate
from dynamics_learning.environment import (
......@@ -136,8 +135,8 @@ if __name__ == "__main__":
# 7tglijx8: LLT instance based on foundation model
# Download Training Data from the server
if download_file:
download_resource_content_into_uuid_folders()
# if download_file:
# download_resource_content_into_uuid_folders()
# TODO implement max number of trajectories used for benchmark training
wandb.login(key=WANDB_API_TOKEN, relogin=True)
......@@ -148,7 +147,9 @@ if __name__ == "__main__":
if model1:
# LLT instance model trained from scratch
robot_uuid = LLT_ROBOT_UUID
directory = Path(f"/app/dynamics_learning/Trajectory Data/train/{robot_uuid}")
directory = Path(
f"/app/dynamics_learning/benchmark_trajectory_data/{robot_uuid}"
)
# Interpolate Training Data in UUID folders
(
attained_data,
......@@ -175,7 +176,7 @@ if __name__ == "__main__":
q_qd_qdd_interpolated_command_input=q_qd_qdd_interpolated_command_input,
tau_attained_input=tau_attained_input,
model=None,
notes="Sweep to train model from scratch. 50 Trajectories are avaiulable for training. Training ist stoped when the validation loss is below 50.",
notes="Sweep to train model from scratch. 100 Trajectories are available for training. Training is stopped when the validation loss is below 50.",
)
runs_model1 = runs
......@@ -188,7 +189,9 @@ if __name__ == "__main__":
if model2:
# LLT model based on ITA model without known hyperparameters
robot_uuid = LLT_ROBOT_UUID
directory = Path(f"/app/dynamics_learning/Trajectory Data/train/{robot_uuid}")
directory = Path(
f"/app/dynamics_learning/benchmark_trajectory_data/{robot_uuid}"
)
# Interpolate Training Data in UUID folders
(
attained_data,
......@@ -220,7 +223,7 @@ if __name__ == "__main__":
q_qd_qdd_interpolated_command_input=q_qd_qdd_interpolated_command_input,
tau_attained_input=tau_attained_input,
model=model,
notes="Sweep to train model based on ITA model. 50 Trajectories are avaiulable for training. Training is stoped when the validation loss is below 50.",
notes="Sweep to train model based on ITA model. 100 Trajectories are available for training. Training is stopped when the validation loss is below 50.",
)
runs_model2 = runs
......
......@@ -4,7 +4,7 @@
# Released under MIT License
# Contact us for other licensing options.
# %%
from pathlib import Path
from typing import List, Tuple
......@@ -32,14 +32,20 @@ RESOURCE = PROJECT.resource(
) # Address the specific resource in the project
def delete_files(num_trajectories_to_keep: int, robot_uuid: str) -> None:
def delete_files(
num_trajectories_to_keep: int,
robot_uuid: str = "c9ff52e1-1733-4829-a209-ebd1586a8697",
) -> None:
"""Delete files from the training data directory.
Files are sorted by date and the newest files are kept.
robot_uuid = "c9ff52e1-1733-4829-a209-ebd1586a8697" for ITA
robot_uuid = "f2e72889-c140-4397-809f-fba1b892f17a" for LLT
robot_uuid = "2e60a671-dcc3-4a36-9734-a239c899b57d" for WZL
Args:
num_trajectories_to_keep (int): Number of trajectories to keep.
robot_uuid (str): Robot UUID.
robot_uuid (str): Robot UUID. Defaults to ITA.
Returns:
None: This function does not return anything.
......@@ -58,12 +64,14 @@ def delete_files(num_trajectories_to_keep: int, robot_uuid: str) -> None:
file = file.replace("meas.csv", "com.csv")
Path(file).unlink()
except FileNotFoundError:
logger.info("No com.csv file found.")
# logger.info("No com.csv file found.")
pass
try:
file = file.replace("com.csv", "interp_com.csv")
Path(file).unlink()
except FileNotFoundError:
logger.info("No interp_com.csv file found.")
# logger.info("No interp_com.csv file found.")
pass
return None
......@@ -83,7 +91,7 @@ def download_resource_content_into_uuid_folders():
def download_resource_content(resource: coscine.resource.Resource = RESOURCE) -> Path:
logger.info(f"Downloading files from resource. This may take a while.\n{resource}")
logger.info("Downloading files from resource. This may take a while.")
resource.download()
# logger.info(f"Downloading files from resource\n{resource}")
# files = resource.files(path="train",recursive=True, with_metadata=True)
......@@ -291,3 +299,5 @@ if __name__ == "__main__":
download_resource_content()
# get_resource_content()
# get_downloaded_files()
# %%
......@@ -4,6 +4,7 @@
# Released under MIT License
import warnings
from functools import partial
from pathlib import Path
from dynamics_learning.environment import WANDB_API_TOKEN, WANDB_ENTITY, WANDB_PROJECT
......@@ -14,7 +15,7 @@ from pritty_logger import RichLogger
# import env variables and set tensorflow variables
import wandb
from dynamics_learning.data_retrieval import download_resource_content
# from dynamics_learning.data_retrieval import download_resource_content
from dynamics_learning.preprocessing.dataset_analysis import analyze
from dynamics_learning.preprocessing.trajectory_interpolation import interpolate
from dynamics_learning.sweep.setup import setup_sweep
......@@ -31,8 +32,8 @@ logger = RichLogger("dynamics_learning-foundation_model")
if __name__ == "__main__":
# download not existing data
local_resource_path = download_resource_content()
# local_resource_path = Path("/app/dynamics_learning/Trajectory Data")
# local_resource_path = download_resource_content()
local_resource_path = Path("/app/dynamics_learning/Trajectory Data")
# preprocess data
attained_data, command_data = analyze(local_resource_path / "train")
......
#%%
from pathlib import Path
from typing import List, Tuple
import coscine
import coscine.resource
from pritty_logger import RichLogger
from rich.progress import track
from dynamics_learning.environment import COSCINE_API_TOKEN
logger = RichLogger("dynamics_learning-coscine_data_retrieval")
# Authenticated Coscine API client; caching avoids re-fetching metadata and
# retries guard against transient server errors.
CLIENT = coscine.ApiClient(
    COSCINE_API_TOKEN, timeout=120, retries=5, verbose=False, enable_caching=True
)
# Exploratory cell: list every project visible to this token.
projects = CLIENT.projects()
for project in projects:
    print(project.name)
# Open the demonstrator project by its display name.
PROJECT = CLIENT.project("IoP Ws A.III Franka Emika Robot World Wide Lab Demonstrator", coscine.Project.name)
logger.info(str(PROJECT))
# Log the names of all resources in the project.
resources = PROJECT.resources()
for resource in resources:
    logger.info(resource.name)
# Address the resource that stores the recorded robot trajectories.
RESOURCE = PROJECT.resource("Trajectory Data")
metadata = RESOURCE.metadata_form()
# Example of how metadata could be set and a file uploaded (kept for reference):
#metadata["Title"] = "Bla bla bla"
#metadata["Creator"] = "Me"
#RESOURCE.upload("app/MyTest.txt", "Linked Data Content as string or bytes", metadata)
# Replace 'YOUR_SPECIFIC_PATH' with the actual path you want to filter for
# SPARQL probe: list up to 10 file paths that carry a dcterms:creator value.
QUERY = (
    """
SELECT ?path WHERE {
?path dcterms:creator ?value .
} LIMIT 10
"""
)
logger.info(QUERY)
try:
    files = RESOURCE.query(QUERY)
    for file in files:
        print(file.path)
except Exception as e:
    # Broad catch is acceptable in this exploratory cell; just surface the error.
    print(f"Error: {e}")
# %%
# Second probe: all file paths ordered by creation date, newest first.
QUERY = (
    """
SELECT ?path WHERE {
?path dcterms:created ?value .
} ORDER BY DESC(?value)
"""
)
logger.info(QUERY)
try:
    files = RESOURCE.query(QUERY)
    for file in files:
        print(file.path)
except Exception as e:
    print(f"Error: {e}")
# %%
import time

# ITA ROBOT DATA query: newest 100 training files tagged with the ITA robot UUID.
ITA_QUERY = (
    """
PREFIX fwwl: <https://franka-wwl-demonstrator-iop-workstreams-ws-a3-55c043b308e72e51f.pages.git-ce.rwth-aachen.de/metadata.html#>
PREFIX dcterms: <http://purl.org/dc/terms/>
SELECT ?path WHERE {
?path dcterms:created ?date ;
fwwl:robot-uuid "c9ff52e1-1733-4829-a209-ebd1586a8697" .
FILTER (CONTAINS(STR(?path), "train"))
} ORDER BY DESC(?date)
LIMIT 100
"""
)
# NOTE(review): the assignment below immediately overwrites the UUID-filtered
# query above, dropping the robot-uuid restriction — confirm which variant is
# intended before relying on the results.
ITA_QUERY = (
    """
PREFIX fwwl: <https://franka-wwl-demonstrator-iop-workstreams-ws-a3-55c043b308e72e51f.pages.git-ce.rwth-aachen.de/metadata.html#>
PREFIX dcterms: <http://purl.org/dc/terms/>
SELECT ?path WHERE {
?path dcterms:created ?date .
FILTER (CONTAINS(STR(?path), "train") && !CONTAINS(STR(?path), "analysis"))
} ORDER BY DESC(?date)
LIMIT 100
"""
)
# LLT ROBOT DATA query: same shape, filtered to the LLT robot UUID.
LLT_QUERY = (
    """
PREFIX fwwl: <https://franka-wwl-demonstrator-iop-workstreams-ws-a3-55c043b308e72e51f.pages.git-ce.rwth-aachen.de/metadata.html#>
PREFIX dcterms: <http://purl.org/dc/terms/>
SELECT ?path WHERE {
?path dcterms:created ?date ;
fwwl:robot-uuid "f2e72889-c140-4397-809f-fba1b892f17a" .
FILTER (CONTAINS(STR(?path), "train") && !CONTAINS(STR(?path), "analysis"))
} ORDER BY DESC(?date)
LIMIT 100
"""
)
# Querying ITA ROBOT DATA (with a rough wall-clock timing of the query).
logger.info(ITA_QUERY)
t_start = time.time()
# NOTE(review): `resource` is the loop variable left over from the resource
# listing above, not the module-level RESOURCE — confirm this is intentional.
ita_files = resource.query(ITA_QUERY)
t_end = time.time()
logger.info(f"Time taken: {t_end - t_start}")
for file in ita_files:
    logger.info(f"ITA File Path: {file.path}")
# Querying LLT ROBOT DATA (currently disabled).
# logger.info(LLT_QUERY)
# t_start = time.time()
# llt_files = resource.query(LLT_QUERY)
# t_end = time.time()
# logger.info(f"Time taken: {t_end - t_start}")
# for file in llt_files:
# logger.info("LLT File Path:", file.path)
# %%
#%%
from pathlib import Path
#from dynamics_learning.data_retrieval import download_resource_content_into_uuid_folders
from pathlib import Path
from typing import List, Tuple
import coscine
import coscine.resource
from pritty_logger import RichLogger
from rich.progress import track
from dynamics_learning.environment import COSCINE_API_TOKEN
logger = RichLogger("dynamics_learning-coscine_data_retrieval")

# Coscine API client; caching and retries make repeated metadata reads robust.
CLIENT = coscine.ApiClient(
    COSCINE_API_TOKEN, timeout=120, retries=5, verbose=False, enable_caching=True
)
PROJECT = CLIENT.project(
    "IoP Ws A.III FER WWL Demo"
)  # Connect to the specified Coscine project
#print(PROJECT)
# logger.log(
#     f"Connecting to Coscine Project\n{PROJECT}"
# ) # Log the connection to the project
RESOURCE = PROJECT.resource(
    "Trajectory Data"
)  # Address the specific resource in the project
def download_resource_content_into_uuid_folders():
    """Download all training files into folders named after the robot UUID.

    Every file under ``train`` in the Coscine resource is saved beneath
    ``./Trajectory Data/train/<robot_uuid>/``, where the UUID is read from
    the file's Coscine metadata ("Robot UUID" field).

    NOTE(review): the pruning to the 50 newest trajectories per robot is
    commented out below, so no files are deleted — the previous docstring
    claimed otherwise.
    """
    files = RESOURCE.files(path="train", recursive=True, with_metadata=True)
    logger.info(f"Attempting to download {len(files)} files:")  # \n{files}
    for file in track(files):
        # The listing includes folder entries; only download real files.
        if file.is_folder:
            continue
        logger.info(f"File: {file.name}")
        robot_uuid = file.metadata_form()["Robot UUID"][0]
        Path(f"./Trajectory Data/train/{robot_uuid}").mkdir(parents=True, exist_ok=True)
        file.download(f"./Trajectory Data/train/{robot_uuid}/{file.name}")
        # logger.info(f"Keeping only 50 trajectories per robot.")
        # delete_files(50, robot_uuid)
def delete_files(num_trajectories_to_keep: int, robot_uuid: str) -> None:
    """Keep only the newest trajectories for one robot.

    Measurement files are sorted by name in descending order (the timestamp
    prefix makes this newest first); everything beyond the first
    ``num_trajectories_to_keep`` entries is deleted together with any
    matching ``com.csv`` / ``interp_com.csv`` companion files.

    Args:
        num_trajectories_to_keep (int): Number of trajectories to keep.
        robot_uuid (str): Robot UUID.

    Returns:
        None: This function does not return anything.
    """
    train_dir = Path(f"/app/dynamics_learning/Trajectory Data/train/{robot_uuid}")
    meas_files = sorted(
        (
            str(entry)
            for entry in train_dir.iterdir()
            if entry.is_file() and str(entry).endswith("meas.csv")
        ),
        reverse=True,
    )
    for stale in meas_files[num_trajectories_to_keep:]:
        Path(stale).unlink()
        # Companion command file may be absent; ignore that case.
        companion = stale.replace("meas.csv", "com.csv")
        try:
            Path(companion).unlink()
        except FileNotFoundError:
            pass
        # Same for the interpolated command file.
        interpolated = companion.replace("com.csv", "interp_com.csv")
        try:
            Path(interpolated).unlink()
        except FileNotFoundError:
            pass
    return None
if __name__ == "__main__":
    # Cell 1: fetch the current training data from Coscine.
    download_resource_content_into_uuid_folders()
    #%%
    # Cell 2: choose which robot to prune and how many trajectories to keep.
    num_trajectories_to_keep = 10
    LLT_ROBOT_UUID = "f2e72889-c140-4397-809f-fba1b892f17a"
    robot_uuid = LLT_ROBOT_UUID
    #%%
    # Cell 3: drop everything but the newest trajectories.
    delete_files(num_trajectories_to_keep, robot_uuid)
#%%
from pathlib import Path
from datetime import datetime, timedelta
root = Path("/app/dynamics_learning/dummy")


def create_txt_files(directory: Path = root, count: int = 1001) -> None:
    """Create *count* dummy ``.txt`` files named by ascending timestamps.

    Files are named ``YYYYmmdd_HHMMSS.txt``, one second apart, and each
    contains its creation index.  Used to exercise the newest-N pruning
    logic without real trajectory data.

    Args:
        directory (Path): Target directory (must already exist). Defaults
            to the dummy folder under ``/app``.
        count (int): Number of files to create. Defaults to 1001, matching
            the original hard-coded behaviour.

    Returns:
        None: This function does not return anything.
    """
    start = datetime.now()
    for index in range(count):
        stamp = (start + timedelta(seconds=index)).strftime("%Y%m%d_%H%M%S")
        # One file per second of offset; content is just the index.
        (directory / f"{stamp}.txt").write_text(str(index))
# Run the function to create the files
create_txt_files(root)
#%%
# Collect the dummy .txt files; the timestamp names sort chronologically,
# so reverse order puts the newest first.
files = [
    str(file)
    for file in Path(
        f"/app/dynamics_learning/dummy"
    ).iterdir()
    if file.is_file() and str(file).endswith(".txt")
]
files.sort(reverse=True)
files  # notebook-style display of the sorted listing
#%%
# Delete all but the 10 newest dummy files, then re-list to verify.
for file in files[10:]:
    Path(file).unlink()
files = [
    str(file)
    for file in Path(
        f"/app/dynamics_learning/dummy"
    ).iterdir()
    if file.is_file() and str(file).endswith(".txt")
]
files.sort(reverse=True)
files  # should now contain exactly the 10 newest files
# %%
# %%
def delete_files(
    num_trajectories_to_keep: int,
    robot_uuid: str = "c9ff52e1-1733-4829-a209-ebd1586a8697",
) -> None:
    """Keep only the newest trajectories in a robot's training folder.

    Measurement files sort newest-first by their timestamp prefix; every
    file beyond the first ``num_trajectories_to_keep`` is removed along
    with any matching ``com.csv`` / ``interp_com.csv`` companions.

    Known robot UUIDs:
        "c9ff52e1-1733-4829-a209-ebd1586a8697" for ITA
        "f2e72889-c140-4397-809f-fba1b892f17a" for LLT
        "2e60a671-dcc3-4a36-9734-a239c899b57d" for WZL

    Args:
        num_trajectories_to_keep (int): Number of trajectories to keep.
        robot_uuid (str): Robot UUID. Defaults to ITA.

    Returns:
        None: This function does not return anything.
    """
    train_dir = Path(f"/app/dynamics_learning/Trajectory Data/train/{robot_uuid}")
    meas_files = sorted(
        (
            str(entry)
            for entry in train_dir.iterdir()
            if entry.is_file() and str(entry).endswith("meas.csv")
        ),
        reverse=True,
    )
    for stale in meas_files[num_trajectories_to_keep:]:
        Path(stale).unlink()
        # Companion files may not exist for every measurement; skip quietly.
        companion = stale.replace("meas.csv", "com.csv")
        try:
            Path(companion).unlink()
        except FileNotFoundError:
            pass
        interpolated = companion.replace("com.csv", "interp_com.csv")
        try:
            Path(interpolated).unlink()
        except FileNotFoundError:
            pass
    return None
# Prune each robot's training folder down to the 100 newest trajectories.
delete_files(100, "c9ff52e1-1733-4829-a209-ebd1586a8697") # ITA
delete_files(100, "f2e72889-c140-4397-809f-fba1b892f17a") # LLT
# %%
import os
def check_file_pairs(directory):
    """Report whether every ``*_com.csv`` has a matching ``*_meas.csv`` and vice versa.

    Prints the set sizes/contents for inspection, then either lists each
    unmatched file or confirms that all files are properly paired.

    Args:
        directory: Path of the folder whose files are checked.
    """
    entries = os.listdir(directory)
    # Strip the suffixes so the trajectory base names can be compared.
    com_files = {name[: -len("_com.csv")] for name in entries if name.endswith("_com.csv")}
    meas_files = {name[: -len("_meas.csv")] for name in entries if name.endswith("_meas.csv")}
    print(len(com_files))
    print(com_files)
    print(meas_files)
    unmatched_com = com_files - meas_files
    unmatched_meas = meas_files - com_files
    if not unmatched_com and not unmatched_meas:
        print("All files are properly paired.")
        return
    print("Unmatched files found:")
    for base in unmatched_com:
        print(f"No matching _meas.csv file for: {base}_com.csv")
    for base in unmatched_meas:
        print(f"No matching _com.csv file for: {base}_meas.csv")
# Example usage
LLT_ROBOT_UUID = "f2e72889-c140-4397-809f-fba1b892f17a"
ITA_ROBOT_UUID = "c9ff52e1-1733-4829-a209-ebd1586a8697"
# Check the ITA robot's training folder for unpaired com/meas files.
directory = f"/app/dynamics_learning/Trajectory Data/train/{ITA_ROBOT_UUID}"
check_file_pairs(directory)
# %%
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment