Skip to content
Snippets Groups Projects
Commit eb7c4544 authored by Leon Michel Gorißen
Browse files

Added `check_file_pairs` function to validate paired `_com` and `_meas` files...

Added `check_file_pairs` function to validate paired `_com` and `_meas` files in the training data directory.

- Imported `check_file_pairs` from `dataset_analysis` and integrated it into the main script for data preprocessing.
- Updated `foundation_model.py` to call `check_file_pairs` before analyzing and interpolating data in the `train` directory.
- Standardized whitespace and formatting for better readability in `test.py`, aligning with Python style conventions.
parent 106df228
No related branches found
No related tags found
No related merge requests found
...@@ -16,7 +16,7 @@ from pritty_logger import RichLogger ...@@ -16,7 +16,7 @@ from pritty_logger import RichLogger
import wandb import wandb
# from dynamics_learning.data_retrieval import download_resource_content # from dynamics_learning.data_retrieval import download_resource_content
from dynamics_learning.preprocessing.dataset_analysis import analyze from dynamics_learning.preprocessing.dataset_analysis import analyze, check_file_pairs
from dynamics_learning.preprocessing.trajectory_interpolation import interpolate from dynamics_learning.preprocessing.trajectory_interpolation import interpolate
from dynamics_learning.sweep.setup import setup_sweep from dynamics_learning.sweep.setup import setup_sweep
...@@ -36,6 +36,7 @@ if __name__ == "__main__": ...@@ -36,6 +36,7 @@ if __name__ == "__main__":
local_resource_path = Path("/app/dynamics_learning/Trajectory Data") local_resource_path = Path("/app/dynamics_learning/Trajectory Data")
# preprocess data # preprocess data
check_file_pairs(local_resource_path / "train")
attained_data, command_data = analyze(local_resource_path / "train") attained_data, command_data = analyze(local_resource_path / "train")
interpolated_command_data = interpolate(local_resource_path / "train") interpolated_command_data = interpolate(local_resource_path / "train")
......
...@@ -2,8 +2,6 @@ ...@@ -2,8 +2,6 @@
from pathlib import Path from pathlib import Path
# from dynamics_learning.data_retrieval import download_resource_content_into_uuid_folders # from dynamics_learning.data_retrieval import download_resource_content_into_uuid_folders
from pathlib import Path
from typing import List, Tuple
import coscine import coscine
import coscine.resource import coscine.resource
...@@ -30,6 +28,7 @@ RESOURCE = PROJECT.resource( ...@@ -30,6 +28,7 @@ RESOURCE = PROJECT.resource(
"Trajectory Data" "Trajectory Data"
) # Address the specific resource in the project ) # Address the specific resource in the project
def download_resource_content_into_uuid_folders(): def download_resource_content_into_uuid_folders():
"""Download the resource content into folders named after the robot UUID. Keeps only 50 newest trajectories per robot.""" """Download the resource content into folders named after the robot UUID. Keeps only 50 newest trajectories per robot."""
files = RESOURCE.files(path="train", recursive=True, with_metadata=True) files = RESOURCE.files(path="train", recursive=True, with_metadata=True)
...@@ -44,6 +43,7 @@ def download_resource_content_into_uuid_folders(): ...@@ -44,6 +43,7 @@ def download_resource_content_into_uuid_folders():
# logger.info(f"Keeping only 50 trajectories per robot.") # logger.info(f"Keeping only 50 trajectories per robot.")
# delete_files(50, robot_uuid) # delete_files(50, robot_uuid)
def delete_files(num_trajectories_to_keep: int, robot_uuid: str) -> None: def delete_files(num_trajectories_to_keep: int, robot_uuid: str) -> None:
"""Delete files from the training data directory. """Delete files from the training data directory.
...@@ -93,10 +93,11 @@ if __name__ == "__main__": ...@@ -93,10 +93,11 @@ if __name__ == "__main__":
# %% # %%
from pathlib import Path
from datetime import datetime, timedelta from datetime import datetime, timedelta
root = Path("/app/dynamics_learning/dummy") root = Path("/app/dynamics_learning/dummy")
def create_txt_files(directory: Path = root) -> None: def create_txt_files(directory: Path = root) -> None:
current_time = datetime.now() current_time = datetime.now()
...@@ -104,17 +105,16 @@ def create_txt_files(directory: Path = root) -> None: ...@@ -104,17 +105,16 @@ def create_txt_files(directory: Path = root) -> None:
adjusted_time = current_time + timedelta(seconds=index) adjusted_time = current_time + timedelta(seconds=index)
file_name = f"{adjusted_time.strftime('%Y%m%d_%H%M%S')}.txt" file_name = f"{adjusted_time.strftime('%Y%m%d_%H%M%S')}.txt"
file_path = directory / file_name file_path = directory / file_name
with file_path.open('w') as file: with file_path.open("w") as file:
file.write(str(index)) file.write(str(index))
# Run the function to create the files # Run the function to create the files
create_txt_files(root) create_txt_files(root)
# %% # %%
files = [ files = [
str(file) str(file)
for file in Path( for file in Path("/app/dynamics_learning/dummy").iterdir()
f"/app/dynamics_learning/dummy"
).iterdir()
if file.is_file() and str(file).endswith(".txt") if file.is_file() and str(file).endswith(".txt")
] ]
files.sort(reverse=True) files.sort(reverse=True)
...@@ -124,16 +124,19 @@ for file in files[10:]: ...@@ -124,16 +124,19 @@ for file in files[10:]:
Path(file).unlink() Path(file).unlink()
files = [ files = [
str(file) str(file)
for file in Path( for file in Path("/app/dynamics_learning/dummy").iterdir()
f"/app/dynamics_learning/dummy"
).iterdir()
if file.is_file() and str(file).endswith(".txt") if file.is_file() and str(file).endswith(".txt")
] ]
files.sort(reverse=True) files.sort(reverse=True)
files files
# %% # %%
# %% # %%
def delete_files(num_trajectories_to_keep: int, robot_uuid: str="c9ff52e1-1733-4829-a209-ebd1586a8697") -> None: def delete_files(
num_trajectories_to_keep: int,
robot_uuid: str = "c9ff52e1-1733-4829-a209-ebd1586a8697",
) -> None:
"""Delete files from the training data directory. """Delete files from the training data directory.
Files are sorted by date and the newest files are kept. Files are sorted by date and the newest files are kept.
...@@ -172,18 +175,20 @@ def delete_files(num_trajectories_to_keep: int, robot_uuid: str="c9ff52e1-1733-4 ...@@ -172,18 +175,20 @@ def delete_files(num_trajectories_to_keep: int, robot_uuid: str="c9ff52e1-1733-4
pass pass
return None return None
delete_files(100, "c9ff52e1-1733-4829-a209-ebd1586a8697") # ITA delete_files(100, "c9ff52e1-1733-4829-a209-ebd1586a8697") # ITA
delete_files(100, "f2e72889-c140-4397-809f-fba1b892f17a") # LLT delete_files(100, "f2e72889-c140-4397-809f-fba1b892f17a") # LLT
# %% # %%
import os import os
def check_file_pairs(directory): def check_file_pairs(directory):
# Get all files in the directory # Get all files in the directory
files = os.listdir(directory) files = os.listdir(directory)
# Split files into two groups based on their suffix # Split files into two groups based on their suffix
com_files = set(f[:-8] for f in files if f.endswith('_com.csv')) com_files = set(f[:-8] for f in files if f.endswith("_com.csv"))
meas_files = set(f[:-9] for f in files if f.endswith('_meas.csv')) meas_files = set(f[:-9] for f in files if f.endswith("_meas.csv"))
print(len(com_files)) print(len(com_files))
print(com_files) print(com_files)
...@@ -203,6 +208,7 @@ def check_file_pairs(directory): ...@@ -203,6 +208,7 @@ def check_file_pairs(directory):
else: else:
print("All files are properly paired.") print("All files are properly paired.")
# Example usage # Example usage
LLT_ROBOT_UUID = "f2e72889-c140-4397-809f-fba1b892f17a" LLT_ROBOT_UUID = "f2e72889-c140-4397-809f-fba1b892f17a"
ITA_ROBOT_UUID = "c9ff52e1-1733-4829-a209-ebd1586a8697" ITA_ROBOT_UUID = "c9ff52e1-1733-4829-a209-ebd1586a8697"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment