Skip to content
Snippets Groups Projects
Commit 971cdc8e authored by Leon Michel Gorißen's avatar Leon Michel Gorißen
Browse files

feat: Update Dockerfile and add coscine_api_retrieval module

- Dockerfile.dynamics_learning: Created .logs directory to store log files.
- coscine_api_retrieval.py: Added new module for retrieving data from Coscine API with logging configuration.
- main.py: Integrated the coscine_api_retrieval module to download resource content.
- requirements.txt: Updated dependencies to reflect necessary packages for the new functionalities.
parent 0d8eb45e
No related branches found
No related tags found
No related merge requests found
......@@ -2,6 +2,8 @@ FROM nvcr.io/nvidia/tensorflow:24.04-tf2-py3
WORKDIR /app
RUN mkdir .logs
# COPY . .
# Make entrypoint executable
......
#!/usr/bin/python3
# -*- coding:utf-8 -*-
# Copyright Leon Gorissen
# Released under MIT License
# Contact us for other licensing options.
import logging
import coscine
import os
from pathlib import Path
from typing import List
# Configure logging
# Module logger named 'coscine_data_retrieval_logger'; INFO and above is emitted.
logger = logging.getLogger('coscine_data_retrieval_logger')
logger.setLevel(logging.INFO)

# Single message format shared by every handler attached below.
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Console handler: always available.
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)
console_handler.setFormatter(formatter)
logger.addHandler(console_handler)

# File handler: logging.FileHandler raises if the target directory does not
# exist, which would abort the import of this module. Create the directory if
# needed and fall back to console-only logging when the location is unusable
# (e.g. no write permission); file_handler is None in that case.
try:
    os.makedirs('/app/.logs', exist_ok=True)
    file_handler = logging.FileHandler('/app/.logs/coscine_data_retrieval.log')
except OSError as e:
    file_handler = None
    logger.warning(f"File logging disabled, could not open /app/.logs/coscine_data_retrieval.log: {e}")
else:
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
# Retrieve environment variables for API token
def get_env_variable(var_name):
    """Return the value of an environment variable, failing if it is unset.

    Args:
        var_name: Name of the environment variable to look up.

    Returns:
        The value of the variable as stored in the process environment.

    Raises:
        EnvironmentError: If the variable is not present in the environment.
    """
    env_value = os.getenv(var_name)
    if env_value is not None:
        return env_value
    raise EnvironmentError(f"The environment variable {var_name} is not set.")
# Read the Coscine API token from the COSCINE_API_TOKEN environment variable.
try:
    API_TOKEN = get_env_variable("COSCINE_API_TOKEN")
except EnvironmentError as e:
    # Report the problem through the module logger (instead of print) so it is
    # emitted by the same handlers and in the same format as all other messages.
    logger.error(e)
    # The module continues with no token, as before.
    # NOTE(review): the client calls below presumably fail without a valid
    # token - confirm whether the import should abort here instead.
    API_TOKEN = None

# Module-level Coscine handles used as defaults by the functions below.
CLIENT = coscine.ApiClient(API_TOKEN)
PROJECT = CLIENT.project("IoP Ws A.III FER WWL Demo")  # Look up the Coscine project by name
logger.info("Connecting to Coscine Project\n%s", PROJECT)
RESOURCE = PROJECT.resource("Trajectory Data")  # Look up the resource inside that project
logger.info("Addressing resource\n%s", RESOURCE)
def download_resource_content(resource: coscine.resource.Resource = RESOURCE) -> None:
    """Download the files of the resource that do not exist locally yet.

    The remote paths returned by ``get_resource_content`` are compared with the
    local paths returned by ``get_downloaded_files`` (called with its default
    directory). Every remote file without a local counterpart is downloaded to
    ``./Trajectory Data/<path>``. A failed download is logged as a warning and
    does not stop the remaining downloads.

    Args:
        resource (coscine.resource.Resource, optional): The resource to download from. Defaults to RESOURCE.
    """
    resource_files = set(get_resource_content(resource))
    existing_files = set(get_downloaded_files())
    # Only files that are on Coscine but not on disk can be fetched. A symmetric
    # difference would also select local-only files, which cannot be downloaded.
    # Sorting gives a reproducible download order and log message.
    missing_files = sorted(resource_files - existing_files)
    if not missing_files:
        return
    logger.info(f"Files not yet downloaded are {missing_files}. These files will now be downloaded.")
    for file in missing_files:
        try:
            resource.file(file).download(path=f"./Trajectory Data/{file}")
        except Exception as e:
            # Best effort: keep going with the remaining files.
            logger.warning(f"Download of {file} failed: {e}")
def get_resource_content(resource: coscine.resource.Resource = RESOURCE) -> List:
    """Get the paths of the contents of the specified resource.

    The resource is listed recursively. Entries whose string representation
    ends with "/" are skipped (presumably these are folders - verify against
    the Coscine SDK).

    Args:
        resource (coscine.resource.Resource, optional): Coscine Resource to use. Defaults to RESOURCE.

    Returns:
        List: The ``path`` attribute of every remaining entry.
    """
    return [
        entry.path
        for entry in resource.files(recursive=True)
        if not str(entry).endswith("/")
    ]
def get_downloaded_files(root_dir: str = f"./{RESOURCE.name}/") -> List:
    """List the files that already exist below the local download directory.

    Args:
        root_dir (str, optional): Directory where stored files are saved. Defaults to f"./{RESOURCE.name}/".

    Returns:
        List: Paths of all files found recursively below ``root_dir``, relative
            to ``root_dir`` and written with forward slashes. Empty if nothing
            is found.
    """
    root_path = Path(root_dir)
    # relative_to() strips exactly the root directory, however many components
    # it has (absolute or nested roots included). as_posix() keeps the separator
    # "/" on every platform.
    return [
        path.relative_to(root_path).as_posix()
        for path in root_path.rglob('*')
        if path.is_file()
    ]
# Script entry point: when this module is executed directly, download the
# resource files that are not present locally yet.
if __name__ == "__main__":
    download_resource_content()
    # Manual debugging helpers, left disabled:
    #get_resource_content()
    #get_downloaded_files()
\ No newline at end of file
from dynamics_learning.data_retrieval.coscine_api_retrieval import download_resource_content
# Entry point of the application: download the resource content using the
# defaults of download_resource_content.
if __name__ == "__main__":
    download_resource_content()
\ No newline at end of file
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment