diff --git a/app/db/models/__init__.py b/app/db/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7b68cfcde95f352654a63c30236dec3320b304a7 --- /dev/null +++ b/app/db/models/__init__.py @@ -0,0 +1,3 @@ +from .tracked_mitm_dataset import BaseTrackedMitMDataset, AddTrackedMitMDataset, TrackedMitMDataset +from .common import FromPydanticModelsMixin, APPLICATION_DB_SCHEMA +from .presentation import ListTrackedMitMDataset \ No newline at end of file diff --git a/app/db/models/common.py b/app/db/models/common.py new file mode 100644 index 0000000000000000000000000000000000000000..f01fa7b609c77dcf25f9598ba2a09c04fd85ff54 --- /dev/null +++ b/app/db/models/common.py @@ -0,0 +1,18 @@ +from typing import Self + +import pydantic + +APPLICATION_DB_SCHEMA = 'main' # 'APPLICATION_DB' + +class FromPydanticModelsMixin: + + def __init__(self, **kwargs): + super().__init__(**kwargs) + + @classmethod + def from_models(cls, *base_objs: pydantic.BaseModel, **kwargs) -> Self: + const_kwargs = {} + for base_obj in base_objs: + const_kwargs |= base_obj.model_dump(round_trip=True) + const_kwargs |= kwargs + return cls(**const_kwargs) diff --git a/app/db/models/presentation.py b/app/db/models/presentation.py new file mode 100644 index 0000000000000000000000000000000000000000..676e4914d25e2044a7a82ab517e304d5191f680d --- /dev/null +++ b/app/db/models/presentation.py @@ -0,0 +1,7 @@ +from mitm_tooling.definition import MITM + +from .tracked_mitm_dataset import BaseTrackedMitMDataset + +class ListTrackedMitMDataset(BaseTrackedMitMDataset): + mitm: MITM + diff --git a/app/db/models.py b/app/db/models/tracked_mitm_dataset.py similarity index 76% rename from app/db/models.py rename to app/db/models/tracked_mitm_dataset.py index 3482fde07e1963b2014bde57fe0e1cc872f3e991..4c640902cbcf2d899105d802d6dc764526a0ea79 100644 --- a/app/db/models.py +++ b/app/db/models/tracked_mitm_dataset.py @@ -1,20 +1,27 @@ import uuid +from abc import ABC from datetime import datetime 
from uuid import UUID import pydantic import sqlmodel +from mitm_tooling.definition import MITM + from mitm_tooling.representation import Header, SQLRepresentationSchema, mk_sql_rep_schema from mitm_tooling.transformation.superset.asset_bundles import MitMDatasetIdentifierBundle, DatasourceIdentifierBundle from mitm_tooling.transformation.superset.common import DBConnectionInfo -from mitm_tooling.transformation.superset.definitions.mitm_dataset import MitMDatasetIdentifier, SupersetMitMDatasetDef -from pydantic import AnyUrl, BaseModel +from mitm_tooling.transformation.superset.definitions import MitMDatasetIdentifier +from pydantic import BaseModel, AnyUrl from sqlmodel import SQLModel, Field -from .adapters import PydanticType, StrType -from app.utils.response_utils import FromBaseMixin +from app.db.adapters import StrType, PydanticType +from .common import FromPydanticModelsMixin, APPLICATION_DB_SCHEMA + -APPLICATION_DB_SCHEMA = 'main' # 'APPLICATION_DB' + +class BaseTrackedMitMDataset(BaseModel, ABC): + uuid: UUID + dataset_name: str class AddTrackedMitMDataset(BaseModel): @@ -25,9 +32,9 @@ class AddTrackedMitMDataset(BaseModel): mitm_header: Header -class TrackedMitMDataset(FromBaseMixin, AddTrackedMitMDataset, SQLModel, table=True): +class TrackedMitMDataset(FromPydanticModelsMixin, AddTrackedMitMDataset, BaseTrackedMitMDataset, SQLModel, table=True): model_config = pydantic.ConfigDict(arbitrary_types_allowed=True) - __tablename__ = 'uploaded_mitm_datasets' + __tablename__ = 'tracked_mitm_datasets' __table_args__ = {'schema': APPLICATION_DB_SCHEMA} # id: int = Field(primary_key=True, sa_column_kwargs={'autoincrement': True}) @@ -58,3 +65,7 @@ class TrackedMitMDataset(FromBaseMixin, AddTrackedMitMDataset, SQLModel, table=T def sql_rep_schema(self) -> SQLRepresentationSchema: return mk_sql_rep_schema(self.mitm_header, override_schema=self.schema_name) + #@pydantic.computed_field() + @property + def mitm(self) -> MITM: + return self.mitm_header.mitm diff --git 
a/app/db/setup.py b/app/db/setup.py index a1441a24986ccb1fcc0df6b50c5d8a60503aea06..d077a782b5df778fcddadb49c1cb267a64b5ff94 100644 --- a/app/db/setup.py +++ b/app/db/setup.py @@ -4,7 +4,8 @@ import sqlalchemy as sa from mitm_tooling.utilities.python_utils import pick_from_mapping from sqlalchemy import create_engine, inspect, Engine from sqlalchemy.orm import Session -from .utils import create_schema +from sqlmodel import SQLModel + from ..config import app_cfg logger = logging.getLogger(__name__) @@ -19,7 +20,8 @@ if MITM_DATABASE_URL.get_dialect().name == 'sqlite': engine = create_engine(MITM_DATABASE_URL, execution_options=execution_options) def init_db(): - from .models import SQLModel, APPLICATION_DB_SCHEMA + from .models import APPLICATION_DB_SCHEMA + from .utils import create_schema logger.info(f'Setting up MITM DB @ {MITM_DATABASE_URL}') with engine.connect() as conn: if APPLICATION_DB_SCHEMA not in inspect(engine).get_schema_names(): diff --git a/app/db/utils.py b/app/db/utils.py index 269b425375e11fe0d02e2d55c75a4326dd7dcfd1..a11965b665b16cb8a7e7131c96d4fcd415726199 100644 --- a/app/db/utils.py +++ b/app/db/utils.py @@ -1,28 +1,74 @@ +import contextlib +import logging +from typing import Generator from uuid import UUID import sqlalchemy as sa import sqlmodel -from mitm_tooling.extraction.sql.data_models import DBMetaInfo +from mitm_tooling.extraction.sql.data_models import DBMetaInfo, DBProbe from mitm_tooling.extraction.sql.db import connect_and_reflect +from sqlalchemy import Engine +from sqlalchemy.exc import OperationalError, SQLAlchemyError +from sqlalchemy.orm import Session from sqlalchemy.orm.session import Session -from sqlalchemy.sql.ddl import CreateSchema +from sqlalchemy.sql.ddl import CreateSchema, DropSchema +from sqlmodel import Session as ORMSession from .models import TrackedMitMDataset +from mitm_tooling.extraction.sql.db import create_db_probe -def infer_uploaded_mitm_dataset_schema(engine: sa.Engine, mitm_dataset_uuid: UUID) -> 
DBMetaInfo | None: +logger = logging.getLogger(__name__) + +@contextlib.contextmanager +def mk_session(engine: Engine) -> Generator[Session, None, None]: + with Session(engine) as session: + yield session + +@contextlib.contextmanager +def mk_orm_session(engine: Engine) -> Generator[ORMSession, None, None]: with sqlmodel.Session(engine) as session: - model = session.get(TrackedMitMDataset, (mitm_dataset_uuid,)) + yield session + + +def infer_tracked_mitm_dataset_schema(engine: sa.Engine, uuid: UUID) -> DBMetaInfo | None: + with mk_session(engine) as session: + model = session.get(TrackedMitMDataset, (uuid,)) if model is not None: - model.datasource_identifiers meta, _ = connect_and_reflect(engine, allowed_schemas={model.schema_name}) return DBMetaInfo.from_sa_meta(meta, default_schema=model.schema_name) + return None -def create_schema(conn_or_sess: sa.Connection | Session, unique_schema_name: str) -> None: +def probe_tracked_mitm_dataset_schema(engine: sa.Engine, uuid: UUID) -> DBProbe | None: + db_meta = infer_tracked_mitm_dataset_schema(engine, uuid) + if db_meta is not None: + with mk_session(engine) as session: + return create_db_probe(session, db_meta, sample_size=1000) + return None + + +def create_schema(conn_or_sess: sa.Connection | Session, unique_schema_name: str) -> bool: dialect = conn_or_sess.bind.dialect if isinstance(conn_or_sess, Session) else conn_or_sess.dialect - if dialect.name == 'sqlite': - conn_or_sess.execute(sa.text(f"ATTACH DATABASE ':memory:' AS {unique_schema_name}")) - else: - conn_or_sess.execute(CreateSchema(unique_schema_name, if_not_exists=False)) + try: + if dialect.name == 'sqlite': + conn_or_sess.execute(sa.text(f"ATTACH DATABASE ':memory:' AS {unique_schema_name}")) + else: + conn_or_sess.execute(CreateSchema(unique_schema_name, if_not_exists=False)) + return True + except SQLAlchemyError as e: + logger.error(e) + return False + +def delete_schema(conn_or_sess: sa.Connection | Session, unique_schema_name: str) -> bool: + 
dialect = conn_or_sess.bind.dialect if isinstance(conn_or_sess, Session) else conn_or_sess.dialect + try: + if dialect.name == 'sqlite': + conn_or_sess.execute(sa.text(f"DETACH DATABASE {unique_schema_name}")) + else: + conn_or_sess.execute(DropSchema(unique_schema_name, cascade=True, if_exists=False)) + return True + except SQLAlchemyError as e: + logger.error(e) + return False diff --git a/app/dependencies/db.py b/app/dependencies/db.py index 70280cdffd006c9b1325f85a0aed56eb18be951a..2f767349eede741d7fc5199be19a69d2e8f9ba8b 100644 --- a/app/dependencies/db.py +++ b/app/dependencies/db.py @@ -1,26 +1,26 @@ -from typing import Annotated +from typing import Annotated, Generator -import sqlmodel from fastapi import Depends from sqlalchemy import engine, Engine from sqlalchemy.orm import Session from sqlmodel import Session as ORMSession +from ..db.utils import mk_session, mk_orm_session from ..db.setup import engine -def get_engine(): +def get_engine() -> Generator[Engine, None, None]: yield engine -def get_session(): - with Session(engine) as session: +def get_session() -> Generator[Session, None, None]: + with mk_session(engine) as session: yield session -def get_orm_session(): - with sqlmodel.Session(engine) as session: - yield session +def get_orm_session() -> Generator[ORMSession, None, None]: + with mk_orm_session(engine) as orm_session: + yield orm_session DBEngineDependency = Annotated[Engine, Depends(get_engine)] diff --git a/app/main.py b/app/main.py index 4e9b76c585bc867a66ad1a79fa2e4a67525fb31b..9505052d0e22e10b06ed8f50874d826527706f38 100644 --- a/app/main.py +++ b/app/main.py @@ -1,13 +1,21 @@ +import logging from fastapi import FastAPI from .config import app_cfg from .dependencies.startup import lifecycle, use_route_names_as_operation_ids -from .routes import mitm_dataset, definitions +from .routes import mitm_dataset, definitions, admin, data + +# Configure logging +logging.basicConfig( + level=logging.INFO, # Set the logging level to INFO +) app = 
FastAPI(title='SupersetMitMService', lifespan=lifecycle, root_path=app_cfg['API_PREFIX']) app.include_router(mitm_dataset.router) app.include_router(definitions.router) +app.include_router(data.router) +app.include_router(admin.router) use_route_names_as_operation_ids(app) @@ -20,3 +28,4 @@ async def root(): @app.get('/health') async def health(): return {'status': 'healthy'} + diff --git a/app/routes/admin/__init__.py b/app/routes/admin/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e5d16142a14b01bab0804a708b527a6192bf4b8a --- /dev/null +++ b/app/routes/admin/__init__.py @@ -0,0 +1 @@ +from .router import router \ No newline at end of file diff --git a/app/routes/admin/router.py b/app/routes/admin/router.py new file mode 100644 index 0000000000000000000000000000000000000000..469da615dbc5c535ffdfa3834f81b79bf78c4052 --- /dev/null +++ b/app/routes/admin/router.py @@ -0,0 +1,61 @@ +import io +import logging +from typing import Literal + +from fastapi import APIRouter +from mitm_tooling.transformation.superset import write_superset_import_as_zip +from mitm_tooling.transformation.superset.definitions import MetadataType +from sqlmodel import SQLModel +from starlette.responses import StreamingResponse + +from app.dependencies.orm import TrackedMitMDatasetDependency +from app.routes.definitions.requests import GenerateIndependentMitMDatasetDefinitionRequest, \ + GenerateVisualizationsRequest +from app.routes.definitions.responses import MitMDatasetBundleResponse, MitMDatasetImportResponse, \ + VisualizationImportResponse +from ...db.models import TrackedMitMDataset, ListTrackedMitMDataset +from ...db.utils import delete_schema, mk_session, mk_orm_session +from ...dependencies.db import ORMSessionDependency, DBEngineDependency, ORMSession +from sqlmodel.sql.expression import select + + + +router = APIRouter(prefix='/admin', tags=['admin']) +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) # Ensure the logger level is 
set to INFO + + +class ClearDBResponse(SQLModel): + """Response model for clearing the database.""" + # status: Literal["success", "error"] = "error" + dropped_mitm_datasets: list[ListTrackedMitMDataset] | None = None + dropped_schemas: list[str] | None = None + +@router.post('/clear-db') +def clear_db(engine: DBEngineDependency) -> ClearDBResponse: + """Clear the database.""" + schemas_to_drop = [] + dropped_datasets = [] + with mk_orm_session(engine) as orm_session: + tracked_datasets = list(orm_session.exec(select(TrackedMitMDataset)).all()) + + orm_session: ORMSession + for tds in tracked_datasets: + schemas_to_drop.append(tds.schema_name) + orm_session.delete(tds) + dropped_mds = ListTrackedMitMDataset(uuid=tds.uuid, dataset_name=tds.dataset_name, mitm=tds.mitm) + logger.info('Deleted tracked dataset: %s', dropped_mds) + dropped_datasets.append(dropped_mds) + + orm_session.commit() + + dropped_schemas = [] + with mk_session(engine) as session: + for schema_name in schemas_to_drop: + if delete_schema(session, schema_name): + dropped_schemas.append(schema_name) + logger.info('Dropped schema: %s', schema_name) + session.commit() + + return ClearDBResponse(dropped_mitm_datasets=dropped_datasets, dropped_schemas=dropped_schemas) + diff --git a/app/routes/data/__init__.py b/app/routes/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e5d16142a14b01bab0804a708b527a6192bf4b8a --- /dev/null +++ b/app/routes/data/__init__.py @@ -0,0 +1 @@ +from .router import router \ No newline at end of file diff --git a/app/routes/data/responses.py b/app/routes/data/responses.py new file mode 100644 index 0000000000000000000000000000000000000000..d152b4e8d472e012f146537d01457e719124c2ed --- /dev/null +++ b/app/routes/data/responses.py @@ -0,0 +1,11 @@ +import pydantic +from mitm_tooling.extraction.sql.data_models.db_meta import DBMetaInfoBase +from mitm_tooling.extraction.sql.data_models.db_probe import DBProbeBase, DBProbeMinimal + + +class 
DBMetaResponse(pydantic.BaseModel): + db_meta: DBMetaInfoBase | None = None + + +class DBProbeResponse(pydantic.BaseModel): + db_probe: DBProbeMinimal | None = None diff --git a/app/routes/data/router.py b/app/routes/data/router.py new file mode 100644 index 0000000000000000000000000000000000000000..d3d24774edaf777586d8850110570390382a0ed4 --- /dev/null +++ b/app/routes/data/router.py @@ -0,0 +1,22 @@ +import logging +from uuid import UUID + +from fastapi import APIRouter, Path + +from app.db.utils import infer_tracked_mitm_dataset_schema, probe_tracked_mitm_dataset_schema +from app.dependencies.db import DBEngineDependency +from app.routes.data.responses import DBMetaResponse, DBProbeResponse + +router = APIRouter(prefix='/data', tags=['data']) +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) # Ensure the logger level is set to INFO + + +@router.get('/db-meta/{uuid}') +def get_db_meta(engine: DBEngineDependency, uuid: UUID = Path()) -> DBMetaResponse: + return DBMetaResponse(db_meta=infer_tracked_mitm_dataset_schema(engine, uuid)) + + +@router.get('/db-probe/{uuid}') +def get_db_probe(engine: DBEngineDependency, uuid: UUID = Path()) -> DBProbeResponse: + return DBProbeResponse(db_probe=probe_tracked_mitm_dataset_schema(engine, uuid)) diff --git a/app/routes/definitions/responses.py b/app/routes/definitions/responses.py index e6a6772f06f4d7325cd1227963a58e6dc77eeaae..ea53052e1b3b6f65f458785a8266ce0a6a8696ec 100644 --- a/app/routes/definitions/responses.py +++ b/app/routes/definitions/responses.py @@ -3,14 +3,14 @@ from typing import Self from mitm_tooling.transformation.superset.definitions import SupersetMitMDatasetImport, SupersetAssetsImport from mitm_tooling.transformation.superset.asset_bundles import SupersetMitMDatasetBundle -from app.utils.response_utils import FromBaseMixin +from app.db.models import FromPydanticModelsMixin -class MitMDatasetBundleResponse(FromBaseMixin, SupersetMitMDatasetBundle): +class 
MitMDatasetBundleResponse(FromPydanticModelsMixin, SupersetMitMDatasetBundle): pass -class MitMDatasetImportResponse(FromBaseMixin, SupersetMitMDatasetImport): +class MitMDatasetImportResponse(FromPydanticModelsMixin, SupersetMitMDatasetImport): pass -class VisualizationImportResponse(FromBaseMixin, SupersetAssetsImport): +class VisualizationImportResponse(FromPydanticModelsMixin, SupersetAssetsImport): pass \ No newline at end of file diff --git a/app/routes/definitions/router.py b/app/routes/definitions/router.py index 8d7b2ed5ba5d9717f62fed5121f0a01b19d86b5b..02938ea32750e8540353a9d154a2dee6caf74822 100644 --- a/app/routes/definitions/router.py +++ b/app/routes/definitions/router.py @@ -23,7 +23,7 @@ logger = logging.getLogger(__name__) def generate_mitm_dataset_bundle(request: GenerateIndependentMitMDatasetDefinitionRequest, include_visualizations: bool = False) -> MitMDatasetBundleResponse: mitm_dataset_bundle = exec_def_request(request, include_visualizations) - return MitMDatasetBundleResponse.from_base(mitm_dataset_bundle) + return MitMDatasetBundleResponse.from_models(mitm_dataset_bundle) @router.post('/mitm_dataset/import') @@ -31,7 +31,7 @@ def generate_mitm_dataset_import(request: GenerateIndependentMitMDatasetDefiniti include_visualizations: bool = False, override_metadata_type: MetadataType | None = None) -> MitMDatasetImportResponse: importable = exec_asset_import_request(request, include_visualizations, override_metadata_type) - return MitMDatasetImportResponse.from_base(importable) + return MitMDatasetImportResponse.from_models(importable) @router.post('/mitm_dataset/import/zip', response_class=StreamingResponse, @@ -51,7 +51,7 @@ def generate_mitm_dataset_import_zip(request: GenerateIndependentMitMDatasetDefi def generate_tracked_mitm_dataset_bundle(tracked_dataset: TrackedMitMDatasetDependency, include_visualizations: bool = False) -> MitMDatasetBundleResponse: mitm_dataset_bundle = exec_tracked_def_request(tracked_dataset, 
include_visualizations) - return MitMDatasetBundleResponse.from_base(mitm_dataset_bundle) + return MitMDatasetBundleResponse.from_models(mitm_dataset_bundle) @@ -61,7 +61,7 @@ def generate_tracked_mitm_dataset_import(tracked_dataset: TrackedMitMDatasetDepe importable = exec_tracked_asset_import_request(tracked_dataset, include_visualizations, override_metadata_type=override_metadata_type) - return MitMDatasetImportResponse.from_base(importable.model_dump()) + return MitMDatasetImportResponse.from_models(importable) @@ -84,7 +84,7 @@ def generate_visualizations_for_tracked_dataset(orm_session: ORMSessionDependenc tracked_dataset: TrackedMitMDatasetDependency, request: GenerateVisualizationsRequest) -> MitMDatasetBundleResponse: mitm_dataset_bundle = exec_viz_request(orm_session, tracked_dataset, request) - return MitMDatasetBundleResponse.from_base(mitm_dataset_bundle.model_dump()) + return MitMDatasetBundleResponse.from_models(mitm_dataset_bundle) @@ -93,7 +93,7 @@ def generate_visualizations_import_for_tracked_dataset(orm_session: ORMSessionDe request: GenerateVisualizationsRequest, as_assets: bool = False) -> VisualizationImportResponse: importable = exec_viz_import_request(orm_session, tracked_dataset, request, as_assets=as_assets) - return VisualizationImportResponse.from_base(importable.model_dump()) + return VisualizationImportResponse.from_models(importable) @@ -103,7 +103,7 @@ @router.post('/mitm_dataset/viz/{uuid}/import/zip', response_class=StreamingResponse, diff --git a/app/routes/mitm_dataset/export.py b/app/routes/mitm_dataset/export.py index 59686a64c6241680597e5d3b1c0d9b67a3850b08..7888f0d34d8132a552ae677d51b85acd2a60eabc 100644 --- a/app/routes/mitm_dataset/export.py +++ b/app/routes/mitm_dataset/export.py @@ -21,6 +21,8 @@ def 
export_via_mapping(tracked_dataset: TrackedMitMDataset) -> tuple[sa.Engine, cms = sql_rep_into_mappings(header, sql_rep_schema) + logger.info('Preparing to export CMs:\n%s', '\n'.join(str(cm) for cm in cms)) + db_meta_info = DBMetaInfo.from_sa_meta(sql_rep_schema.meta, default_schema=schema_name) db_metas = {SourceDBType.OriginalDB: db_meta_info} remote_engine = create_sa_engine(sql_alchemy_uri) diff --git a/app/routes/mitm_dataset/register.py b/app/routes/mitm_dataset/register.py index 9a208167b14639ac2c39b78bfdc4b55bb252066c..fccc6b815b83471675bb449d3a23b167db3964b4 100644 --- a/app/routes/mitm_dataset/register.py +++ b/app/routes/mitm_dataset/register.py @@ -13,7 +13,7 @@ def register_mitm_dataset(session: ORMSessionDependency, request: AddTrackedMitM definition = mk_mitm_dataset_bundle(request.mitm_header, db_conn_info, request.dataset_name, identifiers=identifiers) identifier_bundle = definition.identifiers - model = TrackedMitMDataset.from_base(request, identifier_bundle=identifier_bundle) + model = TrackedMitMDataset.from_models(request, identifier_bundle=identifier_bundle) session.add(model) session.commit() diff --git a/app/routes/mitm_dataset/responses.py b/app/routes/mitm_dataset/responses.py index ad9b36d3ad06b0e23810a3ef9a0922e6bcc99d5d..09daed4a0974d0f8a1152da5e12414ade02e439e 100644 --- a/app/routes/mitm_dataset/responses.py +++ b/app/routes/mitm_dataset/responses.py @@ -1,9 +1,12 @@ +from abc import ABC from typing import Literal from uuid import UUID import pydantic +from mitm_tooling.definition import MITM +from pydantic import Field -from app.db.models import TrackedMitMDataset +from app.db.models import TrackedMitMDataset, BaseTrackedMitMDataset, ListTrackedMitMDataset class TrackMitMResponse(pydantic.BaseModel): @@ -19,6 +22,5 @@ class UploadMitMResponse(TrackMitMResponse): pass class RegisterMitMResponse(TrackMitMResponse): pass -class TrackedMitMEntry(pydantic.BaseModel): - dataset_name: str - uuid: UUID \ No newline at end of file + 
+MitMsListResponse = pydantic.TypeAdapter(list[ListTrackedMitMDataset]) diff --git a/app/routes/mitm_dataset/router.py b/app/routes/mitm_dataset/router.py index 0b02b5c2c956a5c5a1124618fa70c4583d035384..0c807d9d3a0be11c5de10df2a9ba9f71993b901f 100644 --- a/app/routes/mitm_dataset/router.py +++ b/app/routes/mitm_dataset/router.py @@ -9,13 +9,14 @@ from mitm_tooling.utilities.identifiers import mk_uuid from pydantic import ValidationError from starlette.responses import StreamingResponse -from app.db.models import TrackedMitMDataset +from app.db.models import TrackedMitMDataset, ListTrackedMitMDataset from app.dependencies.db import DBEngineDependency, ORMSessionDependency from app.dependencies.orm import TrackedMitMDatasetDependency from .export import export_via_mapping from .requests import AddTrackedMitMDatasetRequest, RegisterExternalMitMDatasetRequest -from .responses import UploadMitMResponse, RegisterMitMResponse, TrackedMitMEntry +from .responses import UploadMitMResponse, RegisterMitMResponse, MitMsListResponse from .upload import upload_mitm_file +from ...db.utils import mk_session router = APIRouter(prefix='/mitm_dataset', tags=['mitm_dataset']) logger = logging.getLogger(__name__) @@ -62,7 +63,7 @@ def get_mitm_dataset(tracked_dataset: TrackedMitMDatasetDependency) -> TrackedMi return tracked_dataset -@router.get('/', response_model=list[TrackedMitMEntry]) +@router.get('/', response_model=list[ListTrackedMitMDataset]) def get_mitm_datasets(session: ORMSessionDependency) -> Sequence[TrackedMitMDataset]: sequence = session.exec(sqlmodel.select(TrackedMitMDataset)).all() return sequence @@ -78,8 +79,7 @@ def delete_mitm_dataset(session: ORMSessionDependency, tracked_dataset: TrackedM responses={200: {'content': {'application/zip': {}}}}) def export_mitm_dataset(engine: DBEngineDependency, tracked_dataset: TrackedMitMDatasetDependency) -> StreamingResponse: remote_engine, exportable = export_via_mapping(tracked_dataset) - import sqlalchemy as sa - with 
sa.orm.Session(remote_engine) as session: + with mk_session(remote_engine) as session: ze = exportable.export_to_memory(session) buf = ze.to_buffer() return StreamingResponse(buf, media_type='application/zip') diff --git a/app/routes/mitm_dataset/upload.py b/app/routes/mitm_dataset/upload.py index 3513ffcac0c7a20807a578ff566470c87e2a82a9..70824beedc404e1ddd4778096701e9fd649eddd0 100644 --- a/app/routes/mitm_dataset/upload.py +++ b/app/routes/mitm_dataset/upload.py @@ -15,6 +15,8 @@ from app.db.utils import create_schema from app.dependencies.db import get_engine from app.routes.definitions.generate import mk_mitm_dataset_bundle +import logging +logger = logging.getLogger(__name__) def upload_mitm_file(mitm: MITM, mitm_zip: DataSource, @@ -32,12 +34,17 @@ def upload_mitm_data(mitm_data: MITMData, engine = engine if engine is not None else get_engine() uuid = uuid or mk_uuid() unique_schema_name = name_plus_uuid(dataset_name, uuid, sep='_') + + logger.info(f'Uploading MitM Data with uuid {uuid} into target schema {unique_schema_name} on connected DB {engine.url}.') + sql_alchemy_uri = sa_url_into_any_url(engine.url) - sql_rep_schema = mk_sql_rep_schema(mitm_data.header, override_schema=unique_schema_name, view_generators=None) + sql_rep_schema = mk_sql_rep_schema(mitm_data.header, override_schema=unique_schema_name, skip_fk_constraints=True) with engine.connect() as connection: create_schema(connection, unique_schema_name) + logger.info(f'Created schema: {unique_schema_name}') insert_mitm_data(connection, sql_rep_schema, mitm_data) + logger.info(f'Inserted MitM Data into schema: {unique_schema_name}') connection.commit() mds_bundle = mk_mitm_dataset_bundle(mitm_data.header, @@ -54,4 +61,6 @@ def upload_mitm_data(mitm_data: MITMData, session.commit() session.refresh(model) + logger.info(f'Tracked uploaded MitM Dataset in application DB:\n{model}') + return model diff --git a/app/utils/response_utils.py b/app/utils/response_utils.py index 
fa02c3dc5eeae1818a94d182d1171381559b5741..b28b04f643122b019e912540f228c8ed20be9eeb 100644 --- a/app/utils/response_utils.py +++ b/app/utils/response_utils.py @@ -1,14 +1,3 @@ -from typing import Self -import pydantic -class FromBaseMixin: - - def __init__(self, **kwargs): - super().__init__(**kwargs) - - @classmethod - def from_base(cls, base_obj: pydantic.BaseModel, **kwargs) -> Self: - kwargs = base_obj.model_dump(round_trip=True) | kwargs - return cls(**kwargs) \ No newline at end of file diff --git a/docker-compose.yaml b/docker-compose.yaml index 81dd76adaea2d6c59d21fc48643bcc4993e267f4..2e6b30fdbe08b1f0e5f7f31fff697169f5e75216 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -2,19 +2,20 @@ services: mitm-service: build: context: . - pull: true + no_cache: true pull_policy: always container_name: test_mitm_service volumes: - ./exports:/exports - ./uploads:/uploads + - ./app:/code/app ports: - "8180:8180" env_file: docker/.env depends_on: mitm-db: condition: service_started - # command: ["fastapi", "run", "app/main.py", "--port", "8180"] + command: ["fastapi", "dev", "app/main.py", "--reload", "--host", "0.0.0.0", "--port", "8180"] mitm-db: image: timescale/timescaledb:latest-pg16 diff --git a/helm/superset-mitm-service/Chart.yaml b/helm/superset-mitm-service/Chart.yaml index 88e9b01dafee61200466c731bfccecddcd0f55ad..815b9877181fd332a12d58e2b9f9e2febaf34d74 100644 --- a/helm/superset-mitm-service/Chart.yaml +++ b/helm/superset-mitm-service/Chart.yaml @@ -15,13 +15,13 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.1 +version: 0.1.3 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. 
Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "0.1.0" +appVersion: "0.1.1" dependencies: - name: postgresql diff --git a/helm/superset-mitm-service/templates/postgresql-auth-secret.yaml b/helm/superset-mitm-service/templates/postgresql-auth-secret.yaml index f759443abaa472503b11b87e46f620067875c1c5..4c958dfb76ef36acc26d6be1a9ccd809a5dee9f9 100644 --- a/helm/superset-mitm-service/templates/postgresql-auth-secret.yaml +++ b/helm/superset-mitm-service/templates/postgresql-auth-secret.yaml @@ -19,13 +19,13 @@ apiVersion: v1 kind: Secret metadata: - name: {{ .Values.global.postgresql.configPrefix }}-postgresql-auth-env + name: {{ .Values.global.mitmPostgresql.configPrefix }}-postgresql-auth-env labels: {{- include "superset-mitm-service.labels" . | nindent 4 }} namespace: {{ .Release.Namespace }} type: Opaque stringData: - username: {{.Values.mitmDB.mitm_database_user | quote }} - password: {{.Values.mitmDB.mitm_database_password | quote }} - postgres-password: {{.Values.mitmDB.mitm_database_password | quote }} - database: {{.Values.mitmDB.mitm_database_db | quote }} + username: {{ tpl .Values.mitmDB.mitm_database_user . | quote }} + password: {{ tpl .Values.mitmDB.mitm_database_password . | quote }} + postgres-password: {{ tpl .Values.mitmDB.mitm_database_password . | quote }} + database: {{ tpl .Values.mitmDB.mitm_database_db . 
| quote }} diff --git a/helm/superset-mitm-service/templates/secret-env.yaml b/helm/superset-mitm-service/templates/secret-env.yaml index 45142c70864b8a24c7989213532a0236196fc387..d5f4208f82793b998cafd7485aaec9cdcd7430fd 100644 --- a/helm/superset-mitm-service/templates/secret-env.yaml +++ b/helm/superset-mitm-service/templates/secret-env.yaml @@ -25,10 +25,10 @@ metadata: namespace: {{ .Release.Namespace }} type: Opaque stringData: - MITM_DATABASE_DIALECT: {{.Values.mitmDB.mitm_database_dialect | quote }} - MITM_DATABASE_USER: {{.Values.mitmDB.mitm_database_user | quote }} - MITM_DATABASE_PASSWORD: {{.Values.mitmDB.mitm_database_password | quote }} + MITM_DATABASE_DIALECT: {{ tpl .Values.mitmDB.mitm_database_dialect . | quote }} + MITM_DATABASE_USER: {{ tpl .Values.mitmDB.mitm_database_user . | quote }} + MITM_DATABASE_PASSWORD: {{ tpl .Values.mitmDB.mitm_database_password . | quote }} MITM_DATABASE_HOST: {{ tpl .Values.mitmDB.mitm_database_host . | quote }} - MITM_DATABASE_PORT: {{.Values.mitmDB.mitm_database_port | quote }} - MITM_DATABASE_DB: {{ .Values.mitmDB.mitm_database_db | quote }} + MITM_DATABASE_PORT: {{ tpl .Values.mitmDB.mitm_database_port . | quote }} + MITM_DATABASE_DB: {{ tpl .Values.mitmDB.mitm_database_db . 
| quote }} diff --git a/helm/superset-mitm-service/values.yaml b/helm/superset-mitm-service/values.yaml index b13369eadc23c19fec514e169c84f592710f0940..5ee29b532cd3f944f950acab5674da723d1ba9b7 100644 --- a/helm/superset-mitm-service/values.yaml +++ b/helm/superset-mitm-service/values.yaml @@ -23,7 +23,7 @@ ingress: enabled: false global: - postgresql: + mitmPostgresql: configPrefix: "superset-mitm-service" security: @@ -154,8 +154,8 @@ connections: origin: "http://localhost:8080" # can be overridden with a template string apiConfig: - EXPORT_DIR: "/exports/" - UPLOAD_DIR: "/uploads/" + export_dir: "/exports/" + upload_dir: "/uploads/" mitm-postgresql: @@ -169,7 +169,7 @@ mitm-postgresql: database: "mitm_db" # apparently this does *not* override the username and database setting which is annoying - existingSecret: '{{ .Values.global.postgresql.configPrefix }}-postgresql-auth-env' + existingSecret: '{{ .Values.global.mitmPostgresql.configPrefix }}-postgresql-auth-env' serviceAccount: automountServiceAccountToken: true diff --git a/justfile b/justfile index 835a43cc921985efc91ea67adecbd618d816e8a7..82edd1518088e638e6e099c8912bc5886105279b 100644 --- a/justfile +++ b/justfile @@ -9,6 +9,9 @@ lock: sync: uv sync +sync-mt: + uv sync --upgrade-package mitm-tooling + requirements: uv export --no-hashes > requirements.txt @@ -23,6 +26,12 @@ up: down: docker compose down +clippy: + cat untracked.txt | Set-Clipboard + +rebuild-service: + docker compose up -d --build --force-recreate mitm-service + helm-package: cd helm/ && helm package superset-mitm-service @@ -47,4 +56,19 @@ kube-forward: kubectl --context=c4c --namespace mdata port-forward $POD_NAME 8080:$CONTAINER_PORT kube *arg: - kubectl --context=c4c -n mdata {{arg}} \ No newline at end of file + kubectl --context=c4c -n mdata {{arg}} + +kubeinto svc="superset-mitm-service" inst="superset-mitm-service": + #! 
powershell + $POD_NAME = kubectl --context=c4c get pods --namespace mdata -l "app.kubernetes.io/name={{svc}},app.kubernetes.io/instance={{inst}}" -o jsonpath="{.items[0].metadata.name}" + kubectl --context=c4c --namespace mdata exec --stdin --tty $POD_NAME -- /bin/bash + +kubeinto-exporter comp="backend": + #! powershell + $POD_NAME = kubectl --context=c4c get pods --namespace mdata -l "app.kubernetes.io/name=maed-exporter-app,app.kubernetes.io/component={{comp}}" -o jsonpath="{.items[0].metadata.name}" + kubectl --context=c4c --namespace mdata exec --stdin --tty $POD_NAME -- /bin/bash + +kubelogs svc="superset-mitm-service": + #! powershell + $POD_NAME = kubectl --context=c4c get pods --namespace mdata -l "app.kubernetes.io/name={{svc}},app.kubernetes.io/instance=superset-mitm-service" -o jsonpath="{.items[0].metadata.name}" + kubectl --context=c4c --namespace mdata logs $POD_NAME \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 2fbcc44e37d3c66f3cd03aa039047274827b4e5d..57f210becc7f0a5ec259dbe16aa97aa3704101ed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,7 @@ url = "file:///C:/Users/leah/PycharmProjects/mitm-tooling" explicit = true [tool.uv.sources] -# mitm-tooling = { path = "file:///C:/Users/leah/PycharmProjects/mitm-tooling", editable = true } +# mitm-tooling = { path = "C:/Users/leah/PycharmProjects/mitm-tooling", editable = true } mitm-tooling = { git = "https://git-ce.rwth-aachen.de/machine-data/mitm-tooling.git", branch = "master" } [build-system] diff --git a/startup.sh b/startup.sh index 64bef21589a1f1a741178478b3703ebfb7bda202..58cf3d89b3639ee825469a8c130fd719638c45ba 100644 --- a/startup.sh +++ b/startup.sh @@ -1,2 +1,2 @@ #!/bin/bash -fastapi run app/main.py --reload --port "${API_PORT:-8180}" \ No newline at end of file +fastapi run app/main.py --port "${API_PORT:-8180}" \ No newline at end of file diff --git a/test/admin.http b/test/admin.http new file mode 100644 index 
0000000000000000000000000000000000000000..2edaec2e36882e00e371f02eb4a372f1544e6f8e --- /dev/null +++ b/test/admin.http @@ -0,0 +1,11 @@ +### + +# @name Drop MitM datasets +POST http://localhost:{{port}}/admin/clear-db + +### + +# @name Drop MitM datasets +POST http://localhost:{{port}}/admin/clear-db + +### \ No newline at end of file diff --git a/test/data.http b/test/data.http new file mode 100644 index 0000000000000000000000000000000000000000..d270b5212b85c5cad795df60c64e197dca1168e4 --- /dev/null +++ b/test/data.http @@ -0,0 +1,11 @@ +### + +# @name Probe MitM Dataset Schema +GET http://localhost:{{port}}/data/db-meta/{{uuid}} + +### + +# @name Probe MitM Dataset Tables +GET http://localhost:{{port}}/data/db-probe/{{uuid}} + +### diff --git a/test/dataset_processing.py b/test/dataset_processing.py new file mode 100644 index 0000000000000000000000000000000000000000..2d724ab12b0216c6b809c13e5358e0143be05c72 --- /dev/null +++ b/test/dataset_processing.py @@ -0,0 +1,17 @@ + +import mitm_tooling.io as mio +from mitm_tooling.representation import mk_sqlite +from mitm_tooling.transformation.df import mitm_data_into_mitm_dataframes + + +def trim(p): + data = mio.read_zip(p).as_specialized() + for c, df in data.concept_dfs.items(): + data.concept_dfs[c] = df.iloc[:10_000] + for c, df in data.concept_dfs.items(): + print(c, len(df)) + mio.write_zip('synthetic-trimmed.maed', data.as_generalized()) + mk_sqlite(data, 'synthetic-trimmed.sqlite') + +if __name__ == '__main__': + trim('synthetic.maed') \ No newline at end of file diff --git a/test/definitions.http b/test/definitions.http index d87f9c313cf6e6f3f1e885329dcccf1de1cf16da..8488787bf9da7fda51f6977401bb386026b21826 100644 --- a/test/definitions.http +++ b/test/definitions.http @@ -8,7 +8,9 @@ Accept: application/json ### -GET http://localhost:{{port}}/definitions/mitm_dataset/{{uuid}}/import/zip?include_visualizations=False +GET 
http://localhost:{{port}}/definitions/mitm_dataset/{{uuid}}/import/zip?include_visualizations=True Accept: application/zip +>> generated_mds_import.zip + ### \ No newline at end of file diff --git a/test/http-client.env.json b/test/http-client.env.json index 6816d229c420f41142d9f5bb2708f55abc883831..dae25fc7dfe9e4a9aa132e18680ddbe0f6659365 100644 --- a/test/http-client.env.json +++ b/test/http-client.env.json @@ -1,15 +1,15 @@ { "dev": { "port": "8181", - "uuid": "ebf7b689-ce39-4de6-acad-af01ccf76f75", + "uuid": "8e8e00c1-edf1-4d56-863b-938ec8b07eb6" }, "docker": { "port": "8180", - "uuid": "8f09a527-3e69-4839-82e9-8db3dd38268c" + "uuid": "8ff3bd53-a5d6-40fa-848b-d8b271c7043c" }, "superset": { "port": "8180", - "uuid": "b4004d6a-bcaa-4a48-aa54-271b074109ca" + "uuid": "607d9d82-0435-4984-bdbf-6512960cd69f" }, "kubernetes": { "port": "8080", diff --git a/test/upload.http b/test/upload.http index a1ca8aec6f66577cdd40821e68cc658b060e1f96..d43706e1b953384e04632e4c164b21730792ee51 100644 --- a/test/upload.http +++ b/test/upload.http @@ -1,20 +1,22 @@ ### # @name Upload MAED dataset +# @timeout 180 # @connection-timeout 180 POST http://localhost:{{port}}/mitm_dataset/upload?dataset_name=myname_0&mitm=MAED Accept: application/json Content-Type: multipart/form-data; boundary=WebAppBoundary --WebAppBoundary -Content-Disposition: form-data; name="mitm_zip"; filename="synthetic.maed" +Content-Disposition: form-data; name="mitm_zip"; filename="synthetic-trimmed.maed" Content-Type: application/zip -< ./synthetic.maed +< ./synthetic-trimmed.maed --WebAppBoundary-- ### +# @name List MitM datasets GET http://localhost:{{port}}/mitm_dataset/ ###