From bf4c661d496ea3f910534cf1f4d3fcc5820cf9ee Mon Sep 17 00:00:00 2001 From: Leah Tacke genannt Unterberg <leah.tgu@pads.rwth-aachen.de> Date: Wed, 26 Feb 2025 17:03:28 +0100 Subject: [PATCH] major representation and transformation refactor --- .idea/mitm-tooling.iml | 1 - justfile | 2 +- mitm_tooling/__init__.py | 4 +- .../extraction/sql/data_models/__init__.py | 8 +- .../extraction/sql/data_models/db_meta.py | 9 +- .../extraction/sql/data_models/db_probe.py | 5 +- .../sql/data_models/table_identifiers.py | 7 +- .../sql/data_models/virtual_view.py | 2 +- mitm_tooling/extraction/sql/db/db_probing.py | 3 +- .../extraction/sql/db/db_reflection.py | 3 +- .../extraction/sql/db/db_virtual_view.py | 3 +- .../extraction/sql/mapping/mapping.py | 3 +- .../sql/transformation/db_transformation.py | 4 +- mitm_tooling/io/__init__.py | 2 +- mitm_tooling/io/importing.py | 25 ++-- mitm_tooling/representation/__init__.py | 13 +- mitm_tooling/representation/common.py | 5 + .../representation/df_representation.py | 2 +- .../intermediate_representation.py | 15 +-- mitm_tooling/representation/sql/__init__.py | 0 mitm_tooling/representation/sql/common.py | 4 + .../representation/sql_representation.py | 121 ++++++++++-------- mitm_tooling/transformation/__init__.py | 5 +- mitm_tooling/transformation/df/__init__.py | 4 +- ...transformation.py => from_intermediate.py} | 40 +----- .../transformation/df/into_intermediate.py | 41 ++++++ mitm_tooling/transformation/sql/__init__.py | 1 + .../transformation/sql/from_intermediate.py | 18 +++ .../transformation/superset/__init__.py | 7 +- .../transformation/superset/common.py | 44 +++++++ .../superset/definition_bundles.py | 53 ++++++++ .../superset/definitions/__init__.py | 4 +- .../definitions/{high_level.py => assets.py} | 94 ++++++++++---- .../superset/definitions/charts.py | 14 +- .../superset/definitions/constants.py | 27 ++-- .../superset/definitions/core.py | 51 ++++---- .../superset/definitions/post_processing.py | 9 +- .../transformation/superset/exporting.py | 45 +++++++ .../superset/factories/assets.py | 21 +++ .../superset/factories/charts.py | 46 ++++--- .../transformation/superset/factories/core.py | 4 +- .../superset/factories/dashboard.py | 14 ++ .../superset/factories/database.py | 15 +++ .../superset/factories/dataset.py | 9 +- .../superset/factories/datasource.py | 9 -- .../superset/factories/mitm_dataset.py | 14 ++ .../factories/mitm_specific/__init__.py | 0 .../factories/mitm_specific/maed_charts.py | 40 ++++++ .../mitm_specific/maed_dashboards.py | 11 ++ .../superset/factories/query.py | 13 +- .../superset/from_intermediate.py | 22 ++++ .../transformation/superset/from_sql.py | 59 +++++++++ .../transformation/superset/interface.py | 37 ++++++ .../superset/mitm_specific/__init__.py | 1 + .../mitm_specific/maed_visualization.py | 14 ++ .../superset/superset_representation.py | 100 --------------- mitm_tooling/utilities/io_utils.py | 13 +- mitm_tooling/utilities/python_utils.py | 19 ++- mitm_tooling/utilities/sql_utils.py | 14 +- pyproject.toml | 2 +- test/something.py | 23 +++- 61 files changed, 812 insertions(+), 386 deletions(-) create mode 100644 mitm_tooling/representation/sql/__init__.py create mode 100644 mitm_tooling/representation/sql/common.py rename mitm_tooling/transformation/df/{intermediate_transformation.py => from_intermediate.py} (57%) create mode 100644 mitm_tooling/transformation/df/into_intermediate.py create mode 100644 mitm_tooling/transformation/sql/__init__.py create mode 100644 mitm_tooling/transformation/sql/from_intermediate.py create mode 100644 mitm_tooling/transformation/superset/common.py create mode 100644 mitm_tooling/transformation/superset/definition_bundles.py rename mitm_tooling/transformation/superset/definitions/{high_level.py => assets.py} (60%) create mode 100644 mitm_tooling/transformation/superset/exporting.py create mode 100644 mitm_tooling/transformation/superset/factories/assets.py create mode 100644 mitm_tooling/transformation/superset/factories/dashboard.py create mode 100644 mitm_tooling/transformation/superset/factories/database.py delete mode 100644 mitm_tooling/transformation/superset/factories/datasource.py create mode 100644 mitm_tooling/transformation/superset/factories/mitm_dataset.py create mode 100644 mitm_tooling/transformation/superset/factories/mitm_specific/__init__.py create mode 100644 mitm_tooling/transformation/superset/factories/mitm_specific/maed_charts.py create mode 100644 mitm_tooling/transformation/superset/factories/mitm_specific/maed_dashboards.py create mode 100644 mitm_tooling/transformation/superset/from_intermediate.py create mode 100644 mitm_tooling/transformation/superset/from_sql.py create mode 100644 mitm_tooling/transformation/superset/interface.py create mode 100644 mitm_tooling/transformation/superset/mitm_specific/__init__.py create mode 100644 mitm_tooling/transformation/superset/mitm_specific/maed_visualization.py delete mode 100644 mitm_tooling/transformation/superset/superset_representation.py diff --git a/.idea/mitm-tooling.iml b/.idea/mitm-tooling.iml index 3f7d7f5..cef4abc 100644 --- a/.idea/mitm-tooling.iml +++ b/.idea/mitm-tooling.iml @@ -2,7 +2,6 @@ <module version="4"> <component name="NewModuleRootManager"> <content url="file://$MODULE_DIR$"> - <sourceFolder url="file://$MODULE_DIR$/mitm_tooling" isTestSource="false" /> <sourceFolder url="file://$MODULE_DIR$/test" isTestSource="true" /> </content> <orderEntry type="jdk" jdkName="Poetry (mitm-tooling)" jdkType="Python SDK" /> diff --git a/justfile b/justfile index 3858bd5..ccd579b 100644 --- a/justfile +++ b/justfile @@ -1,7 +1,7 @@ set windows-shell := ["pwsh", "-c"] lock: - @poetry lock --no-update + @poetry lock update: @poetry update --with dev diff --git a/mitm_tooling/__init__.py b/mitm_tooling/__init__.py index 7f899da..4b2ad56 100644 --- a/mitm_tooling/__init__.py +++ b/mitm_tooling/__init__.py @@ -1,2 +1,2 @@ -from . import data_types, definition, extraction, io, representation, utilities -__all__ = ['data_types', 'definition', 'extraction', 'io', 'representation', 'utilities'] \ No newline at end of file +# from . import data_types, definition, extraction, io, representation, utilities +# __all__ = ['data_types', 'definition', 'extraction', 'io', 'representation', 'utilities'] diff --git a/mitm_tooling/extraction/sql/data_models/__init__.py b/mitm_tooling/extraction/sql/data_models/__init__.py index 134a447..65810b3 100644 --- a/mitm_tooling/extraction/sql/data_models/__init__.py +++ b/mitm_tooling/extraction/sql/data_models/__init__.py @@ -1,11 +1,13 @@ # noinspection PyUnresolvedReferences from .db_meta import Queryable, ColumnProperties, TableMetaInfo, DBMetaInfo, ForeignKeyConstraint, ExplicitTableSelection, \ - ExplicitColumnSelection, ExplicitSelectionUtils, ColumnName + ExplicitColumnSelection, ExplicitSelectionUtils +from mitm_tooling.representation import ColumnName # noinspection PyUnresolvedReferences from .db_probe import TableProbe, DBProbe, SampleSummary # noinspection PyUnresolvedReferences -from .table_identifiers import SourceDBType, SchemaName, TableName, TableIdentifier, AnyTableIdentifier, \ - LocalTableIdentifier, AnyLocalTableIdentifier, ShortTableIdentifier, LongTableIdentifier +from .table_identifiers import SourceDBType, TableIdentifier, AnyTableIdentifier, \ + LocalTableIdentifier, AnyLocalTableIdentifier, LongTableIdentifier +from mitm_tooling.representation.sql.common import TableName, SchemaName, ShortTableIdentifier # noinspection PyUnresolvedReferences from .virtual_view import TypedRawQuery, VirtualView, VirtualDB, CompiledVirtualView from . import base diff --git a/mitm_tooling/extraction/sql/data_models/db_meta.py b/mitm_tooling/extraction/sql/data_models/db_meta.py index 44903b5..6ffcc00 100644 --- a/mitm_tooling/extraction/sql/data_models/db_meta.py +++ b/mitm_tooling/extraction/sql/data_models/db_meta.py @@ -7,10 +7,10 @@ from pydantic import Field from sqlalchemy import Table, MetaData from mitm_tooling.data_types import MITMDataType, SA_SQLTypeName, sa_sql_to_mitm_type -from .table_identifiers import TableName, SchemaName, ShortTableIdentifier, LocalTableIdentifier, \ - AnyLocalTableIdentifier -from mitm_tooling.representation.intermediate_representation import ColumnName +from mitm_tooling.representation.sql_representation import ColumnName, TableName, SchemaName, ShortTableIdentifier from mitm_tooling.utilities.sql_utils import unqualify +from .table_identifiers import LocalTableIdentifier, \ + AnyLocalTableIdentifier ExplicitTableSelection = dict[SchemaName, set[TableName]] ExplicitColumnSelection = dict[SchemaName, dict[TableName, set[ColumnName]]] @@ -230,7 +230,8 @@ class DBMetaInfo(DBMetaInfoBase): meta = MetaData() filtered_tms = { schema: {table_name: tm.filter(self, meta, table_selection, column_selection) for table_name, tm in - tables.items() if ExplicitSelectionUtils.table_survives(tm, table_selection)} for schema, tables in + tables.items() if ExplicitSelectionUtils.table_survives(tm, table_selection)} for + schema, tables in self.db_structure.items()} new_dbm = self.__class__( db_structure={schema: {table_name: tm for table_name, (tm, _) in filter_results.items()} for diff --git a/mitm_tooling/extraction/sql/data_models/db_probe.py b/mitm_tooling/extraction/sql/data_models/db_probe.py index 09f4961..40e4543 100644 --- a/mitm_tooling/extraction/sql/data_models/db_probe.py +++ b/mitm_tooling/extraction/sql/data_models/db_probe.py @@ -5,9 +5,10 @@ import pydantic from pydantic import NonNegativeInt, Field from mitm_tooling.data_types.data_types import MITMDataType -from .db_meta import TableMetaInfoBase, DBMetaInfoBase, DBMetaInfo, ColumnName +from .db_meta import TableMetaInfoBase, DBMetaInfoBase, DBMetaInfo +from mitm_tooling.representation import ColumnName from .probe_models import SampleSummary -from .table_identifiers import ShortTableIdentifier +from mitm_tooling.representation.sql.common import ShortTableIdentifier logger = logging.getLogger('api') diff --git a/mitm_tooling/extraction/sql/data_models/table_identifiers.py b/mitm_tooling/extraction/sql/data_models/table_identifiers.py index 1f9f4d1..993d4ad 100644 --- a/mitm_tooling/extraction/sql/data_models/table_identifiers.py +++ b/mitm_tooling/extraction/sql/data_models/table_identifiers.py @@ -6,14 +6,11 @@ from typing import Self, Union, Annotated, TYPE_CHECKING import pydantic from pydantic import Field, AfterValidator +from mitm_tooling.representation.sql.common import TableName, SchemaName, ShortTableIdentifier + if TYPE_CHECKING: from .db_meta import DBMetaInfo, TableMetaInfo -TableName = str -SchemaName = str -ShortTableIdentifier = tuple[SchemaName, TableName] -QualifiedTableName = str - class SourceDBType(enum.StrEnum): OriginalDB = 'original' diff --git a/mitm_tooling/extraction/sql/data_models/virtual_view.py b/mitm_tooling/extraction/sql/data_models/virtual_view.py index 1a6210f..0a56d21 100644 --- a/mitm_tooling/extraction/sql/data_models/virtual_view.py +++ b/mitm_tooling/extraction/sql/data_models/virtual_view.py @@ -8,7 +8,7 @@ from pydantic import Field from mitm_tooling.data_types import get_sa_sql_type, SQL_DataType from mitm_tooling.utilities.sql_utils import qualify from .db_meta import TableMetaInfoBase, TableMetaInfo, DBMetaInfo -from .table_identifiers import SchemaName, TableName, ShortTableIdentifier +from mitm_tooling.representation.sql_representation import TableName, SchemaName, ShortTableIdentifier logger = logging.getLogger('api') diff --git a/mitm_tooling/extraction/sql/db/db_probing.py b/mitm_tooling/extraction/sql/db/db_probing.py index 008921f..fdb39d9 100644 --- a/mitm_tooling/extraction/sql/db/db_probing.py +++ b/mitm_tooling/extraction/sql/db/db_probing.py @@ -13,7 +13,8 @@ from sqlalchemy.orm import Session from sqlalchemy.sql import sqltypes from mitm_tooling.data_types import MITMDataType -from ..data_models import TableMetaInfo, DBMetaInfo, ColumnName +from ..data_models import TableMetaInfo, DBMetaInfo +from mitm_tooling.representation import ColumnName from ..data_models import TableProbe, DBProbe from ..data_models.probe_models import NumericSummaryStatistics, DatetimeSummaryStatistics, \ CategoricalSummaryStatistics, SampleSummary diff --git a/mitm_tooling/extraction/sql/db/db_reflection.py b/mitm_tooling/extraction/sql/db/db_reflection.py index f79a160..205b401 100644 --- a/mitm_tooling/extraction/sql/db/db_reflection.py +++ b/mitm_tooling/extraction/sql/db/db_reflection.py @@ -3,7 +3,8 @@ from typing import TypedDict, Any from sqlalchemy import MetaData, Engine, inspect -from ..data_models import TableMetaInfo, SchemaName +from ..data_models import TableMetaInfo +from mitm_tooling.representation.sql.common import SchemaName from mitm_tooling.utilities.sql_utils import qualify diff --git a/mitm_tooling/extraction/sql/db/db_virtual_view.py b/mitm_tooling/extraction/sql/db/db_virtual_view.py index 46feff8..89dc1de 100644 --- a/mitm_tooling/extraction/sql/db/db_virtual_view.py +++ b/mitm_tooling/extraction/sql/db/db_virtual_view.py @@ -5,7 +5,8 @@ from typing import Callable import pydantic from pydantic import Field -from ..data_models import DBMetaInfo, Queryable, TableIdentifier, SourceDBType, SchemaName, TableName +from ..data_models import DBMetaInfo, Queryable, TableIdentifier, SourceDBType +from mitm_tooling.representation.sql.common import TableName, SchemaName from ..data_models import VirtualView from mitm_tooling.extraction.sql.transformation.db_transformation import TableTransforms, TableCreations, InvalidQueryException, \ TransformationError, TableNotFoundException, ColumnNotFoundException diff --git a/mitm_tooling/extraction/sql/mapping/mapping.py b/mitm_tooling/extraction/sql/mapping/mapping.py index d3136b8..5642ac6 100644 --- a/mitm_tooling/extraction/sql/mapping/mapping.py +++ b/mitm_tooling/extraction/sql/mapping/mapping.py @@ -13,7 +13,8 @@ from mitm_tooling.definition.definition_tools import map_col_groups from mitm_tooling.data_types import MITMDataType from mitm_tooling.definition import ConceptName, RelationName, MITM, ConceptLevel, ConceptKind, get_mitm_def from ..data_models import VirtualView, DBMetaInfo, TableMetaInfo, Queryable, SourceDBType, TableIdentifier, \ - AnyTableIdentifier, ColumnName + AnyTableIdentifier +from mitm_tooling.representation import ColumnName from ..transformation.db_transformation import TableNotFoundException, col_by_name, TableTransforms from ..transformation.df_transformation import transform_df diff --git a/mitm_tooling/extraction/sql/transformation/db_transformation.py b/mitm_tooling/extraction/sql/transformation/db_transformation.py index d841785..451b4ea 100644 --- a/mitm_tooling/extraction/sql/transformation/db_transformation.py +++ b/mitm_tooling/extraction/sql/transformation/db_transformation.py @@ -15,7 +15,9 @@ from sqlalchemy.sql import sqltypes from mitm_tooling.data_types import get_sa_sql_type, SQL_DataType, WrappedMITMDataType, MITMDataType, get_pandas_cast from mitm_tooling.utilities.python_utils import ExtraInfoExc from ..data_models import DBMetaInfo, TypedRawQuery -from ..data_models import ColumnName, TableName, SourceDBType, TableIdentifier, AnyTableIdentifier +from ..data_models import SourceDBType, TableIdentifier, AnyTableIdentifier +from mitm_tooling.representation import ColumnName +from mitm_tooling.representation.sql.common import TableName from .df_transformation import extract_json_path, PandasSeriesTransform, PandasCreation, PandasDataframeTransform logger = logging.getLogger('api') diff --git a/mitm_tooling/io/__init__.py b/mitm_tooling/io/__init__.py index af4de0b..fae686b 100644 --- a/mitm_tooling/io/__init__.py +++ b/mitm_tooling/io/__init__.py @@ -1,4 +1,4 @@ from .exporting import ZippedExport, StreamingZippedExport, write_zip -from .importing import ZippedImport, FolderImport, read_zip +from .importing import ZippedImport, FolderImport, read_zip, MITM from . import exporting from . import importing \ No newline at end of file diff --git a/mitm_tooling/io/importing.py b/mitm_tooling/io/importing.py index d1cdb7c..8591c2e 100644 --- a/mitm_tooling/io/importing.py +++ b/mitm_tooling/io/importing.py @@ -1,13 +1,9 @@ import glob import logging import os.path +import pydantic import zipfile from abc import ABC, abstractmethod - -import pandas as pd -import pydantic - -import io from mitm_tooling.definition import MITM, get_mitm_def from mitm_tooling.representation.file_representation import read_header_file, read_data_file from mitm_tooling.representation.intermediate_representation import MITMData, Header @@ -18,7 +14,7 @@ logger = logging.getLogger('api') class FileImport(pydantic.BaseModel, ABC): mitm: MITM - filename: str + filename: str | None = None @abstractmethod def read(self, source: DataSource, **kwargs) -> MITMData | None: @@ -67,12 +63,17 @@ class FolderImport(FileImport): return MITMData(header=Header.from_df(parts.pop('header'), self.mitm), concept_dfs=parts) -def read_zip(source: FilePath, mitm: MITM | None = None) -> MITMData | None: - try: +def read_zip(source: DataSource, mitm: MITM | None = None) -> MITMData | None: + filename = None + if isinstance(source, FilePath): + filename = source _, ext = os.path.splitext(source) if not mitm: - mitm = MITM(ext.upper()) - except ValueError: + try: + mitm = MITM(ext[1:].upper()) + except ValueError: + pass + if mitm: + return ZippedImport(mitm=mitm, filename=filename).read(source) + else: logger.error('Attempted to import data with unspecified MitM.') - return None - return ZippedImport(mitm=mitm, filename=source).read(source) diff --git a/mitm_tooling/representation/__init__.py b/mitm_tooling/representation/__init__.py index f7f848b..dc87f67 100644 --- a/mitm_tooling/representation/__init__.py +++ b/mitm_tooling/representation/__init__.py @@ -1,9 +1,10 @@ -from . import intermediate_representation -from . import file_representation +from . import common from . import df_representation +from . import file_representation +from . import intermediate_representation from . import sql_representation +from .common import mk_concept_file_header, ColumnName +from .df_representation import MITMDataFrames from .file_representation import write_header_file, write_data_file, read_data_file, read_header_file -from .common import mk_concept_file_header -from .intermediate_representation import HeaderEntry, Header, MITMData, StreamingMITMData, StreamingConceptData, ColumnName -from .df_representation import MITMDataset -from .sql_representation import mk_db_schema, insert_mitm_data, mk_sqlite, SQLRepresentationSchema +from .intermediate_representation import HeaderEntry, Header, MITMData, StreamingMITMData, StreamingConceptData +from .sql_representation import mk_sql_rep_schema, insert_mitm_data, mk_sqlite, SQLRepresentationSchema diff --git a/mitm_tooling/representation/common.py b/mitm_tooling/representation/common.py index b0c21e9..00f0371 100644 --- a/mitm_tooling/representation/common.py +++ b/mitm_tooling/representation/common.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import itertools from mitm_tooling.data_types import MITMDataType @@ -32,3 +34,6 @@ def mk_concept_file_header(mitm: MITM, concept: ConceptName, k: int) -> tuple[li def mk_attr_columns(k: int) -> list[str]: return [f'a_{i}' for i in range(1, k + 1)] + + +ColumnName = str diff --git a/mitm_tooling/representation/df_representation.py b/mitm_tooling/representation/df_representation.py index 6f8b4ae..e0571f0 100644 --- a/mitm_tooling/representation/df_representation.py +++ b/mitm_tooling/representation/df_representation.py @@ -8,7 +8,7 @@ from mitm_tooling.definition import ConceptName from .intermediate_representation import Header -class MITMDataset(Iterable[tuple[str, dict[str, pd.DataFrame]]], pydantic.BaseModel): +class MITMDataFrames(Iterable[tuple[str, dict[str, pd.DataFrame]]], pydantic.BaseModel): model_config = ConfigDict(arbitrary_types_allowed=True) header: Header diff --git a/mitm_tooling/representation/intermediate_representation.py b/mitm_tooling/representation/intermediate_representation.py index a30bbec..803b3ba 100644 --- a/mitm_tooling/representation/intermediate_representation.py +++ b/mitm_tooling/representation/intermediate_representation.py @@ -4,21 +4,19 @@ import itertools import logging from collections import defaultdict from collections.abc import Iterator, Iterable, Sequence, Mapping -from typing import TYPE_CHECKING, Self, Any, Annotated +from typing import Self import pandas as pd import pydantic from pydantic import ConfigDict -from mitm_tooling.definition import get_mitm_def from mitm_tooling.data_types.data_types import MITMDataType +from mitm_tooling.definition import get_mitm_def from mitm_tooling.definition.definition_representation import ConceptName, MITM from mitm_tooling.utilities.python_utils import take_first - -from .common import guess_k_of_header_df, mk_header_file_columns +from .common import mk_header_file_columns, ColumnName logger = logging.getLogger('api') -ColumnName = str class HeaderEntry(pydantic.BaseModel): @@ -62,10 +60,9 @@ class HeaderEntry(pydantic.BaseModel): itertools.chain(*zip(self.attributes, map(str, self.attribute_dtypes)))) - class Header(pydantic.BaseModel): mitm: MITM - header_entries: Annotated[list[HeaderEntry], pydantic.Field(default_factory=list)] + header_entries: list[HeaderEntry] = pydantic.Field(default_factory=list) @classmethod def from_df(cls, df: pd.DataFrame, mitm: MITM) -> Self: @@ -95,7 +92,7 @@ class MITMData(Iterable[tuple[ConceptName, pd.DataFrame]], pydantic.BaseModel): model_config = ConfigDict(arbitrary_types_allowed=True) header: Header - concept_dfs: Annotated[dict[ConceptName, pd.DataFrame], pydantic.Field(default_factory=dict)] + concept_dfs: dict[ConceptName, pd.DataFrame] = pydantic.Field(default_factory=dict) def __iter__(self): return iter(self.concept_dfs.items()) @@ -106,7 +103,7 @@ class MITMData(Iterable[tuple[ConceptName, pd.DataFrame]], pydantic.BaseModel): for c, df in self.concept_dfs.items(): c = mitm_def.get_parent(c) dfs[c].append(df) - dfs = {c : pd.concat(dfs_, axis='rows', ignore_index=True) for c, dfs_ in dfs.items()} + dfs = {c: pd.concat(dfs_, axis='rows', ignore_index=True) for c, dfs_ in dfs.items()} return MITMData(header=self.header, concept_dfs=dfs) def as_specialized(self) -> Self: diff --git a/mitm_tooling/representation/sql/__init__.py b/mitm_tooling/representation/sql/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/mitm_tooling/representation/sql/common.py b/mitm_tooling/representation/sql/common.py new file mode 100644 index 0000000..4c45e44 --- /dev/null +++ b/mitm_tooling/representation/sql/common.py @@ -0,0 +1,4 @@ +TableName = str +SchemaName = str +ShortTableIdentifier = tuple[SchemaName, TableName] +QualifiedTableName = str diff --git a/mitm_tooling/representation/sql_representation.py b/mitm_tooling/representation/sql_representation.py index e88c684..14cfddd 100644 --- a/mitm_tooling/representation/sql_representation.py +++ b/mitm_tooling/representation/sql_representation.py @@ -1,29 +1,43 @@ from __future__ import annotations -from collections.abc import Callable, Generator, Mapping -from typing import TYPE_CHECKING - import pydantic import sqlalchemy as sa import sqlalchemy.sql.schema -from pydantic import AnyUrl, ConfigDict -from mitm_tooling.data_types import MITMDataType -from mitm_tooling.definition import MITMDefinition, ConceptProperties, OwnedRelations, ConceptName, MITM, get_mitm_def, \ - RelationName -from mitm_tooling.definition.definition_tools import map_col_groups, ColGroupMaps -from .intermediate_representation import Header, MITMData -from mitm_tooling.utilities.sql_utils import create_sa_engine, qualify +from collections.abc import Callable, Mapping +from contextlib import contextmanager +from mitm_tooling.definition import MITMDefinition, ConceptProperties, OwnedRelations, RelationName +from mitm_tooling.definition.definition_tools import ColGroupMaps from mitm_tooling.utilities import python_utils from mitm_tooling.utilities.io_utils import FilePath - +from mitm_tooling.utilities.sql_utils import create_sa_engine, qualify +from pydantic import AnyUrl, ConfigDict +from sqlalchemy import Connection, NestedTransaction +from sqlalchemy.pool import StaticPool from sqlalchemy_utils.view import create_view +from typing import TYPE_CHECKING, Generator + +from .common import * +from .intermediate_representation import Header, MITMData +from .sql.common import * if TYPE_CHECKING: from mitm_tooling.extraction.sql.data_models import Queryable -from mitm_tooling.extraction.sql.data_models.table_identifiers import TableName SQL_REPRESENTATION_DEFAULT_SCHEMA = 'main' +ConceptTablesDict = dict[ConceptName, sa.Table] +ViewsDict = dict[TableName, sa.Table] +ConceptTypeTablesDict = dict[ConceptName, dict[str, sa.Table]] + + +class SQLRepresentationSchema(pydantic.BaseModel): + model_config = ConfigDict(arbitrary_types_allowed=True) + + meta: sa.MetaData + concept_tables: ConceptTablesDict + type_tables: ConceptTypeTablesDict + views: ViewsDict + def mk_concept_table_name(mitm: MITM, concept: ConceptName) -> TableName: return get_mitm_def(mitm).get_properties(concept).plural @@ -60,7 +74,8 @@ def mk_table(meta: sa.MetaData, mitm: MITM, concept: ConceptName, table_name: Ta [MITM, ConceptName, ConceptProperties, OwnedRelations, dict[RelationName, sa.Column], list[tuple[RelationName, sa.Column]]], Generator[ - sqlalchemy.sql.schema.SchemaItem, None, None]] | None = None) -> \ + sqlalchemy.sql.schema.SchemaItem, None, None]] | None = None, + override_schema: SchemaName | None = None) -> \ tuple[ sa.Table, dict[RelationName, sa.Column], list[tuple[RelationName, sa.Column]]]: mitm_def = get_mitm_def(mitm) @@ -80,7 +95,8 @@ def mk_table(meta: sa.MetaData, mitm: MITM, concept: ConceptName, table_name: Ta ref_columns) constraints.extend(schema_items) - return sa.Table(table_name, meta, schema=SQL_REPRESENTATION_DEFAULT_SCHEMA, *columns, + return sa.Table(table_name, meta, schema=override_schema if override_schema else SQL_REPRESENTATION_DEFAULT_SCHEMA, + *columns, *constraints), created_columns, ref_columns @@ -92,6 +108,7 @@ def gen_foreign_key_constraints(mitm: MITM, concept: ConceptName, concept_proper parent_table = mk_concept_table_name(mitm, concept) cols, refcols = zip( *((c, qualify(table=parent_table, column=s)) for s, c in ref_columns)) + yield sa.ForeignKeyConstraint(name='parent', columns=cols, refcolumns=refcols) for fk_name, fk_info in concept_relations.foreign.items(): cols, refcols = zip(*fk_info.fk_relations.items()) @@ -102,27 +119,15 @@ def gen_foreign_key_constraints(mitm: MITM, concept: ConceptName, concept_proper yield fkc -ConceptTablesDict = dict[ConceptName, sa.Table] -ViewsDict = dict[TableName, sa.Table] -ConceptTypeTablesDict = dict[ConceptName, dict[TableName, sa.Table]] - - -class SQLRepresentationSchema(pydantic.BaseModel): - model_config = ConfigDict(arbitrary_types_allowed=True) - - meta: sa.MetaData - concept_tables: ConceptTablesDict - type_tables: ConceptTypeTablesDict - views: ViewsDict - - -def mk_db_schema(header: Header, gen_views: Callable[ - [MITM, MITMDefinition, ConceptTablesDict, ConceptTypeTablesDict], - Generator[ - tuple[ - TableName, Queryable], None, None]] | None = None) -> SQLRepresentationSchema: +def mk_sql_rep_schema(header: Header, + gen_views: Callable[ + [MITM, MITMDefinition, ConceptTablesDict, ConceptTypeTablesDict], + Generator[ + tuple[ + TableName, Queryable], None, None]] | None = None, + override_schema: SchemaName | None = None) -> SQLRepresentationSchema: mitm_def = get_mitm_def(header.mitm) - meta = sa.MetaData(schema=SQL_REPRESENTATION_DEFAULT_SCHEMA) + meta = sa.MetaData(schema=override_schema if override_schema else SQL_REPRESENTATION_DEFAULT_SCHEMA) concept_tables: ConceptTablesDict = {} type_tables: ConceptTypeTablesDict = {} @@ -146,7 +151,7 @@ def mk_db_schema(header: Header, gen_views: Callable[ 'foreign': lambda: [(name, sa.Column(name, dt.sa_sql_type)) for _, resolved_fk in mitm_def.resolve_foreign_types(concept).items() for name, dt in resolved_fk.items()] - }) + }, override_schema=override_schema) concept_tables[concept] = t for he in header.header_entries: @@ -171,7 +176,7 @@ def mk_db_schema(header: Header, gen_views: Callable[ resolved_fk.items()], 'attributes': lambda: [(name, sa.Column(name, dt.sa_sql_type)) for name, dt in zip(he.attributes, he.attribute_dtypes)], - }, gen_additional_schema_items=gen_foreign_key_constraints) + }, gen_additional_schema_items=gen_foreign_key_constraints, override_schema=override_schema) if he_concept not in type_tables: type_tables[he_concept] = {} @@ -192,13 +197,29 @@ def mk_db_schema(header: Header, gen_views: Callable[ return SQLRepresentationSchema(meta=meta, concept_tables=concept_tables, type_tables=type_tables, views=views) -def insert_db_instances(engine: sa.Engine, sql_rep_schema: SQLRepresentationSchema, mitm_data: MITMData): - from mitm_tooling.transformation.df import pack_mitm_dataset, unpack_mitm_data +EngineOrConnection = sa.Engine | sa.Connection + + +@contextmanager +def _nested_conn(bind: EngineOrConnection) -> Generator[Connection | NestedTransaction, None, None]: + if isinstance(bind, sa.Engine): + yield bind.connect() + elif isinstance(bind, sa.Connection): + with bind.begin_nested(): + yield bind + + +def insert_db_schema(bind: EngineOrConnection, sql_rep_schema: SQLRepresentationSchema) -> None: + sql_rep_schema.meta.create_all(bind=bind, checkfirst=False) + + +def insert_db_instances(bind: EngineOrConnection, sql_rep_schema: SQLRepresentationSchema, mitm_data: MITMData) -> None: + from mitm_tooling.transformation.df import mitm_data_into_mitm_dataframes h = mitm_data.header mitm = mitm_data.header.mitm mitm_def = get_mitm_def(mitm) - mitm_dataset = unpack_mitm_data(mitm_data) - with engine.connect() as conn: + mitm_dataset = mitm_data_into_mitm_dataframes(mitm_data) + with _nested_conn(bind) as conn: for concept, typed_dfs in mitm_dataset: concept_properties, concept_relations = mitm_def.get(concept) for type_name, type_df in typed_dfs.items(): @@ -208,8 +229,6 @@ def insert_db_instances(engine: sa.Engine, sql_rep_schema: SQLRepresentationSche conn.execute(t_concept.insert(), type_df[[c.name for c in t_concept.columns]].to_dict('records')) if has_type_tables(mitm, concept): - #for typ, idx in df.groupby(concept_properties.typing_concept).groups.items(): - # type_df = df.loc[idx] t_type = sql_rep_schema.type_tables[concept][type_name] to_dict = type_df[[c.name for c in t_type.columns]].to_dict('records') conn.execute(t_type.insert(), to_dict) @@ -217,15 +236,17 @@ def insert_db_instances(engine: sa.Engine, sql_rep_schema: SQLRepresentationSche conn.commit() -def insert_mitm_data(engine: sa.Engine, mitm_data: MITMData) -> SQLRepresentationSchema: - sql_rep_schema = mk_db_schema(mitm_data.header) - sql_rep_schema.meta.create_all(engine) - insert_db_instances(engine, sql_rep_schema, mitm_data) - return sql_rep_schema +def insert_mitm_data(bind: EngineOrConnection, sql_rep_schema, mitm_data: MITMData) -> None: + insert_db_schema(bind, sql_rep_schema) + insert_db_instances(bind, sql_rep_schema, mitm_data) -def mk_sqlite(mitm_data: MITMData, file_path: FilePath | None = ':memory:') -> tuple[sa.Engine, SQLRepresentationSchema]: - engine = create_sa_engine(AnyUrl(f'sqlite:///{str(file_path)}')) - sql_rep_schema = insert_mitm_data(engine, mitm_data) +def mk_sqlite(mitm_data: MITMData, file_path: FilePath | None = ':memory:', autoclose: bool = True) -> tuple[ + sa.Engine, SQLRepresentationSchema]: + engine = create_sa_engine(AnyUrl(f'sqlite:///{str(file_path)}'), poolclass=StaticPool) + sql_rep_schema = mk_sql_rep_schema(mitm_data.header) + insert_mitm_data(engine, sql_rep_schema, mitm_data) # print([f'{t.name}: {t.columns} {t.constraints}' for ts in sql_rep_schema.type_tables.values() for t in ts.values()]) + if autoclose: + engine.dispose() return engine, sql_rep_schema diff --git a/mitm_tooling/transformation/__init__.py b/mitm_tooling/transformation/__init__.py index 0bf5d16..dd772b9 100644 --- a/mitm_tooling/transformation/__init__.py +++ b/mitm_tooling/transformation/__init__.py @@ -1,2 +1,3 @@ -from . import df, superset -__all__ = ['df','superset'] \ No newline at end of file +from . import df, sql, superset + +__all__ = ['df', 'sql', 'superset'] diff --git a/mitm_tooling/transformation/df/__init__.py b/mitm_tooling/transformation/df/__init__.py index 6c3abd7..1d31d83 100644 --- a/mitm_tooling/transformation/df/__init__.py +++ b/mitm_tooling/transformation/df/__init__.py @@ -1,2 +1,2 @@ -from .intermediate_transformation import unpack_mitm_data, pack_mitm_dataset -from . import intermediate_transformation \ No newline at end of file +from .from_intermediate import mitm_data_into_mitm_dataframes +from .into_intermediate import mitm_dataframes_into_mitm_data \ No newline at end of file diff --git a/mitm_tooling/transformation/df/intermediate_transformation.py b/mitm_tooling/transformation/df/from_intermediate.py similarity index 57% rename from mitm_tooling/transformation/df/intermediate_transformation.py rename to mitm_tooling/transformation/df/from_intermediate.py index 7562a4b..262dbfe 100644 --- a/mitm_tooling/transformation/df/intermediate_transformation.py +++ b/mitm_tooling/transformation/df/from_intermediate.py @@ -7,32 +7,11 @@ import pandas as pd from mitm_tooling.data_types import convert, MITMDataType from mitm_tooling.definition import get_mitm_def, MITM, ConceptName from mitm_tooling.definition.definition_tools import map_col_groups -from mitm_tooling.representation import MITMData, MITMDataset, Header +from mitm_tooling.representation import MITMData, MITMDataFrames, Header from mitm_tooling.representation import mk_concept_file_header from mitm_tooling.representation.common import guess_k_of_header_df, mk_header_file_columns -def pack_typed_dfs_as_concept_table(mitm: MITM, concept: ConceptName, dfs: Iterable[pd.DataFrame]) -> pd.DataFrame: - normalized_dfs = [] - for df in dfs: - base_cols, col_dts = mk_concept_file_header(mitm, concept, 0) - attr_cols = set(df.columns) - set(base_cols) - k = len(attr_cols) - normal_form_cols = list(base_cols) + list(attr_cols) - df = df.reindex(columns=normal_form_cols) - df = convert.convert_df(df, col_dts | {c: MITMDataType.Unknown for c in attr_cols}) - squashed_form_cols = mk_concept_file_header(mitm, concept, k)[0] - df.columns = squashed_form_cols - normalized_dfs.append((df, k)) - - assert len(normalized_dfs) > 0 - max_k = max(normalized_dfs, key=lambda x: x[1])[1] - - squashed_form_cols = mk_concept_file_header(mitm, concept, max_k)[0] - return pd.concat([df for df, _ in normalized_dfs], axis='rows', ignore_index=True).reindex( - columns=squashed_form_cols) - - def unpack_concept_table_as_typed_dfs(header: Header, concept: ConceptName, df: pd.DataFrame) -> dict[ str, pd.DataFrame]: mitm_def = get_mitm_def(header.mitm) @@ -69,16 +48,9 @@ def unpack_concept_table_as_typed_dfs(header: Header, concept: ConceptName, df: return res -def unpack_mitm_data(mitm_data: MITMData) -> MITMDataset: +def mitm_data_into_mitm_dataframes(mitm_data: MITMData) -> MITMDataFrames: mitm_data = mitm_data.as_specialized() - return MITMDataset(header=mitm_data.header, - dfs={concept: unpack_concept_table_as_typed_dfs(mitm_data.header, concept, df) for concept, df in - mitm_data}) - - -def pack_mitm_dataset(mitm_dataset: MITMDataset) -> MITMData: - return MITMData(header=mitm_dataset.header, concept_dfs={concept: - pack_typed_dfs_as_concept_table( - mitm_dataset.header.mitm, concept, - typed_dfs.values()) for concept, typed_dfs in - mitm_dataset if len(typed_dfs) > 1}).as_generalized() + return MITMDataFrames(header=mitm_data.header, + dfs={concept: unpack_concept_table_as_typed_dfs(mitm_data.header, concept, df) for concept, df + in + mitm_data}) diff --git a/mitm_tooling/transformation/df/into_intermediate.py b/mitm_tooling/transformation/df/into_intermediate.py new file mode 100644 index 0000000..91b6875 --- /dev/null +++ b/mitm_tooling/transformation/df/into_intermediate.py @@ -0,0 +1,41 @@ +import itertools +from collections import defaultdict +from collections.abc import Sequence, Iterable + +import pandas as pd + +from mitm_tooling.data_types import convert, MITMDataType +from mitm_tooling.definition import get_mitm_def, MITM, ConceptName +from mitm_tooling.definition.definition_tools import map_col_groups +from mitm_tooling.representation import MITMData, MITMDataFrames, Header +from mitm_tooling.representation import mk_concept_file_header +from mitm_tooling.representation.common import guess_k_of_header_df, mk_header_file_columns + + +def pack_typed_dfs_as_concept_table(mitm: MITM, concept: ConceptName, dfs: Iterable[pd.DataFrame]) -> pd.DataFrame: + normalized_dfs = [] + for df in dfs: + base_cols, col_dts = mk_concept_file_header(mitm, concept, 0) + attr_cols = set(df.columns) - set(base_cols) + k = len(attr_cols) + normal_form_cols = list(base_cols) + list(attr_cols) + df = df.reindex(columns=normal_form_cols) + df = convert.convert_df(df, col_dts | {c: MITMDataType.Unknown for c in attr_cols}) + squashed_form_cols = mk_concept_file_header(mitm, concept, k)[0] + df.columns = squashed_form_cols + normalized_dfs.append((df, k)) + + assert len(normalized_dfs) > 0 + max_k = max(normalized_dfs, key=lambda x: x[1])[1] + + squashed_form_cols = mk_concept_file_header(mitm, concept, max_k)[0] + return pd.concat([df for df, _ in normalized_dfs], axis='rows', ignore_index=True).reindex( + columns=squashed_form_cols) + + +def mitm_dataframes_into_mitm_data(mitm_dataset: MITMDataFrames) -> MITMData: + return MITMData(header=mitm_dataset.header, concept_dfs={concept: + pack_typed_dfs_as_concept_table( + mitm_dataset.header.mitm, concept, + typed_dfs.values()) for concept, typed_dfs in + mitm_dataset if len(typed_dfs) > 1}).as_generalized() diff --git a/mitm_tooling/transformation/sql/__init__.py b/mitm_tooling/transformation/sql/__init__.py new file mode 100644 index 0000000..c9511ca --- /dev/null +++ b/mitm_tooling/transformation/sql/__init__.py @@ -0,0 +1 @@ +from .from_intermediate import header_into_db_meta, mitm_data_into_db_meta diff --git a/mitm_tooling/transformation/sql/from_intermediate.py b/mitm_tooling/transformation/sql/from_intermediate.py new file mode 100644 index 0000000..891961f --- /dev/null +++ b/mitm_tooling/transformation/sql/from_intermediate.py @@ -0,0 +1,18 @@ +from mitm_tooling.extraction.sql.data_models import DBMetaInfo +from mitm_tooling.representation import MITMData +from mitm_tooling.representation.intermediate_representation import Header +from mitm_tooling.representation.sql_representation import mk_sql_rep_schema, SQLRepresentationSchema, \ + SQL_REPRESENTATION_DEFAULT_SCHEMA + + +def sql_rep_schema_to_db_meta(sql_rep_schema: SQLRepresentationSchema) -> DBMetaInfo: + return DBMetaInfo.from_sa_meta(sql_rep_schema.meta, default_schema=SQL_REPRESENTATION_DEFAULT_SCHEMA) + + +def header_into_db_meta(header: Header) -> DBMetaInfo: + sql_rep_schema = mk_sql_rep_schema(header) + return sql_rep_schema_to_db_meta(sql_rep_schema) + + +def mitm_data_into_db_meta(mitm_data: MITMData) -> DBMetaInfo: + return header_into_db_meta(mitm_data.header) diff --git a/mitm_tooling/transformation/superset/__init__.py b/mitm_tooling/transformation/superset/__init__.py index 92ec212..c5cb639 100644 --- a/mitm_tooling/transformation/superset/__init__.py +++ b/mitm_tooling/transformation/superset/__init__.py @@ -1,2 +1,5 @@ -from .superset_representation import mk_inferred_superset_dataset_def, mk_superset_dataset_def, mk_inferred_superset_defs -from . import dataset_definition, superset_representation \ No newline at end of file +from . import definitions, factories, mitm_specific +from . import exporting, from_sql, from_intermediate +from . import interface +from .exporting import write_superset_assets_def +from .interface import mk_superset_datasource_import, mk_superset_visualization_import, mk_superset_mitm_dataset_import diff --git a/mitm_tooling/transformation/superset/common.py b/mitm_tooling/transformation/superset/common.py new file mode 100644 index 0000000..2f14c49 --- /dev/null +++ b/mitm_tooling/transformation/superset/common.py @@ -0,0 +1,44 @@ +import pydantic +import sqlalchemy as sa +from mitm_tooling.representation.sql.common import SchemaName +from mitm_tooling.representation.sql_representation import SQL_REPRESENTATION_DEFAULT_SCHEMA +from mitm_tooling.transformation.superset.definitions import StrUrl +from mitm_tooling.utilities.io_utils import FilePath +from mitm_tooling.utilities.sql_utils import create_sa_engine, dialect_cls_from_url, any_url_into_sa_url +from pydantic import AnyUrl, ConfigDict +from pydantic.v1 import UUID4 +from typing import Type + +SQLiteFileOrEngine = FilePath | sa.Engine + + +class SupersetDBConnectionInfo(pydantic.BaseModel): + model_config = ConfigDict(arbitrary_types_allowed=True) + + sql_alchemy_uri: StrUrl + explicit_db_name: str | None = None + schema_name: SchemaName = SQL_REPRESENTATION_DEFAULT_SCHEMA + + @property + def db_name_in_uri(self) -> AnyUrl | None: + if self.sql_alchemy_uri is not None: + return any_url_into_sa_url(self.sql_alchemy_uri).database + + @property + def db_name(self) -> str: + return self.explicit_db_name or self.db_name_in_uri + + @property + def dialect_cls(self) -> Type[sa.engine.Dialect]: + return dialect_cls_from_url(self.sql_alchemy_uri) + + +def name_plus_uuid(name: str, uuid: UUID4) -> str: + return f'{name}-{uuid.hex[:8]}' + + +def _mk_engine(arg: SQLiteFileOrEngine) -> sa.Engine: + if isinstance(arg, sa.Engine): + return arg + else: + return create_sa_engine(AnyUrl(f'sqlite:///{str(arg)}'), poolclass=sa.pool.StaticPool) diff --git a/mitm_tooling/transformation/superset/definition_bundles.py b/mitm_tooling/transformation/superset/definition_bundles.py new file mode 100644 index 0000000..3869657 --- /dev/null +++ b/mitm_tooling/transformation/superset/definition_bundles.py @@ -0,0 +1,53 @@ +from abc import ABC, abstractmethod +from typing import Any + +import pydantic + +from .definitions import SupersetDatabaseDef, SupersetMitMDatasetDef, \ + SupersetChartDef, SupersetDashboardDef, BaseSupersetDefinition, SupersetAssetsDef, SupersetDatasetDef, \ + ExtendedSupersetAssetsDef, SupersetDefFolder, DatasourceIdentifier +from .factories.assets import mk_assets, mk_extended_assets +from ...representation.sql.common import TableName + + +class SupersetAssetBundle(SupersetDefFolder, ABC): + @abstractmethod + def to_assets(self) -> SupersetAssetsDef | ExtendedSupersetAssetsDef: + pass + + @property + def folder_dict(self) -> dict[str, Any]: + return self.to_assets().folder_dict + + +class SupersetDatasourceBundle(SupersetAssetBundle): + database: SupersetDatabaseDef + datasets: list[SupersetDatasetDef] = pydantic.Field(default_factory=list) + + @property + def placeholder_dataset_identifiers(self) -> dict[TableName, DatasourceIdentifier]: + return {ds.table_name: DatasourceIdentifier(dataset_uuid=ds.uuid) for ds in self.datasets} + + def to_assets(self) -> SupersetAssetsDef: + return mk_assets(databases=[self.database], datasets=self.datasets) + + +class SupersetVisualizationBundle(SupersetAssetBundle): + charts: list[SupersetChartDef] = pydantic.Field(default_factory=list) + dashboards: list[SupersetDashboardDef] = pydantic.Field(default_factory=list) + + def to_assets(self) -> SupersetAssetsDef: + return mk_assets(charts=self.charts, dashboards=self.dashboards) + + +class SupersetMitMDatasetBundle(SupersetAssetBundle): + mitm_dataset: SupersetMitMDatasetDef + datasource_bundle: SupersetDatasourceBundle + visualization_bundle: SupersetVisualizationBundle = pydantic.Field(default_factory=SupersetVisualizationBundle) + + def to_assets(self) -> ExtendedSupersetAssetsDef: + base_assets = mk_assets(databases=[self.datasource_bundle.database], + datasets=self.datasource_bundle.datasets, + charts=self.visualization_bundle.charts, + dashboards=self.visualization_bundle.dashboards) + return mk_extended_assets(mitm_datasets=[self.mitm_dataset], base_assets=base_assets) diff --git a/mitm_tooling/transformation/superset/definitions/__init__.py b/mitm_tooling/transformation/superset/definitions/__init__.py index 9226df6..f9ac7e5 100644 --- a/mitm_tooling/transformation/superset/definitions/__init__.py +++ b/mitm_tooling/transformation/superset/definitions/__init__.py @@ -1,5 +1,5 @@ +from .assets import * +from .charts import * from .constants import * from .core import * from .post_processing import * -from .charts import * -from .high_level import * \ No newline at end of file diff --git a/mitm_tooling/transformation/superset/definitions/high_level.py b/mitm_tooling/transformation/superset/definitions/assets.py similarity index 60% rename from mitm_tooling/transformation/superset/definitions/high_level.py rename to mitm_tooling/transformation/superset/definitions/assets.py index 1f499b0..f04b7c2 100644 --- a/mitm_tooling/transformation/superset/definitions/high_level.py +++ b/mitm_tooling/transformation/superset/definitions/assets.py @@ -1,17 +1,8 @@ -from abc import ABC, abstractmethod +from abc import abstractmethod from collections import defaultdict -from datetime import datetime, tzinfo, UTC -from enum import StrEnum, IntEnum -from typing import Any, Annotated, Literal, Self, Union +from datetime import UTC -import pydantic -from uuid import UUID - -from pydantic import Field, AnyUrl - -from mitm_tooling.data_types import MITMDataType -from .constants import * -from .core import * +from mitm_tooling.definition import MITM from .charts import * from .post_processing import * @@ -21,10 +12,12 @@ class MetadataType(StrEnum): SqlaTable = 'SqlaTable' Slice = 'Slice' Chart = 'Chart' + Dashboard = 'Dashboard' + Asset = 'Asset' + MitMDataset = 'MitMDataset' -class SupersetDefFile(pydantic.BaseModel, ABC): - model_config = pydantic.ConfigDict(arbitrary_types_allowed=True) +class SupersetDefFile(BaseSupersetDefinition, ABC): @property @abstractmethod @@ -32,6 +25,14 @@ class SupersetDefFile(pydantic.BaseModel, ABC): pass +class SupersetDefFolder(BaseSupersetDefinition, ABC): + + @property + @abstractmethod + def folder_dict(self) -> dict[str, Any]: + pass + + class SupersetMetadataDef(SupersetDefFile): version: str = '1.0.0' type: MetadataType = MetadataType.SqlaTable @@ -46,6 +47,7 @@ class SupersetDatabaseDef(SupersetDefFile): database_name: str sqlalchemy_uri: StrUrl uuid: StrUUID + # verbose_name : str | None = None cache_timeout: str | None = None expose_in_sqllab: bool = True allow_run_async: bool = False @@ -66,8 +68,6 @@ class SupersetDatabaseDef(SupersetDefFile): class SupersetDatasetDef(SupersetDefFile): - model_config = pydantic.ConfigDict(populate_by_name=True) - table_name: str schema_name: str = pydantic.Field(alias='schema') uuid: StrUUID @@ -103,7 +103,7 @@ class SupersetChartDef(SupersetDefFile): description: str | None = None certified_by: str | None = None certification_details: str | None = None - params: Annotated[ChartParams | dict[str, Any], pydantic.SerializeAsAny, pydantic.Field(default_factory=dict)] + params: ChartParams | dict[str, Any] = pydantic.Field(default_factory=dict) query_context: Annotated[pydantic.Json | QueryContext | None, pydantic.PlainSerializer( lambda x: x.model_dump_json(by_alias=True, exclude_none=True) if isinstance(x, pydantic.BaseModel) else x, return_type=pydantic.Json), pydantic.Field(default=None)] @@ -118,30 +118,74 @@ class SupersetChartDef(SupersetDefFile): class SupersetDashboardDef(SupersetDefFile): + uuid: StrUUID + dashboard_title: str + description: str | None = None + css: str | None = None + slug: str | None = None + position: dict[str, Any] = pydantic.Field(default_factory=dict) + metadata: dict[str, Any] = pydantic.Field(default_factory=dict) + is_managed_externally: bool | None = False + external_url: StrUrl | None = None + certified_by: str | None = None + certification_details: str | None = None + published: bool | None = False + version: str = '1.0.0' @property def filename(self) -> str: - return 'dashboard' + return f'{self.dashboard_title}_{self.uuid}' -class SupersetAssetsDef(pydantic.BaseModel): +class SupersetMitMDatasetDef(SupersetDefFile): + uuid: StrUUID + dataset_name: str + mitm: MITM + database_uuid: StrUUID + version: str = '1.0.0' + + @property + def filename(self) -> str: + return self.dataset_name + + +class SupersetAssetsDef(SupersetDefFolder): databases: list[SupersetDatabaseDef] | None = None datasets: list[SupersetDatasetDef] | None = None charts: list[SupersetChartDef] | None = None dashboards: list[SupersetDashboardDef] | None = None metadata: SupersetMetadataDef = pydantic.Field(default_factory=SupersetMetadataDef) - def to_folder_structure(self) -> dict[str, Any]: - folder = {'.': self.metadata} + @property + def folder_dict(self) -> dict[str, Any]: + folder_dict = {'.': self.metadata} dbs = {} if self.databases: dbs |= {db.uuid: db.database_name for db in self.databases} - folder['databases'] = [db for db in self.databases] + folder_dict['databases'] = [db for db in self.databases] if self.datasets: db_dss = defaultdict(list) for ds in self.datasets: db_dss[dbs[ds.database_uuid]].append(ds) - folder['datasets'] = db_dss + folder_dict['datasets'] = db_dss if self.charts: - folder['charts'] = self.charts - return {'my_import': folder} + folder_dict['charts'] = self.charts + return {'my_import': folder_dict} + + +class ExtendedSupersetAssetsDef(SupersetDefFolder): + mitm_datasets: list[SupersetMitMDatasetDef] | None + base_assets: SupersetAssetsDef | None + + @property + def folder_dict(self) -> dict[str, Any]: + asset_folder_dict = self.base_assets.folder_dict if self.base_assets else {'my_import': {}} + dbs = {} + if self.base_assets.databases: + dbs = {db.uuid: db.database_name for db in self.base_assets.databases} + if self.mitm_datasets: + mitm_dss = defaultdict(list) + for mitm_ds in self.mitm_datasets: + mitm_dss[dbs[mitm_ds.database_uuid]].append(mitm_ds) + asset_folder_dict['my_import']['mitm_datasets'] = mitm_dss + return asset_folder_dict diff --git a/mitm_tooling/transformation/superset/definitions/charts.py b/mitm_tooling/transformation/superset/definitions/charts.py index 6d99e1b..ce8af2b 100644 --- a/mitm_tooling/transformation/superset/definitions/charts.py +++ b/mitm_tooling/transformation/superset/definitions/charts.py @@ -1,16 +1,4 @@ -from abc import ABC, abstractmethod -from datetime import datetime, tzinfo, UTC -from enum import StrEnum, IntEnum -from typing import Any, Annotated, Literal, Self, Union - -import pydantic -from uuid import UUID - -from pydantic import Field, AnyUrl - -from mitm_tooling.data_types import MITMDataType from .core import * -from .constants import * class ChartParams(FormData): @@ -67,7 +55,7 @@ class TimeSeriesChartParams(ChartParams): only_total: bool = True order_desc: bool = True time_grain_sqla: TimeGrain | None = None - annotation_layers: AnnotationLayers = pydantic.Field(default_factory=list) + annotation_layers: list[AnnotationLayer] | None = pydantic.Field(default_factory=list) # # forecastEnabled: bool = False diff --git a/mitm_tooling/transformation/superset/definitions/constants.py b/mitm_tooling/transformation/superset/definitions/constants.py index f55f4e9..dbe2a73 100644 --- a/mitm_tooling/transformation/superset/definitions/constants.py +++ b/mitm_tooling/transformation/superset/definitions/constants.py @@ -1,15 +1,13 @@ -from abc import ABC, abstractmethod -from datetime import datetime, tzinfo, UTC +from datetime import datetime +from datetime import datetime from enum import StrEnum, IntEnum -from typing import Any, Annotated, Literal, Self, Union - -import pydantic +from typing import Annotated, Literal, Self, Union from uuid import UUID -from pydantic import Field, AnyUrl +import pydantic +from pydantic import AnyUrl, ConfigDict from mitm_tooling.data_types import MITMDataType -from mitm_tooling.representation.intermediate_representation import ColumnName StrUUID = Annotated[ UUID, @@ -134,8 +132,11 @@ class FilterStringOperators(StrEnum): IS_FALSE = "IS_FALSE" @classmethod - def from_operator(cls, operator: FilterOperator) -> Self: - return getattr(cls, operator.name) + def from_operator(cls, operator: FilterOperator) -> Self | None: + try: + return cls(operator.name) + except ValueError: + pass class TimeGrain(StrEnum): @@ -181,3 +182,11 @@ class ChartDataResultType(StrEnum): TIMEGRAINS = "timegrains" POST_PROCESSED = "post_processed" DRILL_DETAIL = "drill_detail" + + +class BaseSupersetDefinition(pydantic.BaseModel): + model_config = ConfigDict(arbitrary_types_allowed=True, use_enum_values=True, populate_by_name=True) + + +class FrozenSupersetDefinition(BaseSupersetDefinition): + model_config = ConfigDict(arbitrary_types_allowed=True, use_enum_values=True, populate_by_name=True, frozen=True) diff --git a/mitm_tooling/transformation/superset/definitions/core.py b/mitm_tooling/transformation/superset/definitions/core.py index dc0fab3..527acbd 100644 --- a/mitm_tooling/transformation/superset/definitions/core.py +++ b/mitm_tooling/transformation/superset/definitions/core.py @@ -1,14 +1,7 @@ -from abc import ABC, abstractmethod -from datetime import datetime, tzinfo, UTC -from enum import StrEnum, IntEnum -from typing import Any, Annotated, Literal, Self, Union +from abc import ABC +from typing import Any -import pydantic -from uuid import UUID - -from pydantic import Field, AnyUrl - -from mitm_tooling.data_types import MITMDataType +from mitm_tooling.representation import ColumnName from .constants import * @@ -19,8 +12,8 @@ class SupersetPostProcessing(pydantic.BaseModel, ABC): raise NotImplementedError() -class DatasourceIdentifier(pydantic.BaseModel): - id: SupersetId +class DatasourceIdentifier(FrozenSupersetDefinition): + id: SupersetId = 'placeholder' type: Literal['table', 'annotation'] = 'table' dataset_uuid: StrUUID = pydantic.Field(exclude=True) @@ -30,7 +23,7 @@ class DatasourceIdentifier(pydantic.BaseModel): return f'{self.id}__{self.type}' -class SupersetColumn(pydantic.BaseModel): +class SupersetColumn(FrozenSupersetDefinition): column_name: str verbose_name: str | None = None id: SupersetId | None = None @@ -43,7 +36,7 @@ class SupersetColumn(pydantic.BaseModel): filterable: bool = True expression: str | None = None description: str | None = None - python_date_format: str = None + python_date_format: str | None = None extra: dict[str, Any] = pydantic.Field(default_factory=dict) @@ -51,7 +44,7 @@ class IdentifiedSupersetColumn(SupersetColumn): id: SupersetId -class SupersetMetric(pydantic.BaseModel): +class SupersetMetric(FrozenSupersetDefinition): metric_name: str verbose_name: str expression: str @@ -59,11 +52,11 @@ class SupersetMetric(pydantic.BaseModel): description: str | None = None d3format: str | None = None currency: str | None = None - extra: dict[str, Any] = Field(default_factory=dict) + extra: dict[str, Any] = pydantic.Field(default_factory=dict) warning_text: str | None = None -class SupersetAdhocFilter(pydantic.BaseModel): +class SupersetAdhocFilter(FrozenSupersetDefinition): clause: str = 'WHERE' subject: ColumnName operator: FilterOperator @@ -75,7 +68,7 @@ class SupersetAdhocFilter(pydantic.BaseModel): sqlExpression: str | None = None -class SupersetAdhocMetric(pydantic.BaseModel): +class SupersetAdhocMetric(FrozenSupersetDefinition): label: str column: SupersetColumn expressionType: ExpressionType = ExpressionType.SIMPLE @@ -86,7 +79,7 @@ class SupersetAdhocMetric(pydantic.BaseModel): optionName: str | None = None -class SupersetAdhocColumn(pydantic.BaseModel): +class SupersetAdhocColumn(FrozenSupersetDefinition): label: str sqlExpression: str columnType: str = 'BASE_AXIS' @@ -97,11 +90,11 @@ class SupersetAdhocColumn(pydantic.BaseModel): OrderBy = tuple[SupersetAdhocMetric | str, bool] -class AnnotationOverrides(pydantic.BaseModel): +class AnnotationOverrides(FrozenSupersetDefinition): time_range: str | None = None -class AnnotationLayer(pydantic.BaseModel): +class AnnotationLayer(FrozenSupersetDefinition): name: str value: int annotationType: AnnotationType @@ -125,7 +118,7 @@ class TimeAnnotationLayer(AnnotationLayer): descriptionColumns: list[str] = pydantic.Field(default_factory=list) -class QueryObjectFilterClause(pydantic.BaseModel): +class QueryObjectFilterClause(FrozenSupersetDefinition): col: ColumnName op: FilterOperator val: FilterValues | None = None @@ -137,7 +130,7 @@ class QueryObjectFilterClause(pydantic.BaseModel): return cls(col=adhoc_filter.subject, op=adhoc_filter.operator, val=adhoc_filter.comparator) -class QueryObjectExtras(pydantic.BaseModel): +class QueryObjectExtras(FrozenSupersetDefinition): having: str = '' where: str = '' time_grain_sqla: TimeGrain | None = None @@ -147,8 +140,8 @@ AnnotationLayers = Annotated[list[AnnotationLayer] | None, pydantic.SerializeAsA PostProcessingList = Annotated[list[SupersetPostProcessing | dict[str, Any]], pydantic.SerializeAsAny] -class QueryObject(pydantic.BaseModel): - annotation_layers: AnnotationLayers = pydantic.Field(default_factory=list) +class QueryObject(BaseSupersetDefinition): + annotation_layers: list[AnnotationLayer] = pydantic.Field(default_factory=list) applied_time_extras: dict[str, str] = pydantic.Field(default_factory=dict) columns: list[ColumnName | SupersetAdhocColumn] = pydantic.Field(default_factory=list) datasource: DatasourceIdentifier | None = None @@ -164,7 +157,7 @@ class QueryObject(pydantic.BaseModel): is_timeseries: bool | None = None order_desc: bool = True orderby: list[OrderBy] = pydantic.Field(default_factory=list) - post_processing: PostProcessingList = pydantic.Field(default_factory=list) + post_processing: list[SupersetPostProcessing | dict[str, Any]] = pydantic.Field(default_factory=list) result_type: ChartDataResultType | None = None row_limit: int | None = None row_offset: int | None = None @@ -177,16 +170,16 @@ class QueryObject(pydantic.BaseModel): url_params: dict[str, str] | None = pydantic.Field(default_factory=dict) -class FormData(pydantic.BaseModel): +class FormData(BaseSupersetDefinition): pass -class QueryContext(pydantic.BaseModel): +class QueryContext(BaseSupersetDefinition): model_config = pydantic.ConfigDict(arbitrary_types_allowed=True) datasource: DatasourceIdentifier queries: list[QueryObject] = pydantic.Field(default_factory=list) - form_data: Annotated[FormData | dict[str, Any] | None, pydantic.SerializeAsAny, pydantic.Field(default=None)] + form_data: FormData | dict[str, Any] | None = pydantic.Field(default=None) result_type: ChartDataResultType = ChartDataResultType.FULL result_format: ChartDataResultFormat = ChartDataResultFormat.JSON force: bool = False diff --git a/mitm_tooling/transformation/superset/definitions/post_processing.py b/mitm_tooling/transformation/superset/definitions/post_processing.py index 38612f7..e7d7d41 100644 --- a/mitm_tooling/transformation/superset/definitions/post_processing.py +++ b/mitm_tooling/transformation/superset/definitions/post_processing.py @@ -1,13 +1,11 @@ -import pydantic - from .core import * -class PivotOperator(pydantic.BaseModel): +class PivotOperator(FrozenSupersetDefinition): operator: str = 'mean' -class PivotOptions(pydantic.BaseModel): +class PivotOptions(FrozenSupersetDefinition): aggregates: list[dict[ColumnName, PivotOperator]] columns: list[ColumnName] = pydantic.Field(default_factory=list) index: list[ColumnName] = pydantic.Field(default_factory=list) @@ -22,7 +20,7 @@ class Pivot(SupersetPostProcessing): options: PivotOptions -class RenameOptions(pydantic.BaseModel): +class RenameOptions(FrozenSupersetDefinition): columns: dict[ColumnName, ColumnName | None] = pydantic.Field(default_factory=dict) level: int = 0 inplace: bool | None = True @@ -40,4 +38,3 @@ class Flatten(SupersetPostProcessing): @property def operation(self) -> str: return 'flatten' - diff --git a/mitm_tooling/transformation/superset/exporting.py b/mitm_tooling/transformation/superset/exporting.py new file mode 100644 index 0000000..87c2042 --- /dev/null +++ b/mitm_tooling/transformation/superset/exporting.py @@ -0,0 +1,45 @@ +import os.path +import zipfile + +import yaml + +from mitm_tooling.utilities.io_utils import FilePath, ByteSink, use_bytes_io +from .definitions import SupersetDefFile, SupersetDefFolder + + +def write_superset_def_as_zip(target: ByteSink, superset_def: SupersetDefFolder): + folder_structure = superset_def.folder_dict + with use_bytes_io(target, expected_file_ext='.zip', mode='wb', create_file_if_necessary=True) as f: + with zipfile.ZipFile(f, 'w', zipfile.ZIP_DEFLATED) as zf: + def mk_node(arg, prefix: str | None = None): + if isinstance(arg, SupersetDefFile): + fn = f'{arg.filename}.yaml' + if prefix: + fn = os.path.join(prefix, fn) + dump = arg.model_dump(by_alias=True, mode='python', exclude_none=True) + s = yaml.dump(dump, default_flow_style=False) + + zf.writestr(fn, s) + # with zf.open(fn, 'w') as df: + # yaml.dump(dump, df) + elif isinstance(arg, list): + for arg in arg: + mk_node(arg, prefix=prefix) + elif isinstance(arg, dict): + for folder, folder_content in arg.items(): + path = None + if folder != '.' and prefix: + path = os.path.join(prefix, folder) + elif prefix: + path = prefix + elif folder != '.': + path = folder + if folder != '.': + zf.mkdir(path) + mk_node(folder_content, prefix=path) + + mk_node(folder_structure) + + +def write_superset_assets_def(output_path: FilePath, superset_def: SupersetDefFolder): + write_superset_def_as_zip(output_path, superset_def) diff --git a/mitm_tooling/transformation/superset/factories/assets.py b/mitm_tooling/transformation/superset/factories/assets.py new file mode 100644 index 0000000..d9fb2a7 --- /dev/null +++ b/mitm_tooling/transformation/superset/factories/assets.py @@ -0,0 +1,21 @@ +from ..definitions import SupersetMetadataDef, SupersetDatabaseDef, SupersetDashboardDef +from ..definitions.assets import MetadataType, SupersetAssetsDef, SupersetDatasetDef, SupersetChartDef, \ + ExtendedSupersetAssetsDef, SupersetMitMDatasetDef + + +def mk_metadata(metadata_type: MetadataType) -> SupersetMetadataDef: + return SupersetMetadataDef(type=metadata_type) + + +def mk_assets(databases: list[SupersetDatabaseDef] = None, + datasets: list[SupersetDatasetDef] = None, + charts: list[SupersetChartDef] = None, + dashboards: list[SupersetDashboardDef] = None, + metadata_type: MetadataType | None = None) -> SupersetAssetsDef: + return SupersetAssetsDef(databases=databases, datasets=datasets, charts=charts, dashboards=dashboards, + metadata=SupersetMetadataDef(type=metadata_type or MetadataType.Asset)) + + +def mk_extended_assets(mitm_datasets: list[SupersetMitMDatasetDef], + base_assets: SupersetAssetsDef) -> ExtendedSupersetAssetsDef: + return ExtendedSupersetAssetsDef(mitm_datasets=mitm_datasets, base_assets=base_assets) diff --git a/mitm_tooling/transformation/superset/factories/charts.py b/mitm_tooling/transformation/superset/factories/charts.py index 63a28c8..4bbd85f 100644 --- a/mitm_tooling/transformation/superset/factories/charts.py +++ b/mitm_tooling/transformation/superset/factories/charts.py @@ -1,25 +1,28 @@ from pydantic import UUID4 from mitm_tooling.data_types import MITMDataType -from .core import mk_empty_adhoc_time_filter, mk_adhoc_metric, mk_pivot_post_processing, mk_time_avg_post_processing, \ - mk_adhoc_column -from .query import mk_query_object, mk_query_object_filter_clause, mk_query_context, \ +from mitm_tooling.utilities.python_utils import unique +from .core import mk_empty_adhoc_time_filter, mk_adhoc_metric, mk_pivot_post_processing, mk_adhoc_column +from .query import mk_query_object, mk_query_context, \ mk_empty_query_object_time_filter_clause from .utils import mk_uuid -from ..definitions import SupersetChartDef, PieChartParams, DatasourceIdentifier, ColumnName, SupersetAggregate, \ - SupersetVizType, QueryContext, TimeSeriesBarParams, TimeGrain, QueryObjectFilterClause, SupersetAdhocFilter, \ +from ..definitions import SupersetChartDef, PieChartParams, DatasourceIdentifier, SupersetAggregate, \ + SupersetVizType, TimeSeriesBarParams, TimeGrain, QueryObjectFilterClause, SupersetAdhocFilter, \ TimeSeriesLineParams, QueryObjectExtras +from mitm_tooling.representation import ColumnName def mk_pie_chart(name: str, datasource_identifier: DatasourceIdentifier, col: ColumnName, dt: MITMDataType, - groupby_cols: list[ColumnName], uuid: UUID4 | None = None) -> SupersetChartDef: + groupby_cols: list[ColumnName] | None = None, uuid: UUID4 | None = None) -> SupersetChartDef: + groupby_cols = groupby_cols or [] metric = mk_adhoc_metric(col, agg=SupersetAggregate.COUNT, dt=dt) params = PieChartParams(datasource=datasource_identifier, metric=metric, groupby=groupby_cols, adhoc_filters=[mk_empty_adhoc_time_filter()]) - - qo = mk_query_object([col], metrics=[metric], filters=[mk_empty_query_object_time_filter_clause()]) + # TODO may not be necessary to add groupby + qo = mk_query_object(unique([col], groupby_cols), metrics=[metric], + filters=[mk_empty_query_object_time_filter_clause()]) qc = mk_query_context(datasource=datasource_identifier, queries=[qo], form_data=params) return SupersetChartDef(slice_name=name, @@ -33,11 +36,13 @@ def mk_pie_chart(name: str, datasource_identifier: DatasourceIdentifier, col: Co def mk_time_series_bar_chart(name: str, datasource_identifier: DatasourceIdentifier, y_col: ColumnName, - y_dt: MITMDataType, x_col: ColumnName, - groupby_cols: list[ColumnName], + y_dt: MITMDataType, + x_col: ColumnName, + groupby_cols: list[ColumnName] | None = None, filters: list[SupersetAdhocFilter] | None = None, uuid: UUID4 | None = None, time_grain: TimeGrain | None = None) -> SupersetChartDef: + groupby_cols = groupby_cols or [] metric = mk_adhoc_metric(y_col, agg=SupersetAggregate.COUNT, dt=y_dt) adhoc_filters = [mk_empty_adhoc_time_filter()] if filters: @@ -53,8 +58,7 @@ def mk_time_series_bar_chart(name: str, pp = mk_pivot_post_processing(x_col, cols=[y_col], aggregations={metric.label: 'mean'}, renames={metric.label: None}) adhoc_x = mk_adhoc_column(x_col, timeGrain=time_grain) - cols = list(set([adhoc_x, y_col] + groupby_cols)) - qo = mk_query_object(columns=cols, + qo = mk_query_object(columns=unique([adhoc_x, y_col], groupby_cols), metrics=[metric], filters=[QueryObjectFilterClause.from_adhoc_filter(af) for af in adhoc_filters], post_processing=pp, @@ -69,13 +73,14 @@ def mk_time_series_bar_chart(name: str, uuid=uuid or mk_uuid()) -def mk_count_time_series_chart(name: str, - datasource_identifier: DatasourceIdentifier, - groupby_cols: list[ColumnName], - time_col: ColumnName = 'time', - filters: list[SupersetAdhocFilter] | None = None, - uuid: UUID4 | None = None, - time_grain: TimeGrain | None = None): +def mk_avg_count_time_series_chart(name: str, + datasource_identifier: DatasourceIdentifier, + groupby_cols: list[ColumnName], + time_col: ColumnName = 'time', + filters: list[SupersetAdhocFilter] | None = None, + uuid: UUID4 | None = None, + time_grain: TimeGrain | None = None): + groupby_cols = groupby_cols or [] metric = mk_adhoc_metric(time_col, agg=SupersetAggregate.COUNT, dt=MITMDataType.Datetime) adhoc_filters = [mk_empty_adhoc_time_filter()] if filters: @@ -91,8 +96,7 @@ def mk_count_time_series_chart(name: str, pp = mk_pivot_post_processing(time_col, cols=groupby_cols, aggregations={metric.label: 'mean'}, renames={metric.label: None}) adhoc_time_col = mk_adhoc_column(time_col, timeGrain=time_grain) - cols = list(set([adhoc_time_col] + groupby_cols)) - qo = mk_query_object(columns=cols, + qo = mk_query_object(columns=unique([adhoc_time_col], groupby_cols), metrics=[metric], filters=[QueryObjectFilterClause.from_adhoc_filter(af) for af in adhoc_filters], post_processing=pp, diff --git a/mitm_tooling/transformation/superset/factories/core.py b/mitm_tooling/transformation/superset/factories/core.py index 3e08279..7707725 100644 --- a/mitm_tooling/transformation/superset/factories/core.py +++ b/mitm_tooling/transformation/superset/factories/core.py @@ -1,10 +1,8 @@ -import uuid from typing import overload -import pydantic +import sqlalchemy as sa from ..definitions import * -import sqlalchemy as sa def mk_pivot_post_processing(index_col: ColumnName, cols: list[ColumnName], aggregations: dict[ColumnName, str], diff --git a/mitm_tooling/transformation/superset/factories/dashboard.py b/mitm_tooling/transformation/superset/factories/dashboard.py new file mode 100644 index 0000000..b66224c --- /dev/null +++ b/mitm_tooling/transformation/superset/factories/dashboard.py @@ -0,0 +1,14 @@ +from typing import Any + +from pydantic import UUID4 + +from mitm_tooling.transformation.superset.definitions import SupersetDashboardDef +from mitm_tooling.transformation.superset.factories.utils import mk_uuid + + +def mk_superset_dashboard(title: str, + position: dict[str, Any], + description: str | None = None, + uuid: UUID4 | None = None) -> SupersetDashboardDef: + return SupersetDashboardDef(dashboard_title=title, position=position, description=description, + uuid=uuid or mk_uuid()) diff --git a/mitm_tooling/transformation/superset/factories/database.py b/mitm_tooling/transformation/superset/factories/database.py new file mode 100644 index 0000000..d853bc6 --- /dev/null +++ b/mitm_tooling/transformation/superset/factories/database.py @@ -0,0 +1,15 @@ +from pydantic import AnyUrl, UUID4 + +from .utils import mk_uuid +from ..common import name_plus_uuid +from ..definitions import SupersetDatabaseDef + + +def mk_database(name: str, + sqlalchemy_uri: AnyUrl, + uuid: UUID4 | None = None, + uniquify_name: bool = False) -> SupersetDatabaseDef: + uuid = uuid or mk_uuid() + if uniquify_name: + name = name_plus_uuid(name, uuid) + return SupersetDatabaseDef(database_name=name, sqlalchemy_uri=sqlalchemy_uri, uuid=uuid) diff --git a/mitm_tooling/transformation/superset/factories/dataset.py b/mitm_tooling/transformation/superset/factories/dataset.py index e9ab09d..f21c661 100644 --- a/mitm_tooling/transformation/superset/factories/dataset.py +++ b/mitm_tooling/transformation/superset/factories/dataset.py @@ -1,17 +1,18 @@ import pydantic import sqlalchemy as sa -from mitm_tooling.transformation.superset.definitions import SupersetDatasetDef, SupersetMetric + from mitm_tooling.data_types import MITMDataType from mitm_tooling.extraction.sql.data_models import TableMetaInfo +from mitm_tooling.transformation.superset.definitions import SupersetDatasetDef from .core import mk_column, mk_metric from .utils import mk_uuid from ..definitions import SupersetAggregate -def mk_dataset_def(tm: TableMetaInfo, database_uuid: pydantic.UUID4, dialect: sa.Dialect | None = None, - uuid: pydantic.UUID4 | None = None) -> SupersetDatasetDef: +def mk_dataset(tm: TableMetaInfo, database_uuid: pydantic.UUID4, dialect: sa.Dialect | None = None, + uuid: pydantic.UUID4 | None = None) -> SupersetDatasetDef: cols = [] - metrics = [mk_metric('', SupersetAggregate.COUNT)] + metrics = [mk_metric('*', SupersetAggregate.COUNT)] for c in tm.columns: dt = tm.column_properties[c].mitm_data_type cols.append( diff --git a/mitm_tooling/transformation/superset/factories/datasource.py b/mitm_tooling/transformation/superset/factories/datasource.py deleted file mode 100644 index 8b2bf5e..0000000 --- a/mitm_tooling/transformation/superset/factories/datasource.py +++ /dev/null @@ -1,9 +0,0 @@ -import pydantic -from pydantic import AnyUrl, UUID4 - -from .utils import mk_uuid -from ..definitions import SupersetDatabaseDef - - -def mk_datasource(name: str, sqlalchemy_uri: AnyUrl, uuid: UUID4 | None = None) -> SupersetDatabaseDef: - return SupersetDatabaseDef(database_name=name, sqlalchemy_uri=sqlalchemy_uri, uuid=uuid or mk_uuid()) diff --git a/mitm_tooling/transformation/superset/factories/mitm_dataset.py b/mitm_tooling/transformation/superset/factories/mitm_dataset.py new file mode 100644 index 0000000..21802d4 --- /dev/null +++ b/mitm_tooling/transformation/superset/factories/mitm_dataset.py @@ -0,0 +1,14 @@ +from pydantic.v1 import UUID4 + +from mitm_tooling.definition import MITM +from mitm_tooling.transformation.superset.definitions import SupersetMitMDatasetDef +from mitm_tooling.transformation.superset.factories.utils import mk_uuid + + +def mk_mitm_dataset(name: str, mitm: MITM, database_uuid: UUID4, uuid: UUID4 | None = None) -> SupersetMitMDatasetDef: + return SupersetMitMDatasetDef( + dataset_name=name, + mitm=mitm, + database_uuid=database_uuid, + uuid=uuid or mk_uuid() + ) diff --git a/mitm_tooling/transformation/superset/factories/mitm_specific/__init__.py b/mitm_tooling/transformation/superset/factories/mitm_specific/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/mitm_tooling/transformation/superset/factories/mitm_specific/maed_charts.py b/mitm_tooling/transformation/superset/factories/mitm_specific/maed_charts.py new file mode 100644 index 0000000..b317319 --- /dev/null +++ b/mitm_tooling/transformation/superset/factories/mitm_specific/maed_charts.py @@ -0,0 +1,40 @@ +from mitm_tooling.data_types import MITMDataType +from mitm_tooling.representation import SQLRepresentationSchema, Header, mk_sql_rep_schema +from mitm_tooling.utilities.python_utils import take_first +from ..charts import mk_avg_count_time_series_chart, mk_pie_chart, mk_time_series_bar_chart +from ..core import mk_adhoc_filter +from ...definition_bundles import SupersetDatasourceBundle +from ...definitions import SupersetChartDef, FilterOperator + + +def mk_maed_charts(header: Header, superset_datasource_bundle: SupersetDatasourceBundle, + sql_rep_schema: SQLRepresentationSchema | None = None) -> list[ + SupersetChartDef]: + sql_rep_schema = sql_rep_schema or mk_sql_rep_schema(header) + ds_ids = superset_datasource_bundle.placeholder_dataset_identifiers + + event_counts_ts = mk_time_series_bar_chart('Event Counts', + ds_ids['observations'], + 'type', + MITMDataType.Text, + 'time', + groupby_cols=['object'], + filters=[ + mk_adhoc_filter('kind', FilterOperator.EQUALS, 'E')] + ) + measurement_counts_ts = mk_time_series_bar_chart('Measurement Counts', + ds_ids['observations'], + 'type', + MITMDataType.Text, + 'time', + groupby_cols=['object'], + filters=[ + mk_adhoc_filter('kind', FilterOperator.EQUALS, 'M')] + ) + objects_pie = mk_pie_chart('Objects', ds_ids['observations'], 'object', MITMDataType.Text) + + type_name, tbl = take_first(sql_rep_schema.type_tables['measurement'].items()) # TODO + + ts = mk_avg_count_time_series_chart(f'{type_name} Time Series', ds_ids[tbl.name], groupby_cols=['object'], + filters=[mk_adhoc_filter('kind', FilterOperator.EQUALS, 'M')]) + return [event_counts_ts, measurement_counts_ts, objects_pie, ts] diff --git a/mitm_tooling/transformation/superset/factories/mitm_specific/maed_dashboards.py b/mitm_tooling/transformation/superset/factories/mitm_specific/maed_dashboards.py new file mode 100644 index 0000000..217a184 --- /dev/null +++ b/mitm_tooling/transformation/superset/factories/mitm_specific/maed_dashboards.py @@ -0,0 +1,11 @@ +from mitm_tooling.representation import Header +from mitm_tooling.transformation.superset.definition_bundles import SupersetDatasourceBundle +from mitm_tooling.transformation.superset.definitions import SupersetDashboardDef +from mitm_tooling.transformation.superset.factories.dashboard import mk_superset_dashboard +from mitm_tooling.transformation.superset.factories.mitm_specific.maed_charts import mk_maed_charts + + +def mk_maed_dashboard(header: Header, datasource_bundle: SupersetDatasourceBundle) -> SupersetDashboardDef: + charts = mk_maed_charts(header, datasource_bundle) + position = {} + return mk_superset_dashboard('MAED Dashboard', position, description='A rudimentary dashboard to view MAED data.') diff --git a/mitm_tooling/transformation/superset/factories/query.py b/mitm_tooling/transformation/superset/factories/query.py index 8ec1d00..2d51203 100644 --- a/mitm_tooling/transformation/superset/factories/query.py +++ b/mitm_tooling/transformation/superset/factories/query.py @@ -1,7 +1,7 @@ - -from ..definitions import QueryObject, QueryContext, ColumnName, DatasourceIdentifier, SupersetAdhocMetric, \ - SupersetAdhocFilter, QueryObjectFilterClause, FormData, FilterValues, FilterOperator, SupersetPostProcessing -from .core import mk_adhoc_metric, mk_adhoc_metrics +from ..definitions import QueryObject, QueryContext, DatasourceIdentifier, SupersetAdhocMetric, \ + QueryObjectFilterClause, FormData, FilterValues, FilterOperator, SupersetPostProcessing, \ + SupersetAdhocColumn +from mitm_tooling.representation import ColumnName def mk_query_object_filter_clause(col: ColumnName, op: FilterOperator, @@ -13,7 +13,7 @@ def mk_empty_query_object_time_filter_clause() -> QueryObjectFilterClause: return mk_query_object_filter_clause('time', FilterOperator.TEMPORAL_RANGE) -def mk_query_object(columns: list[ColumnName], +def mk_query_object(columns: list[ColumnName | SupersetAdhocColumn], metrics: list[SupersetAdhocMetric], filters: list[QueryObjectFilterClause], orderby: list[tuple[SupersetAdhocMetric, bool]] | None = None, @@ -24,7 +24,8 @@ def mk_query_object(columns: list[ColumnName], orderby = [(metrics[0], 0)] if post_processing is None: post_processing = [] - return QueryObject(columns=columns, metrics=metrics, filters=filters, orderby=orderby, post_processing=post_processing, + return QueryObject(columns=columns, metrics=metrics, filters=filters, orderby=orderby, + post_processing=post_processing, row_limit=row_limit, **kwargs) diff --git a/mitm_tooling/transformation/superset/from_intermediate.py b/mitm_tooling/transformation/superset/from_intermediate.py new file mode 100644 index 0000000..717ec5c --- /dev/null +++ b/mitm_tooling/transformation/superset/from_intermediate.py @@ -0,0 +1,22 @@ +from mitm_tooling.representation import Header + +from .common import SupersetDBConnectionInfo +from .definition_bundles import SupersetDatasourceBundle, SupersetMitMDatasetBundle +from ...definition import MITM + + +def header_into_superset_datasource_bundle(header: Header, + db_conn_info: SupersetDBConnectionInfo) -> SupersetDatasourceBundle: + from ..sql.from_intermediate import header_into_db_meta + from .from_sql import db_meta_into_superset_datasource_bundle + db_meta = header_into_db_meta(header) + return db_meta_into_superset_datasource_bundle(db_meta, db_conn_info) + + +def header_into_superset_mitm_dataset(header: Header, + db_conn_info: SupersetDBConnectionInfo, + dataset_name: str) -> SupersetMitMDatasetBundle: + from ..sql.from_intermediate import header_into_db_meta + from .from_sql import db_meta_into_mitm_dataset_bundle + db_meta = header_into_db_meta(header) + return db_meta_into_mitm_dataset_bundle(db_meta, db_conn_info, dataset_name, header.mitm) diff --git a/mitm_tooling/transformation/superset/from_sql.py b/mitm_tooling/transformation/superset/from_sql.py new file mode 100644 index 0000000..de25df1 --- /dev/null +++ b/mitm_tooling/transformation/superset/from_sql.py @@ -0,0 +1,59 @@ +from mitm_tooling.extraction.sql.data_models import DBMetaInfo +from mitm_tooling.extraction.sql.data_models.db_meta import DBMetaInfoBase +from mitm_tooling.extraction.sql.db import connect_and_reflect +from .common import SupersetDBConnectionInfo +from .common import _mk_engine, SQLiteFileOrEngine +from .definition_bundles import SupersetDatasourceBundle, SupersetMitMDatasetBundle +from .factories.database import mk_database +from .factories.dataset import mk_dataset +from .factories.mitm_dataset import mk_mitm_dataset +from ...definition import MITM + + +def db_meta_into_superset_datasource_bundle(db_meta: DBMetaInfoBase, + db_conn_info: SupersetDBConnectionInfo) -> SupersetDatasourceBundle: + sqlalchemy_uri = db_conn_info.sql_alchemy_uri + db_name = db_conn_info.db_name + dialect = db_conn_info.dialect_cls() + + database = mk_database(name=db_name, sqlalchemy_uri=sqlalchemy_uri, uniquify_name=True) + + database_uuid = database.uuid + datasets = [] + for schema_name, schema_tables in db_meta.db_structure.items(): + for table_name, tm in schema_tables.items(): + datasets.append(mk_dataset(tm, database_uuid, dialect=dialect)) + + return SupersetDatasourceBundle(database=database, datasets=datasets) + + +def db_meta_into_mitm_dataset_bundle(db_meta: DBMetaInfoBase, + db_conn_info: SupersetDBConnectionInfo, + dataset_name: str, + mitm: MITM) -> SupersetMitMDatasetBundle: + datasource_bundle = db_meta_into_superset_datasource_bundle(db_meta, db_conn_info) + db_uuid = datasource_bundle.database.uuid + mitm_dataset = mk_mitm_dataset(dataset_name, mitm, db_uuid) + return SupersetMitMDatasetBundle(mitm_dataset=mitm_dataset, datasource_bundle=datasource_bundle) + + +def db_into_superset_datasource_bundle(arg: SQLiteFileOrEngine, + db_conn_info: SupersetDBConnectionInfo) -> SupersetDatasourceBundle: + engine = _mk_engine(arg) + # db_name = db_conn_info.db_name or db_conn_info.db_name_in_uri + # db_name = os.path.splitext(os.path.basename(sqlite_file_path))[0] + + meta, _ = connect_and_reflect(engine, allowed_schemas=[db_conn_info.schema_name]) + db_meta = DBMetaInfo.from_sa_meta(meta, default_schema=db_conn_info.schema_name) + + return db_meta_into_superset_datasource_bundle(db_meta, db_conn_info) + + +def db_into_mitm_dataset_bundle(arg: SQLiteFileOrEngine, + db_conn_info: SupersetDBConnectionInfo, + dataset_name: str, + mitm: MITM) -> SupersetMitMDatasetBundle: + datasource_bundle = db_into_superset_datasource_bundle(arg, db_conn_info) + db_uuid = datasource_bundle.database.uuid + mitm_dataset = mk_mitm_dataset(dataset_name, mitm, db_uuid) + return SupersetMitMDatasetBundle(mitm_dataset=mitm_dataset, datasource_bundle=datasource_bundle) diff --git a/mitm_tooling/transformation/superset/interface.py b/mitm_tooling/transformation/superset/interface.py new file mode 100644 index 0000000..d9c4028 --- /dev/null +++ b/mitm_tooling/transformation/superset/interface.py @@ -0,0 +1,37 @@ +from mitm_tooling.definition import MITM +from mitm_tooling.representation import Header +from mitm_tooling.transformation.superset.definition_bundles import SupersetDatasourceBundle, \ + SupersetVisualizationBundle, SupersetMitMDatasetBundle +from mitm_tooling.transformation.superset.from_intermediate import header_into_superset_mitm_dataset +from pydantic import AnyUrl +from typing import Callable + +from . import mitm_specific +from .common import SupersetDBConnectionInfo +from .from_intermediate import header_into_superset_datasource_bundle +from ...representation.sql_representation import SQL_REPRESENTATION_DEFAULT_SCHEMA, SchemaName + +mitm_specific_visualization_factories: dict[ + MITM, Callable[[Header, SupersetDatasourceBundle], SupersetVisualizationBundle]] = { + MITM.MAED: mitm_specific.mk_maed_visualization, +} + + +def mk_superset_datasource_import(header: Header, sql_alchemy_uri: AnyUrl, explicit_db_name: str | None = None, + schema_name: SchemaName = SQL_REPRESENTATION_DEFAULT_SCHEMA) -> SupersetDatasourceBundle: + db_conn_info = SupersetDBConnectionInfo(sql_alchemy_uri=sql_alchemy_uri, explicit_db_name=explicit_db_name, + schema_name=schema_name) + return header_into_superset_datasource_bundle(header, db_conn_info) + + +def mk_superset_mitm_dataset_import(header: Header, sql_alchemy_uri: AnyUrl, dataset_name: str, + explicit_db_name: str | None = None, + schema_name: SchemaName = SQL_REPRESENTATION_DEFAULT_SCHEMA) -> SupersetMitMDatasetBundle: + db_conn_info = SupersetDBConnectionInfo(sql_alchemy_uri=sql_alchemy_uri, explicit_db_name=explicit_db_name, + schema_name=schema_name) + return header_into_superset_mitm_dataset(header, db_conn_info, dataset_name=dataset_name) + + +def mk_superset_visualization_import(header: Header, + superset_datasource_bundle: SupersetDatasourceBundle) -> SupersetVisualizationBundle: + return mitm_specific_visualization_factories[header.mitm](header, superset_datasource_bundle) diff --git a/mitm_tooling/transformation/superset/mitm_specific/__init__.py b/mitm_tooling/transformation/superset/mitm_specific/__init__.py new file mode 100644 index 0000000..77f4b30 --- /dev/null +++ b/mitm_tooling/transformation/superset/mitm_specific/__init__.py @@ -0,0 +1 @@ +from .maed_visualization import mk_maed_visualization diff --git a/mitm_tooling/transformation/superset/mitm_specific/maed_visualization.py b/mitm_tooling/transformation/superset/mitm_specific/maed_visualization.py new file mode 100644 index 0000000..4594f62 --- /dev/null +++ b/mitm_tooling/transformation/superset/mitm_specific/maed_visualization.py @@ -0,0 +1,14 @@ +from mitm_tooling.representation import Header +from mitm_tooling.transformation.superset.definition_bundles import SupersetVisualizationBundle, \ + SupersetDatasourceBundle +from mitm_tooling.transformation.superset.factories.mitm_specific.maed_charts import mk_maed_charts +from mitm_tooling.transformation.superset.factories.mitm_specific.maed_dashboards import mk_maed_dashboard + + +def mk_maed_visualization(header: Header, + superset_datasource_bundle: SupersetDatasourceBundle) -> SupersetVisualizationBundle: + ds_ids = superset_datasource_bundle.placeholder_dataset_identifiers + + charts = mk_maed_charts(header, superset_datasource_bundle) + dashboard = mk_maed_dashboard(header, superset_datasource_bundle) + return SupersetVisualizationBundle(charts=charts, dashboards=[dashboard]) diff --git a/mitm_tooling/transformation/superset/superset_representation.py b/mitm_tooling/transformation/superset/superset_representation.py deleted file mode 100644 index 3f60b2b..0000000 --- a/mitm_tooling/transformation/superset/superset_representation.py +++ /dev/null @@ -1,100 +0,0 @@ -import os.path -import uuid -import zipfile -from typing import TypedDict, Unpack - -import sqlalchemy as sa -import yaml -from pydantic import AnyUrl - -from mitm_tooling.extraction.sql.data_models import DBMetaInfo -from mitm_tooling.extraction.sql.db import create_sa_engine, connect_and_reflect -from mitm_tooling.utilities.io_utils import DataSink, FilePath, ByteSink, use_bytes_io -from mitm_tooling.representation import MITMData, mk_sqlite, mk_db_schema -from mitm_tooling.representation.sql_representation import MITMData, mk_sqlite, mk_db_schema, \ - SQL_REPRESENTATION_DEFAULT_SCHEMA - -from mitm_tooling.data_types import MITMDataType - -from .definitions import SupersetDatasetDef, SupersetColumn, SupersetDatabaseDef, SupersetAssetsDef, \ - SupersetDefFile, SupersetMetadataDef, SupersetMetric, GenericDataType -from .factories.dataset import mk_dataset_def -from .factories.datasource import mk_datasource - - -class KWArguments(TypedDict, total=False): - sqlalchemy_uri: AnyUrl - - -def tentative_superset_mount_url(db_name: str) -> AnyUrl: - return AnyUrl(f'sqlite:////mounted-files/{db_name}.sqlite?check_same_thread=false') - - -def write_superset_def_as_zip(target: ByteSink, superset_def: SupersetAssetsDef): - folder_structure = superset_def.to_folder_structure() - with use_bytes_io(target, expected_file_ext='.zip', mode='wb', create_file_if_necessary=True) as f: - with zipfile.ZipFile(f, 'w', zipfile.ZIP_DEFLATED) as zf: - def mk_node(arg, prefix: str | None = None): - if isinstance(arg, SupersetDefFile): - fn = f'{arg.filename}.yaml' - if prefix: - fn = os.path.join(prefix, fn) - dump = arg.model_dump(by_alias=True, mode='python', exclude_none=True) - s = yaml.dump(dump, default_flow_style=False) - - zf.writestr(fn, s) - # with zf.open(fn, 'w') as df: - # yaml.dump(dump, df) - elif isinstance(arg, list): - for arg in arg: - mk_node(arg, prefix=prefix) - elif isinstance(arg, dict): - for folder, folder_content in arg.items(): - path = None - if folder != '.' and prefix: - path = os.path.join(prefix, folder) - elif prefix: - path = prefix - elif folder != '.': - path = folder - if folder != '.': - zf.mkdir(path) - mk_node(folder_content, prefix=path) - - mk_node(folder_structure) - - -def write_superset_def(output_path: FilePath, superset_def: SupersetAssetsDef): - write_superset_def_as_zip(output_path, superset_def) - - -def infer_superset_dataset_def(sqlite_file_path: FilePath, **kwargs: Unpack[KWArguments]) -> SupersetAssetsDef: - engine = create_sa_engine(AnyUrl(f'sqlite:///{str(sqlite_file_path)}')) - meta, _ = connect_and_reflect(engine) - db_meta = DBMetaInfo.from_sa_meta(meta, default_schema=SQL_REPRESENTATION_DEFAULT_SCHEMA) - - database_uuid = uuid.uuid4() - datasets = [] - for schema_name, schema_tables in db_meta.db_structure.items(): - for table_name, tm in schema_tables.items(): - datasets.append(mk_dataset_def(tm, database_uuid, dialect=engine.dialect)) - - db_name = os.path.splitext(os.path.basename(sqlite_file_path))[0] - sqlalchemy_uri = kwargs.get('sqlalchemy_uri', tentative_superset_mount_url(db_name)) - return SupersetAssetsDef( - databases=[mk_datasource(name=db_name, - sqlalchemy_uri=sqlalchemy_uri, - uuid=database_uuid)], - datasets=datasets) - - -def mk_inferred_superset_dataset_def(output_path: FilePath, sqlite_file_path: FilePath, **kwargs: Unpack[KWArguments]): - dataset_def = infer_superset_dataset_def(sqlite_file_path, **kwargs) - write_superset_def(output_path, dataset_def) - - -def mk_superset_dataset_def(mitm_data: MITMData, sqlite_file_path: str | None = ':memory:', - definition_file_path: str | None = 'superset_definition.zip', - **kwargs: Unpack[KWArguments]): - engine, sql_rep_schema = mk_sqlite(mitm_data, file_path=sqlite_file_path) - mk_inferred_superset_dataset_def(definition_file_path, sqlite_file_path, **kwargs) diff --git a/mitm_tooling/utilities/io_utils.py b/mitm_tooling/utilities/io_utils.py index b1a8eb1..ab22e48 100644 --- a/mitm_tooling/utilities/io_utils.py +++ b/mitm_tooling/utilities/io_utils.py @@ -1,14 +1,12 @@ from __future__ import annotations -import datetime -from collections.abc import Mapping - import io import os +from collections.abc import Mapping from contextlib import contextmanager from typing import TextIO, BinaryIO, Generator -import pandas as pd +import pydantic def ensure_directory_exists(path): @@ -102,6 +100,7 @@ def use_for_pandas_io(arg: DataSource | DataSink) -> Generator[FilePath | TextIO else: yield arg + def read_yaml_dict_from(arg: DataSource, swallow_exceptions=True) -> Mapping | None: import yaml from yaml import YAMLError @@ -126,3 +125,9 @@ def read_json_dict_from(arg: DataSource, swallow_exceptions=True) -> Mapping | N return None else: raise e + + +def dump_pydantic(model: pydantic.BaseModel, target: DataSink, **kwargs): + with use_string_io(target, expected_file_ext='.json', mode='w') as sink: + s = model.model_dump_json(indent=2, by_alias=True, **kwargs) + sink.write(s) diff --git a/mitm_tooling/utilities/python_utils.py b/mitm_tooling/utilities/python_utils.py index 9539574..c766c0d 100644 --- a/mitm_tooling/utilities/python_utils.py +++ b/mitm_tooling/utilities/python_utils.py @@ -1,5 +1,6 @@ +import itertools from collections.abc import Sequence, Mapping -from typing import TypeVar, Hashable, Iterable, Callable, Any +from typing import TypeVar, Hashable, Iterable, Callable, Any, Union, TypeVarTuple, Unpack, overload def i_th(i: int, result_constr: type | None = tuple): @@ -15,7 +16,7 @@ T1 = TypeVar('T1') T2 = TypeVar('T2') -def ident(arg: T) -> T: +def identity(arg: T) -> T: return arg @@ -32,7 +33,7 @@ def unpack_singleton(arg: dict[K, T]) -> tuple[K, T]: return k, v -def unpack_inner(arg: Iterable[dict[K, T1]], transform: Callable[[T1], T2] = ident) -> list[tuple[K, T2]]: +def unpack_inner(arg: Iterable[dict[K, T1]], transform: Callable[[T1], T2] = identity) -> list[tuple[K, T2]]: return [(k, transform(v)) for d in arg for k, v in d.items()] @@ -93,6 +94,18 @@ def pick_from_mapping(d: Mapping[K, T], keys: Sequence[K]) -> list[tuple[K, T]]: return [(k, d[k]) for k in keys] +@overload +def unique(*its: Iterable[T]) -> list[T]: + ... + + +Ts = TypeVarTuple('Ts') + + +def unique(*its: Iterable[Union[*Ts]]) -> list[Union[Unpack[Ts]]]: + return list(set(itertools.chain(*its))) + + class ExtraInfoExc(Exception): def __init__(self, msg=None): super().__init__() diff --git a/mitm_tooling/utilities/sql_utils.py b/mitm_tooling/utilities/sql_utils.py index 9e6fbf8..e4c51b8 100644 --- a/mitm_tooling/utilities/sql_utils.py +++ b/mitm_tooling/utilities/sql_utils.py @@ -1,6 +1,7 @@ import sqlalchemy as sa from pydantic import AnyUrl from sqlalchemy import Engine +from typing import Type def qualify(*, table: str, schema: str | None = None, column: str | None = None): @@ -19,5 +20,16 @@ def unqualify(n: str) -> list[str]: def create_sa_engine(db_url: AnyUrl, sqlite_extensions: list[str] | None = None, test_engine: bool = False, **engine_kwargs) -> Engine: engine = sa.create_engine(str(db_url), **engine_kwargs) - return engine + + +def any_url_into_sa_url(url: AnyUrl) -> sa.engine.URL: + return sa.engine.make_url(str(url)) + + +def sa_url_into_any_url(url: sa.engine.URL) -> AnyUrl: + return AnyUrl(url.render_as_string(hide_password=False)) + + +def dialect_cls_from_url(url: AnyUrl) -> Type[sa.engine.Dialect]: + return any_url_into_sa_url(url).get_dialect() diff --git a/pyproject.toml b/pyproject.toml index 457dade..5b43170 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "mitm-tooling" -version = "0.3.3" +version = "0.4.0" description = "" authors = ["Leah Tacke genannt Unterberg <leah.tgu@pads.rwth-aachen.de>"] readme = "README.md" diff --git a/test/something.py b/test/something.py index 4285c17..a8eb9c8 100644 --- a/test/something.py +++ b/test/something.py @@ -1,6 +1,8 @@ import os import unittest +from mitm_tooling.transformation.superset.factories.utils import mk_uuid + class MyTestCase(unittest.TestCase): def test_something(self): @@ -8,7 +10,7 @@ class MyTestCase(unittest.TestCase): from mitm_tooling.io.exporting import get_mitm_def def test_sql_repr(self): - from mitm_tooling.representation import Header, HeaderEntry, mk_db_schema + from mitm_tooling.representation import Header, HeaderEntry, mk_sql_rep_schema from mitm_tooling.definition import MITM from mitm_tooling.data_types import MITMDataType h = Header(mitm=MITM.MAED, header_entries=[ @@ -19,7 +21,7 @@ class MyTestCase(unittest.TestCase): HeaderEntry(concept='segment_data', kind='SD', type_name='annotation_info', attributes=['y'], attribute_dtypes=[MITMDataType.Json]), ]) - sql_rep = mk_db_schema(h) + sql_rep = mk_sql_rep_schema(h) print(sql_rep.meta) print() print(sql_rep.concept_tables) @@ -27,7 +29,7 @@ class MyTestCase(unittest.TestCase): print() def test_writing_sqlite(self): - from mitm_tooling.representation import Header, HeaderEntry, mk_db_schema, MITMData, mk_sqlite + from mitm_tooling.representation import Header, HeaderEntry, mk_sql_rep_schema, MITMData, mk_sqlite from mitm_tooling.definition import MITM from mitm_tooling.data_types import MITMDataType h = Header(mitm=MITM.MAED, header_entries=[ @@ -59,12 +61,19 @@ class MyTestCase(unittest.TestCase): mk_sqlite(syn, 'synthetic-variation.sqlite') def test_superset(self): - from mitm_tooling.transformation.superset import mk_inferred_superset_dataset_def - mk_inferred_superset_dataset_def('superset_import', 'synthetic.sqlite') + from mitm_tooling.transformation.superset import write_inferred_superset_dataset_import + write_inferred_superset_dataset_import('superset_import', 'synthetic.sqlite') def test_superset_variation(self): - from mitm_tooling.transformation.superset import mk_inferred_superset_dataset_def - mk_inferred_superset_dataset_def('superset_import_variation', 'synthetic-variation.sqlite') + from mitm_tooling.transformation.superset import write_inferred_superset_dataset_import + write_inferred_superset_dataset_import('superset_import_variation', 'synthetic-variation.sqlite') + + def test_superset_assets(self): + from mitm_tooling.transformation.superset import (write_superset_dashboard_import, mk_superset_dataset_import) + from mitm_tooling.io import importing, MITM + syn = importing.read_zip('synthetic.maed', MITM.MAED) + write_superset_dashboard_import(syn, output_path='superset_dashboard_import', + db_name=f'synthetic-{mk_uuid().hex[:4]}') if __name__ == '__main__': -- GitLab