diff --git a/mitm_tooling/definition/__init__.py b/mitm_tooling/definition/__init__.py index f92e92e3289e1e7dd99cec165c02b5fca9f4779a..b19bb0cafdfc7ce9cdbe33545170f9c8da57b75b 100644 --- a/mitm_tooling/definition/__init__.py +++ b/mitm_tooling/definition/__init__.py @@ -1,5 +1,5 @@ # noinspection PyUnresolvedReferences -from .definition_representation import MITM, ConceptName, RelationName, ConceptLevel, ConceptKind, MITMDefinition, ForeignRelationInfo, OwnedRelations, ConceptProperties +from .definition_representation import MITM, ConceptName, RelationName, ConceptLevel, ConceptKind, MITMDefinition, ForeignRelationInfo, OwnedRelations, ConceptProperties, TypeName # noinspection PyUnresolvedReferences from .registry import get_mitm_def, mitm_definitions from . import definition_representation diff --git a/mitm_tooling/definition/definition_representation.py b/mitm_tooling/definition/definition_representation.py index f34b23f15ffba8908bc56a30a32c5e90c6ddf635..09903f94bd6c052471b6a02df082217646e78d28 100644 --- a/mitm_tooling/definition/definition_representation.py +++ b/mitm_tooling/definition/definition_representation.py @@ -15,6 +15,7 @@ CANON_COLUMN_GROUP_ORDERING: tuple[COLUMN_GROUPS, ...] = ( 'kind', 'type', 'identity-relations', 'inline-relations', 'foreign-relations', 'attributes') mitm_data_types = [v for v in MITMDataType] ConceptName = str +TypeName = str RelationName = str @@ -34,7 +35,7 @@ class MITM(enum.StrEnum): OCEL2 = 'OCEL2' # DPPD = 'DPPD' - +# TODO most dicts here should be frozen dicts, just like tuples over lists class ForeignRelationInfo(pydantic.BaseModel): target_concept: ConceptName fk_relations: dict[RelationName, RelationName] diff --git a/mitm_tooling/extraction/sql/data_models/db_meta.py b/mitm_tooling/extraction/sql/data_models/db_meta.py index 2da47ceb09a8ca0c9d17127b1bc5c3dfd939ec92..f73350600fca9478a56cc89119be8815163c2290 100644 --- a/mitm_tooling/extraction/sql/data_models/db_meta.py +++ b/mitm_tooling/extraction/sql/data_models/db_meta.py @@ -118,7 +118,7 @@ class TableMetaInfo(TableMetaInfoBase): @classmethod def from_sa_table(cls, t: Table, queryable_source: Queryable | None = None, default_schema: str | None = None) -> Self: - fkcs = [ForeignKeyConstraint.from_sa_constraint(fkc, default_schema) for fkc in t.foreign_key_constraints] + fkcs = [ForeignKeyConstraint.from_sa_constraint(fkc, t.schema or default_schema) for fkc in t.foreign_key_constraints] col_props = {c.name: ColumnProperties(nullable=c.nullable, unique=bool(c.unique), part_of_index=any(c.name in ind.columns for ind in t.indexes), part_of_pk=c.primary_key, part_of_fk=len(c.foreign_keys) > 0, @@ -129,7 +129,7 @@ class TableMetaInfo(TableMetaInfoBase): primary_key=[c.name for c in t.primary_key] if t.primary_key else None, indexes=[list(ind.columns.keys()) for ind in t.indexes], foreign_key_constraints=fkcs, - schema_name=t.schema if t.schema else default_schema, column_properties=col_props, + schema_name=t.schema or default_schema, column_properties=col_props, sa_table=t, queryable_source=queryable_source if queryable_source is not None else t) def filter_shallow(self, column_selection: set[ColumnName] | None = None) -> Self: diff --git a/mitm_tooling/extraction/sql/data_models/virtual_view.py b/mitm_tooling/extraction/sql/data_models/virtual_view.py index 44131a8a8b6d3721141fe158f78477b31737a828..3ecbcbe20bcc42e97fbb0f557ef0ae142cf71884 100644 --- a/mitm_tooling/extraction/sql/data_models/virtual_view.py +++ b/mitm_tooling/extraction/sql/data_models/virtual_view.py @@ -35,7 +35,7 @@ class VirtualView(VirtualViewBase): if delete_if_exists: meta.remove(t) else: - return + return None virtual_table = sa.Table(name, meta, *cols, schema=schema) tm = TableMetaInfo.from_sa_table(virtual_table, queryable_source=from_clause, default_schema=schema) diff --git a/mitm_tooling/extraction/sql/mapping/export.py b/mitm_tooling/extraction/sql/mapping/export.py index 050a567b50e43e6e8e10020ea7a0152c91d848c6..caf71f41b8f616be600e780da9a28de7b729ca7b 100644 --- a/mitm_tooling/extraction/sql/mapping/export.py +++ b/mitm_tooling/extraction/sql/mapping/export.py @@ -27,7 +27,7 @@ class Exportable(pydantic.BaseModel): data_providers: dict[ConceptName, list[DataProvider]] filename: str | None = None - def execute_to_memory(self, db_session: Session, validate: bool = False) -> ZippedExport: + def export_to_memory(self, db_session: Session, validate: bool = False) -> ZippedExport: header_entries = [] tables = {} @@ -35,22 +35,22 @@ class Exportable(pydantic.BaseModel): dfs = [] for dp in dps: - df = dp.instance_provider.from_session(db_session) + df = dp.instance_provider.apply_session(db_session) if validate: raise NotImplementedError df = dp.instance_postprocessor.apply(df) dfs.append(df) - header_entries += dp.header_entry_provider.from_df(df) + header_entries += dp.header_entry_provider.apply_df(df) tables[c] = pd.concat(dfs, axis='index', ignore_index=True) - header = Header(mitm=self.mitm, header_entries=header_entries) + header = Header(mitm=self.mitm, header_entries=tuple(header_entries)) filename = self.filename if self.filename else f'{self.mitm}.zip' return ZippedExport(mitm=self.mitm, filename=filename, mitm_data=MITMData(header=header, concept_dfs=tables)) - def stream_to_file(self, db_session: Session, validate: bool = False) -> StreamingZippedExport: + def export_as_stream(self, db_session: Session, validate: bool = False) -> StreamingZippedExport: data_sources = {} for c, dps in self.data_providers.items(): @@ -62,11 +62,11 @@ class Exportable(pydantic.BaseModel): for dp in dps: def local_iter(dp: DataProvider, columns=tuple(concept_file_columns)) -> Iterator[ tuple[pd.DataFrame, list[HeaderEntry]]]: - for df_chunk in dp.instance_provider.from_session_chunked(db_session, STREAMING_CHUNK_SIZE): + for df_chunk in dp.instance_provider.apply_session_chunked(db_session, STREAMING_CHUNK_SIZE): if validate: raise NotImplementedError df_chunk = df_chunk.reindex(columns=list(columns), copy=False) - yield dp.instance_postprocessor.apply(df_chunk), dp.header_entry_provider.from_df(df_chunk) + yield dp.instance_postprocessor.apply(df_chunk), dp.header_entry_provider.apply_df(df_chunk) chunk_iterators.append(local_iter(dp)) diff --git a/mitm_tooling/extraction/sql/mapping/mapping.py b/mitm_tooling/extraction/sql/mapping/mapping.py index 5642ac6e42bb3614915968edc84822e9f9e98a23..54eb83ed63e16ee759efcea0e07895c1e7b5a5fd 100644 --- a/mitm_tooling/extraction/sql/mapping/mapping.py +++ b/mitm_tooling/extraction/sql/mapping/mapping.py @@ -28,6 +28,7 @@ from mitm_tooling.representation.intermediate_representation import HeaderEntry class ColumnContentProvider(pydantic.BaseModel): model_config = pydantic.ConfigDict(arbitrary_types_allowed=True) + column_name: str static_value: str | None = None is_present_in_table: bool = False @@ -38,7 +39,10 @@ class ColumnContentProvider(pydantic.BaseModel): static_value: str | None = None) -> Self: return ColumnContentProvider(column_name=tup[0], column_element=tup[1], is_present_in_table=is_present_in_table, static_value=static_value) - + @classmethod + def from_static(cls, name: str, value: str, dt: MITMDataType = MITMDataType.Text) -> Self: + ce = sa.literal(value, dt.sa_sql_type_cls).alias(name) + return ColumnContentProvider(column_name=name, column_element=ce, is_present_in_table=False, static_value=value) @dataclasses.dataclass class HeaderEntryProvider: @@ -53,12 +57,12 @@ class HeaderEntryProvider: def type_arity(self): return len(self.attributes) - def from_session(self, db_session: Session) -> list[HeaderEntry]: + def apply_session(self, db_session: Session) -> list[HeaderEntry]: distinct = db_session.execute( sa.select(self.kind_provider.column_element, self.type_provider.column_element).distinct()).all() - return self.from_iterable(((kind, type_name) for kind, type_name in distinct)) + return self.apply_iterable(((kind, type_name) for kind, type_name in distinct)) - def from_df(self, df: pd.DataFrame) -> list[HeaderEntry]: + def apply_df(self, df: pd.DataFrame) -> list[HeaderEntry]: iterable = None if (k := self.kind_provider.static_value) is not None and (t := self.type_provider.static_value) is not None: iterable = ((k, t),) @@ -70,16 +74,16 @@ class HeaderEntryProvider: iterable = set( df.loc[:, [self.kind_provider.column_name, self.type_provider.column_name]].itertuples(index=None, name=None)) - return self.from_iterable(iterable) + return self.apply_iterable(iterable) - def from_iterable(self, distinct: Iterable[tuple[str, str]]) -> list[HeaderEntry]: - return [HeaderEntry(concept=self.concept, kind=kind, type_name=type_name, attributes=self.attributes, - attribute_dtypes=self.attribute_dtypes) for kind, type_name in distinct] + def apply_iterable(self, distinct: Iterable[tuple[str, str]]) -> list[HeaderEntry]: + return [HeaderEntry(concept=self.concept, kind=kind, type_name=type_name, attributes=tuple(self.attributes), + attribute_dtypes=tuple(self.attribute_dtypes)) for kind, type_name in distinct] @dataclasses.dataclass class InstancesPostProcessor: - transforms: list[TableTransforms] + transforms: list[TableTransforms] = dataclasses.field(default_factory=list) def apply(self, df: pd.DataFrame): return transform_df(df, self.transforms) @@ -89,13 +93,13 @@ class InstancesPostProcessor: class InstancesProvider: virtual_view: VirtualView - def from_session(self, db_session: Session) -> pd.DataFrame: + def apply_session(self, db_session: Session) -> pd.DataFrame: tm = self.virtual_view.table_meta results = db_session.execute(tm.queryable_source.select()).all() df = pd.DataFrame.from_records(results, columns=list(tm.columns)) return df - def from_session_chunked(self, db_session: Session, chunk_size: int) -> Iterable[pd.DataFrame]: + def apply_session_chunked(self, db_session: Session, chunk_size: int) -> Iterable[pd.DataFrame]: tm = self.virtual_view.table_meta results = db_session.execute(tm.queryable_source.select()).partitions(chunk_size) for result_chunk in results: diff --git a/mitm_tooling/io/exporting.py b/mitm_tooling/io/exporting.py index 91e95a824e054830655257120f6912313c7dfc00..c236c810884fd792fee30153b4f8e3401258efa0 100644 --- a/mitm_tooling/io/exporting.py +++ b/mitm_tooling/io/exporting.py @@ -81,7 +81,7 @@ class StreamingZippedExport(FileExport): logger.debug(f'Wrote {len(df_chunk)} rows to {fn} (streaming export).') with zf.open('header.csv', 'w') as hf: - header_df = Header(mitm=self.mitm, header_entries=collected_header_entries).generate_header_df() + header_df = Header(mitm=self.mitm, header_entries=tuple(collected_header_entries)).generate_header_df() write_header_file(header_df, hf) diff --git a/mitm_tooling/representation/intermediate/__init__.py b/mitm_tooling/representation/intermediate/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/mitm_tooling/representation/intermediate/deltas.py b/mitm_tooling/representation/intermediate/deltas.py new file mode 100644 index 0000000000000000000000000000000000000000..eb2759fa9611e8edb34a8416f62bc39d9aa9a8d9 --- /dev/null +++ b/mitm_tooling/representation/intermediate/deltas.py @@ -0,0 +1,127 @@ +from abc import ABC +from collections import defaultdict +from typing import Literal, Any + +import pydantic + +from mitm_tooling.data_types import MITMDataType +from mitm_tooling.definition import ConceptName, TypeName +from .header import HeaderEntry, Header + + +class Delta(pydantic.BaseModel, ABC): + kind: Literal['insertion', 'deletion', 'update'] + + +class AttributeDelta(Delta): + pass + + +class AttributeUpdate(AttributeDelta): + kind: Literal['update'] = 'update' + index: int + name: str + dt: MITMDataType + + +class AttributeInsertion(AttributeUpdate): + kind: Literal['insertion'] = 'insertion' + + +class AttributeDeletion(AttributeDelta): + kind: Literal['deletion'] = 'deletion' + index: int + name: str + + +class RowDelta(Delta): + pass + + +class RowInsertion(RowDelta): + kind: Literal['insertion'] = 'insertion' + + +class RowDeletion(RowDelta): + kind: Literal['deletion'] = 'deletion' + + +class RowUpdate(RowDelta): + kind: Literal['update'] = 'update' + affected_identity: tuple[Any, ...] + + +class TypeDelta(Delta): + pass + + +class TypeUpdate(TypeDelta): + kind: Literal['update'] = 'update' + deltas: list[AttributeDelta] + + +class TypeInsertion(TypeDelta): + kind: Literal['insertion'] = 'insertion' + header_entry: HeaderEntry + + +class TypeDeletion(TypeDelta): + kind: Literal['deletion'] = 'deletion' + header_entry: HeaderEntry + + +class HeaderDelta(pydantic.BaseModel): + type_deltas: list[TypeDelta] + attribute_deltas: dict[ConceptName, dict[TypeName, list[AttributeDelta]]] + + +def diff_header_entry(a: HeaderEntry, b: HeaderEntry) -> TypeUpdate | None: + if a == b: + return None + deltas = [] + for i, ((attr_a, dt_a), (attr_b, dt_b)) in enumerate( + zip(zip(a.attributes, a.attribute_dtypes), zip(b.attributes, b.attribute_dtypes))): + if attr_a != attr_b or dt_a != dt_b: + deltas.append(AttributeUpdate(index=i, name=attr_b, dt=dt_b)) + k_a, k_b = a.get_k(), b.get_k() + x = k_b - k_a + if x > 0: + for j, (attr_b, dt_b) in zip(range(k_a, k_b), zip(b.attributes[k_a:], b.attribute_dtypes[k_a:])): + deltas.append(AttributeInsertion(index=j, name=attr_b, dt=dt_b)) + elif x < 0: + for j, (attr_a, dt_a) in zip(range(k_b, k_a), zip(a.attributes[k_b:], a.attribute_dtypes[k_b:])): + deltas.append(AttributeDeletion(index=j, name=attr_a)) + return TypeUpdate(deltas=deltas) + + +def diff_header(a: Header, b: Header) -> HeaderDelta: + type_deltas = [] + attribute_deltas = defaultdict(lambda: defaultdict(list)) + for concept, type_dict_a in a.as_dict.items(): + if concept not in b.as_dict: + type_deltas.extend((TypeDeletion(header_entry=he) for he in type_dict_a.values())) + else: + type_dict_b = b.as_dict[concept] + for type_name, he_a in type_dict_a.items(): + if type_name not in type_dict_b: + type_deltas.append(TypeDeletion(header_entry=he_a)) + else: + he_b = type_dict_b[type_name] + td = diff_header_entry(he_a, he_b) + if td is not None: + type_deltas.append(td) + attribute_deltas[concept][type_name].extend(td.deltas) + + for concept, type_dict_b in b.as_dict.items(): + if concept not in a.as_dict: + type_deltas.extend((TypeInsertion(header_entry=he) for he in type_dict_b.values())) + else: + type_dict_a = a.as_dict[concept] + for type_name, he_b in type_dict_b.items(): + if type_name not in type_dict_a: + type_deltas.append(TypeInsertion(header_entry=he_b)) + else: + # this case should have been handled above + pass + return HeaderDelta(type_deltas=type_deltas, attribute_deltas=attribute_deltas) + diff --git a/mitm_tooling/representation/intermediate/header.py b/mitm_tooling/representation/intermediate/header.py new file mode 100644 index 0000000000000000000000000000000000000000..c799495e8948940d98048e54e04e03d16747ce9d --- /dev/null +++ b/mitm_tooling/representation/intermediate/header.py @@ -0,0 +1,93 @@ +import itertools +import logging +from collections import defaultdict +from collections.abc import Sequence, Mapping +from functools import cached_property +from typing import Self + +import pandas as pd +import pydantic +from pydantic import ConfigDict + +from mitm_tooling.data_types.data_types import MITMDataType +from mitm_tooling.definition import get_mitm_def +from mitm_tooling.definition.definition_representation import ConceptName, MITM, TypeName +from ..common import mk_header_file_columns, ColumnName + + +class HeaderEntry(pydantic.BaseModel): + model_config = ConfigDict(frozen=True) + + concept: ConceptName + kind: str + type_name: TypeName + attributes: tuple[ColumnName, ...] + attribute_dtypes: tuple[MITMDataType, ...] + + @pydantic.model_validator(mode='after') + def attr_check(self): + if not len(self.attributes) == len(self.attribute_dtypes): + raise ValueError('Length of specified attributes and their data types differs.') + return self + + @classmethod + def from_row(cls, row: Sequence[str], mitm: MITM) -> Self: + kind, type_name = row[0], row[1] + concept = get_mitm_def(mitm).inverse_concept_key_map.get(kind) + if not concept: + raise ValueError(f'Encountered unknown concept key: "{kind}".') + + attrs, attr_dts = [], [] + for a, a_dt in zip(row[slice(2, None, 2)], row[slice(3, None, 2)]): + if pd.notna(a) and pd.notna(a_dt): + attrs.append(a) + try: + mitm_dt = MITMDataType(a_dt.lower()) if a_dt else MITMDataType.Unknown + attr_dts.append(mitm_dt) + except ValueError as e: + raise ValueError(f'Encountered unrecognized data type during header import: {a_dt}.') from e + + return HeaderEntry(concept=concept, kind=kind, type_name=type_name, attributes=tuple(attrs), + attribute_dtypes=tuple(attr_dts)) + + def get_k(self) -> int: + return len(self.attributes) + + def to_row(self) -> list[str | None]: + return [self.kind, self.type_name] + list( + itertools.chain(*zip(self.attributes, map(str, self.attribute_dtypes)))) + + +class Header(pydantic.BaseModel): + model_config = ConfigDict(frozen=True) + + mitm: MITM + header_entries: tuple[HeaderEntry, ...] = pydantic.Field(default_factory=tuple) + + @classmethod + def from_df(cls, df: pd.DataFrame, mitm: MITM) -> Self: + return Header(mitm=mitm, header_entries=tuple( + HeaderEntry.from_row(row, mitm) for row in df.itertuples(index=False))) + + def generate_header_df(self) -> pd.DataFrame: + k = max(map(lambda he: he.get_k(), self.header_entries), default=0) + deduplicated = {} + for he in self.header_entries: + deduplicated[(he.kind, he.type_name)] = he + lol = [he.to_row() for he in deduplicated.values()] + return pd.DataFrame(data=lol, columns=mk_header_file_columns(k)) + + def get(self, concept: ConceptName, type_name: TypeName) -> HeaderEntry | None: + return self.as_dict.get(concept, {}).get(type_name) + + @cached_property + def mitm_def(self): + return get_mitm_def(self.mitm) + + @cached_property + def as_dict(self) -> dict[ConceptName, dict[TypeName, HeaderEntry]]: + res = defaultdict(dict) + for he in self.header_entries: + res[he.concept][he.type_name] = he + return dict(res) + diff --git a/mitm_tooling/representation/intermediate_representation.py b/mitm_tooling/representation/intermediate_representation.py index 803b3ba02aaf12e7bb1e4ab99c42e86083fc13b3..268094e3d965ec39ff1a5b275fd8eb9ec659ea83 100644 --- a/mitm_tooling/representation/intermediate_representation.py +++ b/mitm_tooling/representation/intermediate_representation.py @@ -1,91 +1,22 @@ from __future__ import annotations +from functools import cached_property + import itertools import logging -from collections import defaultdict -from collections.abc import Iterator, Iterable, Sequence, Mapping -from typing import Self - import pandas as pd import pydantic -from pydantic import ConfigDict - +from collections import defaultdict +from collections.abc import Iterator, Iterable, Sequence, Mapping from mitm_tooling.data_types.data_types import MITMDataType from mitm_tooling.definition import get_mitm_def -from mitm_tooling.definition.definition_representation import ConceptName, MITM +from mitm_tooling.definition.definition_representation import ConceptName, MITM, TypeName from mitm_tooling.utilities.python_utils import take_first -from .common import mk_header_file_columns, ColumnName - -logger = logging.getLogger('api') - - -class HeaderEntry(pydantic.BaseModel): - concept: ConceptName - kind: str - type_name: str - attributes: list[ColumnName] - attribute_dtypes: list[MITMDataType] - - @pydantic.model_validator(mode='after') - def attr_check(self): - if not len(self.attributes) == len(self.attribute_dtypes): - raise ValueError('Length of specified attributes and their data types differs.') - return self - - @classmethod - def from_row(cls, row: Sequence[str], mitm: MITM) -> Self | None: - kind, type_name = row[0], row[1] - concept = get_mitm_def(mitm).inverse_concept_key_map.get(kind) - if not concept: - logger.error(f'Encountered unknown concept key: "{kind}".') - return None - - attrs, attr_dts = [], [] - for a, a_dt in zip(row[slice(2, None, 2)], row[slice(3, None, 2)]): - if pd.notna(a) and pd.notna(a_dt): - attrs.append(a) - try: - mitm_dt = MITMDataType(a_dt.lower()) if a_dt else MITMDataType.Unknown - attr_dts.append(mitm_dt) - except ValueError: - logger.error(f'Encountered unrecognized data type during header import: {a_dt}.') - return None - return HeaderEntry(concept=concept, kind=kind, type_name=type_name, attributes=attrs, attribute_dtypes=attr_dts) - - def get_k(self) -> int: - return len(self.attributes) - - def to_row(self) -> list[str | None]: - return [self.kind, self.type_name] + list( - itertools.chain(*zip(self.attributes, map(str, self.attribute_dtypes)))) - - -class Header(pydantic.BaseModel): - mitm: MITM - header_entries: list[HeaderEntry] = pydantic.Field(default_factory=list) - - @classmethod - def from_df(cls, df: pd.DataFrame, mitm: MITM) -> Self: - header_entries = [HeaderEntry.from_row(row, mitm) for row in df.itertuples(index=False)] - return Header(mitm=mitm, header_entries=header_entries) - - def generate_header_df(self) -> pd.DataFrame: - k = max(map(lambda he: he.get_k(), self.header_entries), default=0) - deduplicated = {} - for he in self.header_entries: - deduplicated[(he.kind, he.type_name)] = he - lol = [he.to_row() for he in deduplicated.values()] - return pd.DataFrame(data=lol, columns=mk_header_file_columns(k)) - - def get(self, concept: ConceptName, type_name: str) -> HeaderEntry | None: - return self._map.get(concept, {}).get(type_name) - - @property - def _map(self) -> Mapping[ConceptName, Mapping[str, HeaderEntry]]: - res = defaultdict(dict) - for he in self.header_entries: - res[he.concept][he.type_name] = he - return dict(res) +from pydantic import ConfigDict +from typing import Self + +from .intermediate.header import HeaderEntry, Header +from .intermediate.deltas import diff_header class MITMData(Iterable[tuple[ConceptName, pd.DataFrame]], pydantic.BaseModel): diff --git a/mitm_tooling/representation/sql_representation.py b/mitm_tooling/representation/sql_representation.py index fc48f11c4ddbc7325a5607c843389fe141af397e..fdb691e1e54ae24242d567cf373e8598c6cb02d1 100644 --- a/mitm_tooling/representation/sql_representation.py +++ b/mitm_tooling/representation/sql_representation.py @@ -15,7 +15,7 @@ from mitm_tooling.definition import RelationName from mitm_tooling.definition.definition_tools import ColGroupMaps from mitm_tooling.utilities.sql_utils import create_sa_engine, qualify from .common import * -from .intermediate_representation import Header, MITMData +from .intermediate_representation import Header, MITMData, TypeName from .sql.common import * from ..utilities.io_utils import FilePath from ..utilities.backports.sqlchemy_sql_views import create_view @@ -28,7 +28,7 @@ SQL_REPRESENTATION_DEFAULT_SCHEMA = 'main' ColumnsDict = dict[RelationName, sa.Column] ViewsDict = dict[TableName, sa.Table] ConceptTablesDict = dict[ConceptName, sa.Table] -ConceptTypeTablesDict = dict[ConceptName, dict[str, sa.Table]] +ConceptTypeTablesDict = dict[ConceptName, dict[TypeName, sa.Table]] MitMConceptSchemaItemGenerator = Callable[ [MITM, ConceptName, TableName, ColumnsDict, ColumnsDict | None], Generator[ diff --git a/mitm_tooling/transformation/sql/from_intermediate.py b/mitm_tooling/transformation/sql/from_intermediate.py index 891961fb2410004bd4da38c2c9926eb3b6e0ae25..c24cba5f34649b58b9164af0ee9be88962509468 100644 --- a/mitm_tooling/transformation/sql/from_intermediate.py +++ b/mitm_tooling/transformation/sql/from_intermediate.py @@ -16,3 +16,4 @@ def header_into_db_meta(header: Header) -> DBMetaInfo: def mitm_data_into_db_meta(mitm_data: MITMData) -> DBMetaInfo: return header_into_db_meta(mitm_data.header) + diff --git a/mitm_tooling/transformation/sql/into_exportable.py b/mitm_tooling/transformation/sql/into_exportable.py new file mode 100644 index 0000000000000000000000000000000000000000..91387a4f70865dc3ddf400ace097b204ecc78496 --- /dev/null +++ b/mitm_tooling/transformation/sql/into_exportable.py @@ -0,0 +1,26 @@ +from mitm_tooling.extraction.sql.data_models import TableMetaInfo, VirtualView +from mitm_tooling.extraction.sql.mapping import DataProvider, InstancesProvider, \ + HeaderEntryProvider, Exportable +from mitm_tooling.extraction.sql.mapping.mapping import ColumnContentProvider, InstancesPostProcessor +from mitm_tooling.representation import Header, SQLRepresentationSchema + + +def sql_rep_into_exportable(header: Header, sql_rep_schema: SQLRepresentationSchema) -> Exportable: + data_providers = [] + for he in header.header_entries: + if (type_t := sql_rep_schema.type_tables.get(he.concept, {}).get(he.type_name)) is not None: + tm = TableMetaInfo.from_sa_table(type_t) + typing_concept = header.mitm_def.get_properties(he.concept).typing_concept + data_providers.append(DataProvider(instance_provider=InstancesProvider( + virtual_view=VirtualView(table_meta=tm, from_clause=type_t, sa_table=type_t)), + header_entry_provider= + HeaderEntryProvider(concept=he.concept, + table_meta=tm, + kind_provider=ColumnContentProvider.from_static('kind', he.kind), + type_provider=ColumnContentProvider.from_static(typing_concept, + he.type_name), + attributes=list(he.attributes), + attribute_dtypes=list(he.attribute_dtypes)), + instance_postprocessor=InstancesPostProcessor()) + ) + return Exportable(mitm=header.mitm, data_providers=data_providers) diff --git a/mitm_tooling/transformation/sql/into_mappings.py b/mitm_tooling/transformation/sql/into_mappings.py new file mode 100644 index 0000000000000000000000000000000000000000..f8066baaa0bd962bf7ac67d46c5afedab7330936 --- /dev/null +++ b/mitm_tooling/transformation/sql/into_mappings.py @@ -0,0 +1,35 @@ +from mitm_tooling.extraction.sql.data_models import SourceDBType +from mitm_tooling.extraction.sql.mapping import ConceptMapping, ForeignRelation +from mitm_tooling.representation import Header, SQLRepresentationSchema + + +def sql_rep_into_mappings(header: Header, sql_rep_schema: SQLRepresentationSchema) -> list[ConceptMapping]: + mitm_def = header.mitm_def + cms = [] + for he in header.header_entries: + if (type_t := sql_rep_schema.type_tables[he.concept][he.type_name]) is not None: + concept_properties, relations = mitm_def.get(he.concept) + cms.append( + ConceptMapping( + mitm=header.mitm, + concept=he.concept, + base_table=(SourceDBType.OriginalDB, type_t.schema, type_t.name), + kind_col='kind' if 'kind' in type_t.columns else None, + type_col=concept_properties.typing_concept, + identity_columns=list(relations.identity.keys()), + inline_relations=list(relations.inline_relations.keys()), + foreign_relations={ + fk_name: ForeignRelation( + fk_columns=list(fk_info.fk_relations.keys()), + referred_table=(SourceDBType.OriginalDB, + concept_t.schema, + concept_t.name), + ) for fk_name, fk_info in relations.foreign.items() if + (concept_t := sql_rep_schema.concept_tables.get(fk_info.target_concept)) is not None + }, + attributes=list(he.attributes), + attribute_dtypes=list(he.attribute_dtypes), + ) + ) + + return cms diff --git a/pyproject.toml b/pyproject.toml index afd4e4307c00efe2b246d2dbf62d44af8e214e89..c5ad1fa7c84df4624ca62196c08c9fa6bb35be6a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "mitm-tooling" -version = "0.4.5" +version = "0.4.6" description = "" authors = ["Leah Tacke genannt Unterberg <leah.tgu@pads.rwth-aachen.de>"] readme = "README.md" diff --git a/test/something.py b/test/something.py index cd58d9c592f87cc60b93f4c3dd8f8403ee7fa369..e36b7394c8b39d8d6e3b0c040d8f8ef9e2a73473 100644 --- a/test/something.py +++ b/test/something.py @@ -15,14 +15,14 @@ class MyTestCase(unittest.TestCase): from mitm_tooling.representation import Header, HeaderEntry, mk_sql_rep_schema from mitm_tooling.definition import MITM from mitm_tooling.data_types import MITMDataType - h = Header(mitm=MITM.MAED, header_entries=[ - HeaderEntry(concept='measurement', kind='M', type_name='A', attributes=['x'], - attribute_dtypes=[MITMDataType.Numeric]), - HeaderEntry(concept='segment', kind='S', type_name='annotation', attributes=[], - attribute_dtypes=[]), - HeaderEntry(concept='segment_data', kind='SD', type_name='annotation_info', attributes=['y'], - attribute_dtypes=[MITMDataType.Json]), - ]) + h = Header(mitm=MITM.MAED, header_entries=( + HeaderEntry(concept='measurement', kind='M', type_name='A', attributes=('x',), + attribute_dtypes=(MITMDataType.Numeric,)), + HeaderEntry(concept='segment', kind='S', type_name='annotation', attributes=(), + attribute_dtypes=()), + HeaderEntry(concept='segment_data', kind='SD', type_name='annotation_info', attributes=('y',), + attribute_dtypes=(MITMDataType.Json, )), + )) sql_rep = mk_sql_rep_schema(h) print(sql_rep.meta) print() @@ -34,14 +34,14 @@ class MyTestCase(unittest.TestCase): from mitm_tooling.representation import Header, HeaderEntry, MITMData, mk_sqlite from mitm_tooling.definition import MITM from mitm_tooling.data_types import MITMDataType - h = Header(mitm=MITM.MAED, header_entries=[ - HeaderEntry(concept='measurement', kind='M', type_name='A', attributes=['x'], - attribute_dtypes=[MITMDataType.Numeric]), - HeaderEntry(concept='segment', kind='S', type_name='annotation', attributes=[], - attribute_dtypes=[]), - HeaderEntry(concept='segment_data', kind='SD', type_name='annotation_info', attributes=['y'], - attribute_dtypes=[MITMDataType.Json]), - ]) + h = Header(mitm=MITM.MAED, header_entries=( + HeaderEntry(concept='measurement', kind='M', type_name='A', attributes=('x',), + attribute_dtypes=(MITMDataType.Numeric,)), + HeaderEntry(concept='segment', kind='S', type_name='annotation', attributes=(), + attribute_dtypes=()), + HeaderEntry(concept='segment_data', kind='SD', type_name='annotation_info', attributes=('y',), + attribute_dtypes=(MITMDataType.Json,)), + )) mk_sqlite(MITMData(header=h), file_path='gendb.sqlite') def test_with_synthetic(self):