diff --git a/mitm_tooling/representation/sql/sql_insertion.py b/mitm_tooling/representation/sql/sql_insertion.py index b837ae1e84ed6ac0a6c9ebd828e6a73fb70206d3..8459bba445e9ca5fecd7bea9c535ace0402f11fe 100644 --- a/mitm_tooling/representation/sql/sql_insertion.py +++ b/mitm_tooling/representation/sql/sql_insertion.py @@ -15,7 +15,7 @@ from mitm_tooling.utilities.sql_utils import use_nested_conn, AnyDBBind from ..df import TypedMitMDataFrameStream from ..intermediate.header import Header, HeaderEntry -from .sql_representation import SQLRepresentationSchema, _get_unique_id_col_name, has_type_tables +from .sql_representation import SQLRepresentationSchema, _get_unique_id_col_name, has_type_tables, HeaderMetaTableName def insert_db_schema(bind: EngineOrConnection, sql_rep_schema: SQLRepresentationSchema) -> None: @@ -23,23 +23,23 @@ def insert_db_schema(bind: EngineOrConnection, sql_rep_schema: SQLRepresentation def insert_header_data(bind: EngineOrConnection, sql_rep_schema: SQLRepresentationSchema, header: Header) -> None: - if (meta_tables := sql_rep_schema.meta_tables): + if (meta_tables := sql_rep_schema.meta_tables) is not None: mitm_def_json = header.mitm_def.model_dump(mode='json', by_alias=True, exclude_unset=True, exclude_none=True) with use_nested_conn(bind) as conn: conn.execute( - meta_tables['header_meta_definition'].insert().values([{ - 'mitm': header.mitm, - 'mitm_def': mitm_def_json}])) + meta_tables.header_meta_definition.insert().values([{ + 'mitm': header.mitm, + 'mitm_def': mitm_def_json}])) if header.header_entries: - conn.execute(meta_tables['header_meta_types'].insert().values( + conn.execute(meta_tables.header_meta_types.insert().values( [{'kind': he.kind, 'type': he.type_name, 'concept': he.concept} for he in header.header_entries])) - conn.execute(meta_tables['header_meta_type_attributes'].insert().values([{ + conn.execute(meta_tables.header_meta_type_attributes.insert().values([{ 'kind': he.kind, 'type': he.type_name, 'attribute_order': i, 
def drop_header_data(bind: EngineOrConnection, sql_rep_schema: SQLRepresentationSchema) -> None:
    """Drop the header meta tables that belong to ``sql_rep_schema``.

    Nothing happens when the schema has no meta tables (``meta_tables`` is ``None``).
    Otherwise the three tables are dropped on a connection obtained through
    ``use_nested_conn`` and the connection is committed afterwards.

    :param bind: engine or connection the tables are dropped on
    :param sql_rep_schema: schema object holding the (optional) header meta tables
    """
    meta_tables = sql_rep_schema.meta_tables
    if meta_tables is None:
        return

    with use_nested_conn(bind) as conn:
        # Drop order is significant to callers of this function and is kept as is:
        # definition first, then type attributes, then types.
        for table in (
                meta_tables.header_meta_definition,
                meta_tables.header_meta_type_attributes,
                meta_tables.header_meta_types,
        ):
            table.drop(conn)
        conn.commit()


def insert_data(bind: AnyDBBind,
                gen_sql_rep_schema: Callable[[], SQLRepresentationSchema],
                gen_instances: Callable[[], TypedMitMDataFrameStream],
                gen_override_header: Callable[[], Header] | None = None) -> SQLRepInsertionResult:
    """Create the schema, insert all instances and finally write the header data.

    :param bind: database bind every step operates on
    :param gen_sql_rep_schema: factory producing the SQL representation schema
    :param gen_instances: factory producing the stream of typed dataframes to insert
    :param gen_override_header: optional factory for the header that is written to the
        header meta tables; when omitted, a header is built from the schema's ``mitm``
        and the types reported as inserted by ``insert_instances``
    :return: the result object returned by ``insert_instances``
    """
    schema = gen_sql_rep_schema()
    insert_db_schema(bind, schema)

    # The instance stream is only requested once the schema exists in the database.
    instance_stream = gen_instances()
    result = insert_instances(bind, schema, instance_stream)

    header = (
        gen_override_header()
        if gen_override_header
        else Header(mitm=schema.mitm, header_entries=frozenset(result.inserted_types))
    )
    insert_header_data(bind, schema, header)
    return result
class HeaderMetaTableName(StrEnum):
    """Names of the three header meta tables.

    Being a ``StrEnum``, each member can be used wherever a plain ``str`` table name
    is expected.
    """

    HeaderMetaDefinition = 'header_meta_definition'
    HeaderMetaTypes = 'header_meta_types'
    HeaderMetaTypeAttributes = 'header_meta_type_attributes'


class HeaderMetaTables(pydantic.BaseModel):
    # Bundles the three header meta tables so they can be reached by attribute access.
    # ``sa.Table`` is not a type pydantic knows how to validate, hence arbitrary types
    # have to be allowed explicitly.
    model_config = pydantic.ConfigDict(arbitrary_types_allowed=True)

    # Field names coincide with the values of ``HeaderMetaTableName``.
    header_meta_definition: sa.Table
    header_meta_types: sa.Table
    header_meta_type_attributes: sa.Table
def mitm_db_into_header(bind: AnyDBBind, override_schema: SchemaName | None = None) -> Header | None:
    """Rebuild a ``Header`` from the header meta tables stored in a database.

    The first ``mitm`` value found in ``header_meta_definition`` selects the MITM. All
    rows of ``header_meta_types`` are then read together with their (optional) rows in
    ``header_meta_type_attributes`` and turned into one ``HeaderEntry`` per
    ``(kind, type)`` pair, with attributes ordered by ``attribute_order``.

    :param bind: database bind to read from
    :param override_schema: schema in which the header tables live; forwarded to
        ``mk_header_tables``
    :return: the reconstructed header, or ``None`` when no (non-empty) ``mitm`` value
        is stored
    """
    header_tables = mk_header_tables(sa.MetaData(), override_schema=override_schema)
    types_table = header_tables.header_meta_types
    attrs_table = header_tables.header_meta_type_attributes
    column_names = ['kind', 'type', 'attribute_order', 'attribute_name', 'attribute_data_type']

    with use_db_bind(bind) as conn:
        mitm_name: str | None = conn.execute(
            sa.select(header_tables.header_meta_definition.c['mitm'])).scalars().first()
        if not mitm_name:
            return None

        mitm = MITM(mitm_name)
        mitm_def = get_mitm_def(mitm)

        # Outer join so that types without any attribute still yield a row (with NULLs
        # in the attribute columns). No ON clause is given, so SQLAlchemy has to derive
        # it from the tables' foreign key relationship.
        joined = sa.join(types_table, attrs_table, isouter=True)
        query = sa.select(
            types_table.c['kind'],
            types_table.c['type'],
            attrs_table.c['attribute_order'],
            attrs_table.c['attribute_name'],
            attrs_table.c['attribute_data_type'],
        ).select_from(joined)
        rows = conn.execute(query).all()

        df = pd.DataFrame.from_records(rows, columns=column_names)

        entries = []
        for (kind, type_name), group in df.groupby(['kind', 'type']):
            # dropna() discards every row containing a null in any column, which
            # removes the NULL-padded rows of attribute-less types.
            ordered = group.dropna().sort_values('attribute_order', ascending=True)
            entries.append(HeaderEntry(
                concept=mitm_def.inverse_concept_key_map[kind],
                kind=kind,
                type_name=type_name,
                attributes=tuple(ordered['attribute_name']),
                attribute_dtypes=tuple(ordered['attribute_data_type']),
            ))

        return Header(mitm=mitm, header_entries=frozenset(entries))
attributes=('y',), + attribute_dtypes=(MITMDataType.Json,)),) + )) + engine = create_sa_engine(AnyUrl('sqlite:///gendb.sqlite')) + from mitm_tooling.transformation.sql.into_intermediate import mitm_db_into_header + h_inf = mitm_db_into_header(engine) + print(h_inf) + assert h_inf == h \ No newline at end of file