From 855f684e983374ee57fdbc36f2eab33e06b04e25 Mon Sep 17 00:00:00 2001 From: Leah Tacke genannt Unterberg <leah.tgu@pads.rwth-aachen.de> Date: Thu, 24 Apr 2025 10:06:48 +0200 Subject: [PATCH] added db probing to wrap multiple table probes --- mitm_tooling/extraction/sql/db/__init__.py | 2 +- mitm_tooling/extraction/sql/db/db_probing.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/mitm_tooling/extraction/sql/db/__init__.py b/mitm_tooling/extraction/sql/db/__init__.py index ed6feed..3768680 100644 --- a/mitm_tooling/extraction/sql/db/__init__.py +++ b/mitm_tooling/extraction/sql/db/__init__.py @@ -3,7 +3,7 @@ from mitm_tooling.utilities.sql_utils import create_sa_engine # noinspection PyUnresolvedReferences from .db_reflection import connect_and_reflect, derive_table_meta_info # noinspection PyUnresolvedReferences -from .db_probing import create_table_probe, initialize_db_probe, test_query +from .db_probing import create_table_probe, initialize_db_probe, test_query, create_db_probe # noinspection PyUnresolvedReferences from .db_schema_query import SyntacticColumnCondition, SemanticColumnCondition, SyntacticTableCondition, SemanticTableCondition, DBMetaQuery, resolve_db_meta_query, resolve_db_meta_selection # noinspection PyUnresolvedReferences diff --git a/mitm_tooling/extraction/sql/db/db_probing.py b/mitm_tooling/extraction/sql/db/db_probing.py index fdb39d9..925bc6a 100644 --- a/mitm_tooling/extraction/sql/db/db_probing.py +++ b/mitm_tooling/extraction/sql/db/db_probing.py @@ -247,3 +247,11 @@ def create_table_probe(db_session: Session, table_meta: TableMetaInfo, sample_si sampled_values = df.to_dict(orient='list') # {str(c): vs for c, vs in df.to_dict(orient='list').items()} return TableProbe(table_meta=table_meta, row_count=row_count, sampled_values=sampled_values, inferred_types=inferred_types, sample_summaries=sample_summaries) + + +def create_db_probe(db_session: Session, db_meta: DBMetaInfo, sample_size: int = 100) -> DBProbe: + db_probe = initialize_db_probe(db_meta) + table_probes = ((tm.short_table_identifier, create_table_probe(db_session, tm, sample_size=sample_size)) for tm in + db_meta.tables.values()) + db_probe.update_probes(*table_probes) + return db_probe -- GitLab