From 855f684e983374ee57fdbc36f2eab33e06b04e25 Mon Sep 17 00:00:00 2001
From: Leah Tacke genannt Unterberg <leah.tgu@pads.rwth-aachen.de>
Date: Thu, 24 Apr 2025 10:06:48 +0200
Subject: [PATCH] added db probing to wrap multiple table probes

---
 mitm_tooling/extraction/sql/db/__init__.py   | 2 +-
 mitm_tooling/extraction/sql/db/db_probing.py | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/mitm_tooling/extraction/sql/db/__init__.py b/mitm_tooling/extraction/sql/db/__init__.py
index ed6feed..3768680 100644
--- a/mitm_tooling/extraction/sql/db/__init__.py
+++ b/mitm_tooling/extraction/sql/db/__init__.py
@@ -3,7 +3,7 @@ from mitm_tooling.utilities.sql_utils import create_sa_engine
 # noinspection PyUnresolvedReferences
 from .db_reflection import connect_and_reflect, derive_table_meta_info
 # noinspection PyUnresolvedReferences
-from .db_probing import create_table_probe, initialize_db_probe, test_query
+from .db_probing import create_table_probe, initialize_db_probe, test_query, create_db_probe
 # noinspection PyUnresolvedReferences
 from .db_schema_query import SyntacticColumnCondition, SemanticColumnCondition, SyntacticTableCondition, SemanticTableCondition, DBMetaQuery, resolve_db_meta_query, resolve_db_meta_selection
 # noinspection PyUnresolvedReferences
diff --git a/mitm_tooling/extraction/sql/db/db_probing.py b/mitm_tooling/extraction/sql/db/db_probing.py
index fdb39d9..925bc6a 100644
--- a/mitm_tooling/extraction/sql/db/db_probing.py
+++ b/mitm_tooling/extraction/sql/db/db_probing.py
@@ -247,3 +247,11 @@ def create_table_probe(db_session: Session, table_meta: TableMetaInfo, sample_si
     sampled_values = df.to_dict(orient='list')  # {str(c): vs for c, vs in df.to_dict(orient='list').items()}
     return TableProbe(table_meta=table_meta, row_count=row_count, sampled_values=sampled_values,
                       inferred_types=inferred_types, sample_summaries=sample_summaries)
+
+
+def create_db_probe(db_session: Session, db_meta: DBMetaInfo, sample_size: int = 100) -> DBProbe:
+    """Run `create_table_probe` on each table in `db_meta.tables` and register the results on an initialized probe."""
+    probe = initialize_db_probe(db_meta)
+    probe.update_probes(*((meta.short_table_identifier, create_table_probe(db_session, meta, sample_size=sample_size))
+                          for meta in db_meta.tables.values()))
+    return probe
-- 
GitLab