From f62f62cf5f34569cdd2a288fe86603f57f8264ee Mon Sep 17 00:00:00 2001
From: Leah Tacke genannt Unterberg <leah.tgu@pads.rwth-aachen.de>
Date: Thu, 6 Feb 2025 17:13:02 +0100
Subject: [PATCH] working on superset definitions

---
 .../extraction/sql/data_models/__init__.py    |   2 +-
 .../superset/dataset_definition.py            | 133 ------------
 .../superset/definitions/__init__.py          |   5 +
 .../superset/definitions/charts.py            |  86 ++++++++
 .../superset/definitions/constants.py         | 183 +++++++++++++++++
 .../superset/definitions/core.py              | 193 ++++++++++++++++++
 .../superset/definitions/high_level.py        | 147 +++++++++++++
 .../superset/definitions/post_processing.py   |  43 ++++
 .../superset/factories/__init__.py            |   0
 .../superset/factories/charts.py              | 108 ++++++++++
 .../transformation/superset/factories/core.py |  90 ++++++++
 .../superset/factories/dataset.py             |  27 +++
 .../superset/factories/datasource.py          |   9 +
 .../superset/factories/query.py               |  33 +++
 .../superset/factories/utils.py               |   7 +
 .../superset/superset_representation.py       |  57 +++---
 test/something.py                             |  26 ++-
 17 files changed, 980 insertions(+), 169 deletions(-)
 delete mode 100644 mitm_tooling/transformation/superset/dataset_definition.py
 create mode 100644 mitm_tooling/transformation/superset/definitions/__init__.py
 create mode 100644 mitm_tooling/transformation/superset/definitions/charts.py
 create mode 100644 mitm_tooling/transformation/superset/definitions/constants.py
 create mode 100644 mitm_tooling/transformation/superset/definitions/core.py
 create mode 100644 mitm_tooling/transformation/superset/definitions/high_level.py
 create mode 100644 mitm_tooling/transformation/superset/definitions/post_processing.py
 create mode 100644 mitm_tooling/transformation/superset/factories/__init__.py
 create mode 100644 mitm_tooling/transformation/superset/factories/charts.py
 create mode 100644 mitm_tooling/transformation/superset/factories/core.py
 create mode 100644 mitm_tooling/transformation/superset/factories/dataset.py
 create mode 100644 mitm_tooling/transformation/superset/factories/datasource.py
 create mode 100644 mitm_tooling/transformation/superset/factories/query.py
 create mode 100644 mitm_tooling/transformation/superset/factories/utils.py

diff --git a/mitm_tooling/extraction/sql/data_models/__init__.py b/mitm_tooling/extraction/sql/data_models/__init__.py
index dfb226d..134a447 100644
--- a/mitm_tooling/extraction/sql/data_models/__init__.py
+++ b/mitm_tooling/extraction/sql/data_models/__init__.py
@@ -1,5 +1,5 @@
 # noinspection PyUnresolvedReferences
-from .db_meta import Queryable, TableMetaInfo, DBMetaInfo, ForeignKeyConstraint, ExplicitTableSelection, \
+from .db_meta import Queryable, ColumnProperties, TableMetaInfo, DBMetaInfo, ForeignKeyConstraint, ExplicitTableSelection, \
     ExplicitColumnSelection, ExplicitSelectionUtils, ColumnName
 # noinspection PyUnresolvedReferences
 from .db_probe import TableProbe, DBProbe, SampleSummary
diff --git a/mitm_tooling/transformation/superset/dataset_definition.py b/mitm_tooling/transformation/superset/dataset_definition.py
deleted file mode 100644
index 92190d4..0000000
--- a/mitm_tooling/transformation/superset/dataset_definition.py
+++ /dev/null
@@ -1,133 +0,0 @@
-from abc import ABC, abstractmethod
-from datetime import datetime, tzinfo
-from typing import Any, Annotated
-
-import pydantic
-from uuid import UUID
-from mitm_tooling.data_types import MITMDataType
-
-BetterUUID = Annotated[
-    UUID,
-    pydantic.BeforeValidator(lambda x: UUID(x) if isinstance(x, str) else x),
-    pydantic.PlainSerializer(lambda x: str(x)),
-    pydantic.Field(
-        description="Better annotation for UUID, parses from string format. Serializes to string format."
-    ),
-]
-
-
-class SupersetDefFile(pydantic.BaseModel, ABC):
-    model_config = pydantic.ConfigDict(arbitrary_types_allowed=True)
-
-    @property
-    @abstractmethod
-    def filename(self) -> str:
-        pass
-
-
-class SupersetDatabaseDef(SupersetDefFile):
-    database_name: str
-    sqlalchemy_uri: pydantic.AnyUrl
-    uuid: BetterUUID
-    cache_timeout: str | None = None
-    expose_in_sqllab: bool = True
-    allow_run_async: bool = False
-    allow_ctas: bool = False
-    allow_cvas: bool = False
-    allow_dml: bool = False
-    allow_file_upload: bool = False
-    extra: dict[str, Any] = pydantic.Field(default_factory=lambda: {
-        'allows_virtual_table_explore': True
-    })
-    impersonate_user: bool = False
-    version: str = '1.0.0'
-
-    @property
-    def filename(self):
-        return self.database_name
-
-
-class SupersetMetricDef(pydantic.BaseModel):
-    metric_name: str
-    verbose_name: str
-    expression: str
-    metric_type: str | None = None
-    description: str | None = None
-    d3format: str | None = None
-    currency: str | None = None
-    extra: dict[str, Any] = pydantic.Field(default_factory=dict)
-    warning_text: str | None = None
-
-
-class SupersetColumnDef(pydantic.BaseModel):
-    column_name: str
-    verbose_name: str | None = None
-    is_dttm: bool = False
-    is_active: bool = True
-    type: str = str(MITMDataType.Text.sa_sql_type)
-    advanced_data_type: str | None = None
-    groupby: bool = True
-    filterable: bool = True
-    expression: str | None = None
-    description: str | None = None
-    python_date_format: str = None
-    extra: Annotated[dict[str, Any], pydantic.Field(default_factory=dict)]
-
-
-class SupersetTableDef(SupersetDefFile):
-    model_config = pydantic.ConfigDict(populate_by_name=True)
-
-    table_name: str
-    schema_name: str = pydantic.Field(alias='schema')
-    uuid: BetterUUID
-    main_dttm_col: str | None = None
-    description: str | None = None
-    default_endpoint: str | None = None
-    offset: int = 0
-    cache_timeout: str | None = None
-    catalog: str | None = None
-    sql: str = ''
-    params: Any = None
-    template_params: Any = None
-    filter_select_enabled: bool = True
-    fetch_values_predicate: str | None = None
-    extra: dict[str, Any] = pydantic.Field(default_factory=dict)
-    normalize_columns: bool = False
-    always_filter_main_dttm: bool = False
-    metrics: list[SupersetMetricDef] = pydantic.Field(default_factory=list)
-    columns: list[SupersetColumnDef] = pydantic.Field(default_factory=list)
-
-    @property
-    def filename(self):
-        return self.table_name
-
-
-BetterDatetime = Annotated[datetime,
-pydantic.BeforeValidator(lambda x: datetime.fromisoformat(x) if isinstance(x, str) else x),
-pydantic.PlainSerializer(lambda x: str(x)),
-pydantic.Field(
-    description="Better annotation for datetime, parses from string format. Serializes to string format."
-)]
-
-
-class SupersetMetadataDef(SupersetDefFile):
-    version: str = '1.0.0'
-    type: str = 'Database'
-    timestamp: BetterDatetime = pydantic.Field(default_factory=datetime.utcnow)
-
-    @property
-    def filename(self) -> str:
-        return 'metadata'
-
-
-class SupersetDef(pydantic.BaseModel):
-    database: SupersetDatabaseDef
-    datasets: list[SupersetTableDef]
-    metadata: SupersetMetadataDef = pydantic.Field(default_factory=SupersetMetadataDef)
-
-    def to_folder_structure(self) -> dict[str, Any]:
-        db_name = self.database.database_name
-        db_folder = {db_name: self.database}
-        datasets = list(self.datasets)
-        dataset_folder = {db_name: datasets}
-        return {'databases': [self.database], 'datasets': dataset_folder, '.': self.metadata}
diff --git a/mitm_tooling/transformation/superset/definitions/__init__.py b/mitm_tooling/transformation/superset/definitions/__init__.py
new file mode 100644
index 0000000..9226df6
--- /dev/null
+++ b/mitm_tooling/transformation/superset/definitions/__init__.py
@@ -0,0 +1,5 @@
+from .constants import *
+from .core import *
+from .post_processing import *
+from .charts import *
+from .high_level import *
\ No newline at end of file
diff --git a/mitm_tooling/transformation/superset/definitions/charts.py b/mitm_tooling/transformation/superset/definitions/charts.py
new file mode 100644
index 0000000..6d99e1b
--- /dev/null
+++ b/mitm_tooling/transformation/superset/definitions/charts.py
@@ -0,0 +1,86 @@
+from abc import ABC, abstractmethod
+from datetime import datetime, tzinfo, UTC
+from enum import StrEnum, IntEnum
+from typing import Any, Annotated, Literal, Self, Union
+
+import pydantic
+from uuid import UUID
+
+from pydantic import Field, AnyUrl
+
+from mitm_tooling.data_types import MITMDataType
+from .core import *
+from .constants import *
+
+
+class ChartParams(FormData):
+    datasource: str | DatasourceIdentifier
+    viz_type: SupersetVizType
+    groupby: list[str] = pydantic.Field(default_factory=list)
+    adhoc_filters: list[SupersetAdhocFilter] = pydantic.Field(default_factory=list)
+    row_limit: int = 10000
+    sort_by_metric: bool = True
+    color_scheme: ColorScheme = 'supersetColors'
+    show_legend: bool = True
+    legendType: str = 'scroll'
+    legendOrientation: str = 'top'
+    extra_form_data: dict[str, Any] = pydantic.Field(default_factory=dict)
+    slice_id: SupersetId | None = None
+    dashboards: list[SupersetId] = pydantic.Field(default_factory=list)
+
+
+class PieChartParams(ChartParams):
+    viz_type: Literal[SupersetVizType.PIE] = SupersetVizType.PIE
+    metric: SupersetAdhocMetric
+    show_labels_threshold: int = 5
+    show_labels: bool = True
+    labels_outside: bool = True
+    outerRadius: int = 70
+    innerRadius: int = 30
+    label_type: str = 'key'
+    number_format: str = 'SMART_NUMBER'
+    date_format: str = 'smart_date'
+
+
+class TimeSeriesChartParams(ChartParams):
+    metrics: list[SupersetAdhocMetric]
+    x_axis: ColumnName
+    x_axis_sort_asc: bool = True
+    x_axis_sort_series: str = 'name'
+    x_axis_sort_series_ascending: bool = True
+    x_axis_time_format: str = 'smart_date'
+    x_axis_title_margin: int = 15
+    y_axis_format: str = 'SMART_NUMBER'
+    y_axis_bounds: tuple[float | None, float | None] = (None, None)
+    y_axis_title_margin: int = 15
+    y_axis_title_position: str = 'Left'
+    truncateXAxis: bool = True
+    truncate_metric: bool = True
+    show_empty_columns: bool = True
+    comparison_type: str = 'values'
+    rich_tooltip: bool = True
+    showTooltipTotal: bool = True
+    showTooltipPercentage: bool = True
+    tooltipTimeFormat: str = 'smart_date'
+    sort_series_type: str = 'sum'
+    orientation: str = 'vertical'
+    only_total: bool = True
+    order_desc: bool = True
+    time_grain_sqla: TimeGrain | None = None
+    annotation_layers: AnnotationLayers = pydantic.Field(default_factory=list)
+
+    #
+    # forecastEnabled: bool = False
+    # forecastPeriods: int = 10
+    # forecastInterval: float = 0.8
+
+
+class TimeSeriesBarParams(TimeSeriesChartParams):
+    viz_type: Literal[SupersetVizType.TIMESERIES_BAR] = SupersetVizType.TIMESERIES_BAR
+
+
+class TimeSeriesLineParams(TimeSeriesChartParams):
+    viz_type: Literal[SupersetVizType.TIMESERIES_LINE] = SupersetVizType.TIMESERIES_LINE
+    opacity: float = 0.2
+    markerSize: int = 6
+    seriesType: str = 'line'
diff --git a/mitm_tooling/transformation/superset/definitions/constants.py b/mitm_tooling/transformation/superset/definitions/constants.py
new file mode 100644
index 0000000..f55f4e9
--- /dev/null
+++ b/mitm_tooling/transformation/superset/definitions/constants.py
@@ -0,0 +1,183 @@
+from abc import ABC, abstractmethod
+from datetime import datetime, tzinfo, UTC
+from enum import StrEnum, IntEnum
+from typing import Any, Annotated, Literal, Self, Union
+
+import pydantic
+from uuid import UUID
+
+from pydantic import Field, AnyUrl
+
+from mitm_tooling.data_types import MITMDataType
+from mitm_tooling.representation.intermediate_representation import ColumnName
+
+StrUUID = Annotated[
+    UUID,
+    pydantic.BeforeValidator(lambda x: UUID(x) if isinstance(x, str) else x),
+    pydantic.PlainSerializer(lambda x: str(x)),
+    pydantic.Field(
+        description="Better annotation for UUID. Parses from string format, serializes to string format."
+    )
+]
+
+StrUrl = Annotated[
+    AnyUrl,
+    pydantic.BeforeValidator(lambda x: AnyUrl(x) if isinstance(x, str) else x),
+    pydantic.PlainSerializer(lambda x: str(x)),
+    pydantic.Field(
+        description="Better annotation for AnyUrl. Parses from string format, serializes to string format."
+    )
+]
+
+StrDatetime = Annotated[
+    datetime,
+    pydantic.BeforeValidator(lambda x: datetime.fromisoformat(x) if isinstance(x, str) else x),
+    pydantic.PlainSerializer(lambda x: str(x)),
+    pydantic.Field(
+        description="Better annotation for datetime. Parses from string format, serializes to string format."
+    )
+]
+
+SupersetId = int
+
+FilterValue = Union[bool, StrDatetime, float, int, str]
+FilterValues = Union[FilterValue, list[FilterValue], tuple[FilterValue, ...]]
+
+ColorScheme = Literal['blueToGreen', 'supersetColors']
+
+
+class GenericDataType(IntEnum):
+    NUMERIC = 0
+    STRING = 1
+    TEMPORAL = 2
+    BOOLEAN = 3
+
+    @classmethod
+    def from_mitm_dt(cls, dt: MITMDataType) -> Self:
+        if dt in {MITMDataType.Numeric, MITMDataType.Integer}:
+            return cls.NUMERIC
+        elif dt in {MITMDataType.Datetime}:
+            return cls.TEMPORAL
+        elif dt in {MITMDataType.Boolean}:
+            return cls.BOOLEAN
+        else:
+            return cls.STRING
+
+
+class AnnotationType(StrEnum):
+    Event = 'EVENT'
+    Formula = 'FORMULA'
+    Interval = 'INTERVAL'
+    Timeseries = 'TIME_SERIES'
+
+
+class AnnotationSource(StrEnum):
+    Line = 'line'
+    Native = 'NATIVE'
+    Table = 'table'
+    Undefined = ''
+
+
+class SupersetVizType(StrEnum):
+    PIE = 'pie'
+    TIMESERIES_BAR = 'echarts_timeseries_bar'
+    TIMESERIES_LINE = 'echarts_timeseries_line'
+
+
+class ExpressionType(StrEnum):
+    SIMPLE = 'SIMPLE'
+    SQL = 'SQL'
+
+
+class SupersetAggregate(StrEnum):
+    COUNT = 'COUNT'
+    SUM = 'SUM'
+    MIN = 'MIN'
+    MAX = 'MAX'
+    AVG = 'AVG'
+
+
+class FilterOperator(StrEnum):
+    EQUALS = "=="
+    NOT_EQUALS = "!="
+    GREATER_THAN = ">"
+    LESS_THAN = "<"
+    GREATER_THAN_OR_EQUALS = ">="
+    LESS_THAN_OR_EQUALS = "<="
+    LIKE = "LIKE"
+    NOT_LIKE = "NOT LIKE"
+    ILIKE = "ILIKE"
+    IS_NULL = "IS NULL"
+    IS_NOT_NULL = "IS NOT NULL"
+    IN = "IN"
+    NOT_IN = "NOT IN"
+    IS_TRUE = "IS TRUE"
+    IS_FALSE = "IS FALSE"
+    TEMPORAL_RANGE = "TEMPORAL_RANGE"
+
+
+class FilterStringOperators(StrEnum):
+    EQUALS = "EQUALS"
+    NOT_EQUALS = "NOT_EQUALS"
+    LESS_THAN = "LESS_THAN"
+    GREATER_THAN = "GREATER_THAN"
+    LESS_THAN_OR_EQUAL = "LESS_THAN_OR_EQUAL"
+    GREATER_THAN_OR_EQUAL = "GREATER_THAN_OR_EQUAL"
+    IN = "IN"
+    NOT_IN = "NOT_IN"
+    ILIKE = "ILIKE"
+    LIKE = "LIKE"
+    IS_NOT_NULL = "IS_NOT_NULL"
+    IS_NULL = "IS_NULL"
+    LATEST_PARTITION = "LATEST_PARTITION"
+    IS_TRUE = "IS_TRUE"
+    IS_FALSE = "IS_FALSE"
+
+    @classmethod
+    def from_operator(cls, operator: FilterOperator) -> Self:
+        return getattr(cls, operator.name.removesuffix('S') if operator.name.endswith('_OR_EQUALS') else operator.name)
+
+
+class TimeGrain(StrEnum):
+    SECOND = "PT1S"
+    FIVE_SECONDS = "PT5S"
+    THIRTY_SECONDS = "PT30S"
+    MINUTE = "PT1M"
+    FIVE_MINUTES = "PT5M"
+    TEN_MINUTES = "PT10M"
+    FIFTEEN_MINUTES = "PT15M"
+    THIRTY_MINUTES = "PT30M"
+    HALF_HOUR = "PT0.5H"
+    HOUR = "PT1H"
+    SIX_HOURS = "PT6H"
+    DAY = "P1D"
+    WEEK = "P1W"
+    WEEK_STARTING_SUNDAY = "1969-12-28T00:00:00Z/P1W"
+    WEEK_STARTING_MONDAY = "1969-12-29T00:00:00Z/P1W"
+    WEEK_ENDING_SATURDAY = "P1W/1970-01-03T00:00:00Z"
+    WEEK_ENDING_SUNDAY = "P1W/1970-01-04T00:00:00Z"
+    MONTH = "P1M"
+    QUARTER = "P3M"
+    QUARTER_YEAR = "P0.25Y"
+    YEAR = "P1Y"
+
+
+class ChartDataResultFormat(StrEnum):
+    CSV = "csv"
+    JSON = "json"
+    XLSX = "xlsx"
+
+    @classmethod
+    def table_like(cls) -> set[Self]:
+        return {cls.CSV, cls.XLSX}
+
+
+class ChartDataResultType(StrEnum):
+    COLUMNS = "columns"
+    FULL = "full"
+    QUERY = "query"
+    RESULTS = "results"
+    SAMPLES = "samples"
+    TIMEGRAINS = "timegrains"
+    POST_PROCESSED = "post_processed"
+    DRILL_DETAIL = "drill_detail"
diff --git a/mitm_tooling/transformation/superset/definitions/core.py b/mitm_tooling/transformation/superset/definitions/core.py
new file mode 100644
index 0000000..dc0fab3
--- /dev/null
+++ b/mitm_tooling/transformation/superset/definitions/core.py
@@ -0,0 +1,193 @@
+from abc import ABC, abstractmethod
+from datetime import datetime, tzinfo, UTC
+from enum import StrEnum, IntEnum
+from typing import Any, Annotated, Literal, Self, Union
+
+import pydantic
+from uuid import UUID
+
+from pydantic import Field, AnyUrl
+
+from mitm_tooling.data_types import MITMDataType
+from .constants import *
+
+
+class SupersetPostProcessing(pydantic.BaseModel, ABC):
+    @pydantic.computed_field()
+    @property
+    def operation(self) -> str:
+        raise NotImplementedError()
+
+
+class DatasourceIdentifier(pydantic.BaseModel):
+    id: SupersetId
+    type: Literal['table', 'annotation'] = 'table'
+
+    dataset_uuid: StrUUID = pydantic.Field(exclude=True)
+
+    @property
+    def datasource_uid(self):
+        return f'{self.id}__{self.type}'
+
+
+class SupersetColumn(pydantic.BaseModel):
+    column_name: str
+    verbose_name: str | None = None
+    id: SupersetId | None = None
+    is_dttm: bool = False
+    is_active: bool = True
+    type: str = str(MITMDataType.Text.sa_sql_type)
+    type_generic: GenericDataType = GenericDataType.STRING
+    advanced_data_type: str | None = None
+    groupby: bool = True
+    filterable: bool = True
+    expression: str | None = None
+    description: str | None = None
+    python_date_format: str | None = None
+    extra: dict[str, Any] = pydantic.Field(default_factory=dict)
+
+
+class IdentifiedSupersetColumn(SupersetColumn):
+    id: SupersetId
+
+
+class SupersetMetric(pydantic.BaseModel):
+    metric_name: str
+    verbose_name: str
+    expression: str
+    metric_type: str | None = None
+    description: str | None = None
+    d3format: str | None = None
+    currency: str | None = None
+    extra: dict[str, Any] = Field(default_factory=dict)
+    warning_text: str | None = None
+
+
+class SupersetAdhocFilter(pydantic.BaseModel):
+    clause: str = 'WHERE'
+    subject: ColumnName
+    operator: FilterOperator
+    operatorId: FilterStringOperators | None = None
+    comparator: str | None = 'No filter'
+    expressionType: ExpressionType = ExpressionType.SIMPLE
+    isExtra: bool = False
+    isNew: bool = False
+    sqlExpression: str | None = None
+
+
+class SupersetAdhocMetric(pydantic.BaseModel):
+    label: str
+    column: SupersetColumn
+    expressionType: ExpressionType = ExpressionType.SIMPLE
+    aggregate: SupersetAggregate = SupersetAggregate.COUNT
+    sqlExpression: str | None = None
+    datasourceWarning: bool = False
+    hasCustomLabel: bool = False
+    optionName: str | None = None
+
+
+class SupersetAdhocColumn(pydantic.BaseModel):
+    label: str
+    sqlExpression: str
+    columnType: str = 'BASE_AXIS'
+    expressionType: str = 'SQL'
+    timeGrain: TimeGrain | None = None
+
+
+OrderBy = tuple[SupersetAdhocMetric | str, bool]
+
+
+class AnnotationOverrides(pydantic.BaseModel):
+    time_range: str | None = None
+
+
+class AnnotationLayer(pydantic.BaseModel):
+    name: str
+    value: int
+    annotationType: AnnotationType
+    sourceType: AnnotationSource = AnnotationSource.Table
+    opacity: str = ''
+    overrides: AnnotationOverrides
+    hideLine: bool = False
+    show: bool = False
+    showLabel: bool = False
+    showMarkers: bool = False
+    style: str = 'solid'
+    width: int = 1
+
+
+class TimeAnnotationLayer(AnnotationLayer):
+    annotationType: Literal[AnnotationType.Event, AnnotationType.Interval] = AnnotationType.Event
+    titleColumn: str
+    timeColumn: str = 'time'
+    intervalEndColumn: str = ''
+    color: str | None = None
+    descriptionColumns: list[str] = pydantic.Field(default_factory=list)
+
+
+class QueryObjectFilterClause(pydantic.BaseModel):
+    col: ColumnName
+    op: FilterOperator
+    val: FilterValues | None = None
+    grain: str | None = None
+    isExtra: bool | None = None
+
+    @classmethod
+    def from_adhoc_filter(cls, adhoc_filter: SupersetAdhocFilter) -> Self:
+        return cls(col=adhoc_filter.subject, op=adhoc_filter.operator, val=adhoc_filter.comparator)
+
+
+class QueryObjectExtras(pydantic.BaseModel):
+    having: str = ''
+    where: str = ''
+    time_grain_sqla: TimeGrain | None = None
+
+
+AnnotationLayers = Annotated[list[AnnotationLayer] | None, pydantic.SerializeAsAny]
+PostProcessingList = Annotated[list[SupersetPostProcessing | dict[str, Any]], pydantic.SerializeAsAny]
+
+
+class QueryObject(pydantic.BaseModel):
+    annotation_layers: AnnotationLayers = pydantic.Field(default_factory=list)
+    applied_time_extras: dict[str, str] = pydantic.Field(default_factory=dict)
+    columns: list[ColumnName | SupersetAdhocColumn] = pydantic.Field(default_factory=list)
+    datasource: DatasourceIdentifier | None = None
+    extras: QueryObjectExtras = pydantic.Field(default_factory=QueryObjectExtras)
+    filters: list[QueryObjectFilterClause] = pydantic.Field(default_factory=list)
+    metrics: list[SupersetAdhocMetric] | None = None
+    granularity: str | None = None
+    from_dttm: StrDatetime | None = None
+    to_dttm: StrDatetime | None = None
+    inner_from_dttm: StrDatetime | None = None
+    inner_to_dttm: StrDatetime | None = None
+    is_rowcount: bool = False
+    is_timeseries: bool | None = None
+    order_desc: bool = True
+    orderby: list[OrderBy] = pydantic.Field(default_factory=list)
+    post_processing: PostProcessingList = pydantic.Field(default_factory=list)
+    result_type: ChartDataResultType | None = None
+    row_limit: int | None = None
+    row_offset: int | None = None
+    series_columns: list[ColumnName] = pydantic.Field(default_factory=list)
+    series_limit: int = 0
+    series_limit_metric: SupersetAdhocMetric | None = None
+    time_offsets: list[str] = pydantic.Field(default_factory=list)
+    time_shift: str | None = None
+    time_range: str | None = None
+    url_params: dict[str, str] | None = pydantic.Field(default_factory=dict)
+
+
+class FormData(pydantic.BaseModel):
+    pass
+
+
+class QueryContext(pydantic.BaseModel):
+    model_config = pydantic.ConfigDict(arbitrary_types_allowed=True)
+
+    datasource: DatasourceIdentifier
+    queries: list[QueryObject] = pydantic.Field(default_factory=list)
+    form_data: Annotated[FormData | dict[str, Any] | None, pydantic.SerializeAsAny, pydantic.Field(default=None)]
+    result_type: ChartDataResultType = ChartDataResultType.FULL
+    result_format: ChartDataResultFormat = ChartDataResultFormat.JSON
+    force: bool = False
+    custom_cache_timeout: int | None = None
diff --git a/mitm_tooling/transformation/superset/definitions/high_level.py b/mitm_tooling/transformation/superset/definitions/high_level.py
new file mode 100644
index 0000000..1f499b0
--- /dev/null
+++ b/mitm_tooling/transformation/superset/definitions/high_level.py
@@ -0,0 +1,147 @@
+from abc import ABC, abstractmethod
+from collections import defaultdict
+from datetime import datetime, tzinfo, UTC
+from enum import StrEnum, IntEnum
+from typing import Any, Annotated, Literal, Self, Union
+
+import pydantic
+from uuid import UUID
+
+from pydantic import Field, AnyUrl
+
+from mitm_tooling.data_types import MITMDataType
+from .constants import *
+from .core import *
+from .charts import *
+from .post_processing import *
+
+
+class MetadataType(StrEnum):
+    Database = 'Database'
+    SqlaTable = 'SqlaTable'
+    Slice = 'Slice'
+    Chart = 'Chart'
+
+
+class SupersetDefFile(pydantic.BaseModel, ABC):
+    model_config = pydantic.ConfigDict(arbitrary_types_allowed=True)
+
+    @property
+    @abstractmethod
+    def filename(self) -> str:
+        pass
+
+
+class SupersetMetadataDef(SupersetDefFile):
+    version: str = '1.0.0'
+    type: MetadataType = MetadataType.SqlaTable
+    timestamp: StrDatetime = pydantic.Field(default_factory=lambda: datetime.now(UTC))
+
+    @property
+    def filename(self) -> str:
+        return 'metadata'
+
+
+class SupersetDatabaseDef(SupersetDefFile):
+    database_name: str
+    sqlalchemy_uri: StrUrl
+    uuid: StrUUID
+    cache_timeout: str | None = None
+    expose_in_sqllab: bool = True
+    allow_run_async: bool = False
+    allow_ctas: bool = False
+    allow_cvas: bool = False
+    allow_dml: bool = False
+    allow_file_upload: bool = False
+    extra: dict[str, Any] = pydantic.Field(default_factory=lambda: {
+        'allows_virtual_table_explore': True
+    })
+    impersonate_user: bool = False
+    version: str = '1.0.0'
+    ssh_tunnel: None = None
+
+    @property
+    def filename(self):
+        return self.database_name
+
+
+class SupersetDatasetDef(SupersetDefFile):
+    model_config = pydantic.ConfigDict(populate_by_name=True)
+
+    table_name: str
+    schema_name: str = pydantic.Field(alias='schema')
+    uuid: StrUUID
+    database_uuid: StrUUID
+    main_dttm_col: str | None = None
+    description: str | None = None
+    default_endpoint: str | None = None
+    offset: int = 0
+    cache_timeout: str | None = None
+    catalog: str | None = None
+    sql: str | None = None
+    params: Any = None
+    template_params: Any = None
+    filter_select_enabled: bool = True
+    fetch_values_predicate: str | None = None
+    extra: dict[str, Any] = pydantic.Field(default_factory=dict)
+    normalize_columns: bool = False
+    always_filter_main_dttm: bool = False
+    metrics: list[SupersetMetric] = pydantic.Field(default_factory=list)
+    columns: list[SupersetColumn] = pydantic.Field(default_factory=list)
+    version: str = '1.0.0'
+
+    @property
+    def filename(self):
+        return self.table_name
+
+
+class SupersetChartDef(SupersetDefFile):
+    uuid: StrUUID
+    slice_name: str
+    viz_type: SupersetVizType
+    dataset_uuid: StrUUID
+    description: str | None = None
+    certified_by: str | None = None
+    certification_details: str | None = None
+    params: Annotated[ChartParams | dict[str, Any], pydantic.SerializeAsAny, pydantic.Field(default_factory=dict)]
+    query_context: Annotated[pydantic.Json | QueryContext | None, pydantic.PlainSerializer(
+        lambda x: x.model_dump_json(by_alias=True, exclude_none=True) if isinstance(x, pydantic.BaseModel) else x,
+        return_type=pydantic.Json), pydantic.Field(default=None)]
+    cache_timeout: int | None = None
+    version: str = '1.0.0'
+    is_managed_externally: bool = False
+    external_url: StrUrl | None = None
+
+    @property
+    def filename(self) -> str:
+        return f'{self.slice_name}_{self.dataset_uuid}'
+
+
+class SupersetDashboardDef(SupersetDefFile):
+
+    @property
+    def filename(self) -> str:
+        return 'dashboard'
+
+
+class SupersetAssetsDef(pydantic.BaseModel):
+    databases: list[SupersetDatabaseDef] | None = None
+    datasets: list[SupersetDatasetDef] | None = None
+    charts: list[SupersetChartDef] | None = None
+    dashboards: list[SupersetDashboardDef] | None = None
+    metadata: SupersetMetadataDef = pydantic.Field(default_factory=SupersetMetadataDef)
+
+    def to_folder_structure(self) -> dict[str, Any]:
+        folder = {'.': self.metadata}
+        dbs = {}
+        if self.databases:
+            dbs |= {db.uuid: db.database_name for db in self.databases}
+            folder['databases'] = [db for db in self.databases]
+        if self.datasets:
+            db_dss = defaultdict(list)
+            for ds in self.datasets:
+                db_dss[dbs[ds.database_uuid]].append(ds)
+            folder['datasets'] = db_dss
+        if self.charts:
+            folder['charts'] = self.charts
+        return {'my_import': folder}
diff --git a/mitm_tooling/transformation/superset/definitions/post_processing.py b/mitm_tooling/transformation/superset/definitions/post_processing.py
new file mode 100644
index 0000000..38612f7
--- /dev/null
+++ b/mitm_tooling/transformation/superset/definitions/post_processing.py
@@ -0,0 +1,43 @@
+import pydantic
+
+from .core import *
+
+
+class PivotOperator(pydantic.BaseModel):
+    operator: str = 'mean'
+
+
+class PivotOptions(pydantic.BaseModel):
+    aggregates: list[dict[ColumnName, PivotOperator]]
+    columns: list[ColumnName] = pydantic.Field(default_factory=list)
+    index: list[ColumnName] = pydantic.Field(default_factory=list)
+    drop_missing_columns: bool = False
+
+
+class Pivot(SupersetPostProcessing):
+    @property
+    def operation(self) -> str:
+        return 'pivot'
+
+    options: PivotOptions
+
+
+class RenameOptions(pydantic.BaseModel):
+    columns: dict[ColumnName, ColumnName | None] = pydantic.Field(default_factory=dict)
+    level: int = 0
+    inplace: bool | None = True
+
+
+class Rename(SupersetPostProcessing):
+    @property
+    def operation(self) -> str:
+        return 'rename'
+
+    options: RenameOptions
+
+
+class Flatten(SupersetPostProcessing):
+    @property
+    def operation(self) -> str:
+        return 'flatten'
+
diff --git a/mitm_tooling/transformation/superset/factories/__init__.py b/mitm_tooling/transformation/superset/factories/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/mitm_tooling/transformation/superset/factories/charts.py b/mitm_tooling/transformation/superset/factories/charts.py
new file mode 100644
index 0000000..63a28c8
--- /dev/null
+++ b/mitm_tooling/transformation/superset/factories/charts.py
@@ -0,0 +1,108 @@
+from pydantic import UUID4
+
+from mitm_tooling.data_types import MITMDataType
+from .core import mk_empty_adhoc_time_filter, mk_adhoc_metric, mk_pivot_post_processing, mk_time_avg_post_processing, \
+    mk_adhoc_column
+from .query import mk_query_object, mk_query_object_filter_clause, mk_query_context, \
+    mk_empty_query_object_time_filter_clause
+from .utils import mk_uuid
+from ..definitions import SupersetChartDef, PieChartParams, DatasourceIdentifier, ColumnName, SupersetAggregate, \
+    SupersetVizType, QueryContext, TimeSeriesBarParams, TimeGrain, QueryObjectFilterClause, SupersetAdhocFilter, \
+    TimeSeriesLineParams, QueryObjectExtras
+
+
def mk_pie_chart(name: str, datasource_identifier: DatasourceIdentifier, col: ColumnName, dt: MITMDataType,
                 groupby_cols: list[ColumnName], uuid: UUID4 | None = None) -> SupersetChartDef:
    """Build a Superset pie-chart definition counting `col`, segmented by `groupby_cols`.

    :param name: slice (chart) name shown in Superset.
    :param datasource_identifier: dataset the chart reads from.
    :param col: column whose COUNT forms the pie metric.
    :param dt: MITM data type of `col`, used to type the metric's underlying column.
    :param groupby_cols: columns defining the pie segments.
    :param uuid: optional stable chart UUID; a random one is generated when omitted.
    """
    metric = mk_adhoc_metric(col, agg=SupersetAggregate.COUNT, dt=dt)
    params = PieChartParams(datasource=datasource_identifier,
                            metric=metric,
                            groupby=groupby_cols,
                            adhoc_filters=[mk_empty_adhoc_time_filter()])

    # The query context mirrors the form data: same metric plus an empty temporal filter.
    qo = mk_query_object([col], metrics=[metric], filters=[mk_empty_query_object_time_filter_clause()])
    qc = mk_query_context(datasource=datasource_identifier, queries=[qo], form_data=params)

    return SupersetChartDef(slice_name=name,
                            viz_type=SupersetVizType.PIE,
                            dataset_uuid=datasource_identifier.dataset_uuid,
                            params=params,
                            query_context=qc,
                            uuid=uuid or mk_uuid())
+
+
def mk_time_series_bar_chart(name: str,
                             datasource_identifier: DatasourceIdentifier,
                             y_col: ColumnName,
                             y_dt: MITMDataType, x_col: ColumnName,
                             groupby_cols: list[ColumnName],
                             filters: list[SupersetAdhocFilter] | None = None,
                             uuid: UUID4 | None = None,
                             time_grain: TimeGrain | None = None) -> SupersetChartDef:
    """Build a time-series bar chart counting `y_col` over `x_col`, grouped by `groupby_cols`.

    :param filters: extra ad-hoc filters appended after the default empty temporal filter.
    :param time_grain: optional SQL time grain applied to the x-axis column.
    """
    metric = mk_adhoc_metric(y_col, agg=SupersetAggregate.COUNT, dt=y_dt)
    adhoc_filters = [mk_empty_adhoc_time_filter()]
    if filters:
        adhoc_filters.extend(filters)
    params = TimeSeriesBarParams(datasource=datasource_identifier,
                                 metrics=[metric],
                                 groupby=groupby_cols,
                                 adhoc_filters=adhoc_filters,
                                 x_axis=x_col,
                                 time_grain_sqla=time_grain
                                 )

    # Pivot on the x-axis so each y-series becomes its own column, then flatten for plotting.
    pp = mk_pivot_post_processing(x_col, cols=[y_col], aggregations={metric.label: 'mean'},
                                  renames={metric.label: None})
    adhoc_x = mk_adhoc_column(x_col, timeGrain=time_grain)
    # NOTE(review): set() mixes a pydantic model (adhoc_x) with plain column names and
    # yields a nondeterministic column order — confirm both are intended/hashable.
    cols = list(set([adhoc_x, y_col] + groupby_cols))
    # NOTE(review): unlike mk_count_time_series_chart below, no QueryObjectExtras with
    # time_grain_sqla is passed here — confirm whether that asymmetry is intended.
    qo = mk_query_object(columns=cols,
                         metrics=[metric],
                         filters=[QueryObjectFilterClause.from_adhoc_filter(af) for af in adhoc_filters],
                         post_processing=pp,
                         series_columns=[y_col])
    qc = mk_query_context(datasource=datasource_identifier, queries=[qo], form_data=params)

    return SupersetChartDef(slice_name=name,
                            viz_type=SupersetVizType.TIMESERIES_BAR,
                            dataset_uuid=datasource_identifier.dataset_uuid,
                            params=params,
                            query_context=qc,
                            uuid=uuid or mk_uuid())
+
+
def mk_count_time_series_chart(name: str,
                               datasource_identifier: DatasourceIdentifier,
                               groupby_cols: list[ColumnName],
                               time_col: ColumnName = 'time',
                               filters: list[SupersetAdhocFilter] | None = None,
                               uuid: UUID4 | None = None,
                               time_grain: TimeGrain | None = None) -> SupersetChartDef:
    """Build a time-series line chart of event counts over `time_col`, grouped by `groupby_cols`.

    :param filters: extra ad-hoc filters appended after the default empty temporal filter.
    :param time_grain: optional SQL time grain applied to the time column and query extras.
    """
    metric = mk_adhoc_metric(time_col, agg=SupersetAggregate.COUNT, dt=MITMDataType.Datetime)
    adhoc_filters = [mk_empty_adhoc_time_filter()]
    if filters:
        adhoc_filters.extend(filters)
    params = TimeSeriesLineParams(datasource=datasource_identifier,
                                  metrics=[metric],
                                  groupby=groupby_cols,
                                  adhoc_filters=adhoc_filters,
                                  x_axis=time_col,
                                  time_grain_sqla=time_grain
                                  )

    # Pivot on time so each group combination becomes its own series column, then flatten.
    pp = mk_pivot_post_processing(time_col, cols=groupby_cols, aggregations={metric.label: 'mean'},
                                  renames={metric.label: None})
    adhoc_time_col = mk_adhoc_column(time_col, timeGrain=time_grain)
    # NOTE(review): set() over a pydantic model plus plain names gives nondeterministic order.
    cols = list(set([adhoc_time_col] + groupby_cols))
    qo = mk_query_object(columns=cols,
                         metrics=[metric],
                         filters=[QueryObjectFilterClause.from_adhoc_filter(af) for af in adhoc_filters],
                         post_processing=pp,
                         series_columns=groupby_cols,
                         extras=QueryObjectExtras(time_grain_sqla=time_grain))
    qc = mk_query_context(datasource=datasource_identifier, queries=[qo], form_data=params)

    return SupersetChartDef(slice_name=name,
                            viz_type=SupersetVizType.TIMESERIES_LINE,
                            dataset_uuid=datasource_identifier.dataset_uuid,
                            params=params,
                            query_context=qc,
                            uuid=uuid or mk_uuid())
diff --git a/mitm_tooling/transformation/superset/factories/core.py b/mitm_tooling/transformation/superset/factories/core.py
new file mode 100644
index 0000000..3e08279
--- /dev/null
+++ b/mitm_tooling/transformation/superset/factories/core.py
@@ -0,0 +1,90 @@
+import uuid
+from typing import overload
+
+import pydantic
+
+from ..definitions import *
+import sqlalchemy as sa
+
+
def mk_pivot_post_processing(index_col: ColumnName, cols: list[ColumnName], aggregations: dict[ColumnName, str],
                             renames: dict[ColumnName, ColumnName | None] | None = None) -> list[
    SupersetPostProcessing]:
    """Build the standard pivot -> (rename) -> flatten post-processing chain.

    :param index_col: column used as the pivot index (typically the time column).
    :param cols: columns whose values become the pivoted series columns.
    :param aggregations: maps a result-column label to a pandas aggregation operator name
        (e.g. 'mean').
    :param renames: optional relabeling applied after the pivot. BUG FIX: the value type
        is widened to ``ColumnName | None`` — every call site passes ``{label: None}``,
        which the old ``dict[ColumnName, ColumnName]`` annotation forbade.
    :return: ordered list of post-processing steps.
    """
    pp: list[SupersetPostProcessing] = [
        Pivot(options=PivotOptions(aggregates=[{c: PivotOperator(operator=m)} for c, m in aggregations.items()],
                                   columns=cols,
                                   index=[index_col]))
    ]
    if renames:
        # A plain copy suffices; the previous identity comprehension added nothing.
        pp.append(Rename(options=RenameOptions(columns=dict(renames))))
    pp.append(Flatten())
    return pp
+
+
def mk_count_pivot_post_processing(cols: list[ColumnName], agg_cols: list[ColumnName]) -> list[SupersetPostProcessing]:
    """Pivot-on-'time' post-processing averaging the ``AVG(c)`` label of each `agg_cols` entry."""
    # NOTE(review): this is byte-identical to mk_time_avg_post_processing below and
    # builds AVG(...) labels despite the "count" name — confirm whether COUNT(...)
    # labels were intended here.
    return mk_pivot_post_processing('time', cols, aggregations={f'AVG({c})': 'mean' for c in agg_cols})
+
+
def mk_time_avg_post_processing(cols: list[ColumnName], agg_cols: list[ColumnName]) -> list[SupersetPostProcessing]:
    """Pivot-on-'time' post-processing averaging the ``AVG(c)`` label of each `agg_cols` entry."""
    return mk_pivot_post_processing('time', cols, aggregations={f'AVG({c})': 'mean' for c in agg_cols})
+
+
def mk_adhoc_metric(col: ColumnName, agg: SupersetAggregate = SupersetAggregate.AVG,
                    dt: MITMDataType = MITMDataType.Numeric, col_id: int | None = None,
                    **kwargs) -> SupersetAdhocMetric:
    """Create an ad-hoc (inline) metric ``AGG(col)`` over the given column."""
    label = f'{agg}({col})'
    column = mk_column(col, dt, col_id)
    return SupersetAdhocMetric(label=label, aggregate=agg, column=column, **kwargs)
+
+
def mk_adhoc_metrics(cols: list[ColumnName], agg: SupersetAggregate = SupersetAggregate.AVG,
                     dt: MITMDataType = MITMDataType.Numeric,
                     **kwargs) -> list[SupersetAdhocMetric]:
    """Create one ad-hoc metric per column, all sharing the same aggregate and data type."""
    return [mk_adhoc_metric(column, agg=agg, dt=dt, **kwargs) for column in cols]
+
+
def mk_metric(col: ColumnName, agg: SupersetAggregate, **kwargs) -> SupersetMetric:
    """Create a named dataset metric whose name, verbose name and SQL expression are all ``AGG(col)``."""
    expression = f'{agg}({col})'
    return SupersetMetric(metric_name=expression, verbose_name=expression, expression=expression, **kwargs)
+
+
def mk_metrics(cols: list[ColumnName], agg: SupersetAggregate = SupersetAggregate.AVG,
               **kwargs) -> list[SupersetMetric]:
    """Create one dataset metric per column, all with the same aggregate."""
    return [mk_metric(column, agg, **kwargs) for column in cols]
+
+
@overload
def mk_column(col: ColumnName, dt: MITMDataType, *,
              dialect: sa.Dialect | None = None, **kwargs) -> SupersetColumn:
    ...


@overload
def mk_column(col: ColumnName, dt: MITMDataType, col_id: SupersetId, *,
              dialect: sa.Dialect | None = None, **kwargs) -> IdentifiedSupersetColumn:
    ...


def mk_column(col: ColumnName, dt: MITMDataType, col_id: SupersetId | None = None, *,
              dialect: sa.Dialect | None = None, **kwargs) -> SupersetColumn:
    """Create a Superset dataset column from a MITM-typed column.

    :param col: column name.
    :param dt: MITM data type; determines is_dttm, groupby eligibility and the compiled SQL type.
    :param col_id: when given, an IdentifiedSupersetColumn carrying this id is returned.
    :param dialect: SQLAlchemy dialect used to compile the SQL type (generic compilation when None).

    BUG FIX: `dialect` is now keyword-only. The first overload previously advertised it as
    the third positional parameter while the implementation's third positional was `col_id`,
    so a positionally passed dialect would silently have been taken as the column id. All
    visible callers already pass `dialect` by keyword, so this is backward-compatible.
    """
    args = dict(column_name=col,
                is_dttm=dt is MITMDataType.Datetime,
                groupby=dt not in {MITMDataType.Json,
                                   MITMDataType.Numeric},
                type=(dt.sa_sql_type or MITMDataType.Text.sa_sql_type).compile(
                    dialect=dialect),
                type_generic=GenericDataType.from_mitm_dt(dt)) | kwargs
    if col_id is not None:
        return IdentifiedSupersetColumn(**args, id=col_id)
    return SupersetColumn(**args)
+
+
def mk_adhoc_column(col: ColumnName, **kwargs) -> SupersetAdhocColumn:
    """Create an ad-hoc column whose label and SQL expression are both the bare column name."""
    expression = col
    return SupersetAdhocColumn(label=expression, sqlExpression=expression, **kwargs)
+
+
def mk_adhoc_filter(col: ColumnName, op: FilterOperator, comp: str | None = 'No Filter',
                    **kwargs) -> SupersetAdhocFilter:
    """Create an ad-hoc filter clause ``col <op> comp`` with the matching string operator id."""
    operator_id = FilterStringOperators.from_operator(op)
    return SupersetAdhocFilter(subject=col, operator=op, operatorId=operator_id,
                               comparator=comp, **kwargs)
+
+
def mk_empty_adhoc_time_filter() -> SupersetAdhocFilter:
    """Create the default no-op temporal-range filter on the 'time' column."""
    return mk_adhoc_filter(col='time', op=FilterOperator.TEMPORAL_RANGE)
diff --git a/mitm_tooling/transformation/superset/factories/dataset.py b/mitm_tooling/transformation/superset/factories/dataset.py
new file mode 100644
index 0000000..e9ab09d
--- /dev/null
+++ b/mitm_tooling/transformation/superset/factories/dataset.py
@@ -0,0 +1,27 @@
+import pydantic
+import sqlalchemy as sa
+from mitm_tooling.transformation.superset.definitions import SupersetDatasetDef, SupersetMetric
+from mitm_tooling.data_types import MITMDataType
+from mitm_tooling.extraction.sql.data_models import TableMetaInfo
+from .core import mk_column, mk_metric
+from .utils import mk_uuid
+from ..definitions import SupersetAggregate
+
+
def mk_dataset_def(tm: TableMetaInfo, database_uuid: pydantic.UUID4, dialect: sa.Dialect | None = None,
                   uuid: pydantic.UUID4 | None = None) -> SupersetDatasetDef:
    """Create a Superset dataset definition for a reflected table.

    Emits one Superset column per table column plus, besides a global COUNT metric,
    AVG and SUM metrics for every numeric or integer column.
    """
    metrics = [mk_metric('', SupersetAggregate.COUNT)]
    cols = []
    for col_name in tm.columns:
        col_dt = tm.column_properties[col_name].mitm_data_type
        cols.append(mk_column(col_name, col_dt, dialect=dialect))
        if col_dt in {MITMDataType.Numeric, MITMDataType.Integer}:
            metrics.append(mk_metric(col_name, SupersetAggregate.AVG))
            metrics.append(mk_metric(col_name, SupersetAggregate.SUM))

    return SupersetDatasetDef(table_name=tm.name, schema=tm.schema_name, uuid=uuid or mk_uuid(),
                              database_uuid=database_uuid, columns=cols, metrics=metrics)
diff --git a/mitm_tooling/transformation/superset/factories/datasource.py b/mitm_tooling/transformation/superset/factories/datasource.py
new file mode 100644
index 0000000..8b2bf5e
--- /dev/null
+++ b/mitm_tooling/transformation/superset/factories/datasource.py
@@ -0,0 +1,9 @@
+import pydantic
+from pydantic import AnyUrl, UUID4
+
+from .utils import mk_uuid
+from ..definitions import SupersetDatabaseDef
+
+
def mk_datasource(name: str, sqlalchemy_uri: AnyUrl, uuid: UUID4 | None = None) -> SupersetDatabaseDef:
    """Create a Superset database (datasource) definition pointing at the given SQLAlchemy URI."""
    if uuid is None:
        uuid = mk_uuid()
    return SupersetDatabaseDef(database_name=name, sqlalchemy_uri=sqlalchemy_uri, uuid=uuid)
diff --git a/mitm_tooling/transformation/superset/factories/query.py b/mitm_tooling/transformation/superset/factories/query.py
new file mode 100644
index 0000000..8ec1d00
--- /dev/null
+++ b/mitm_tooling/transformation/superset/factories/query.py
@@ -0,0 +1,33 @@
+
+from ..definitions import QueryObject, QueryContext, ColumnName, DatasourceIdentifier, SupersetAdhocMetric, \
+    SupersetAdhocFilter, QueryObjectFilterClause, FormData, FilterValues, FilterOperator, SupersetPostProcessing
+from .core import mk_adhoc_metric, mk_adhoc_metrics
+
+
def mk_query_object_filter_clause(col: ColumnName, op: FilterOperator,
                                  val: FilterValues | None = None,
                                  **kwargs) -> QueryObjectFilterClause:
    """Create a query-object filter clause ``col <op> val``."""
    clause_fields = dict(col=col, op=op, val=val)
    return QueryObjectFilterClause(**clause_fields, **kwargs)
+
+
def mk_empty_query_object_time_filter_clause() -> QueryObjectFilterClause:
    """Create the default no-op TEMPORAL_RANGE filter clause on the 'time' column."""
    return mk_query_object_filter_clause(col='time', op=FilterOperator.TEMPORAL_RANGE)
+
+
def mk_query_object(columns: list[ColumnName],
                    metrics: list[SupersetAdhocMetric],
                    filters: list[QueryObjectFilterClause],
                    orderby: list[tuple[SupersetAdhocMetric, bool]] | None = None,
                    post_processing: list[SupersetPostProcessing] | None = None,
                    row_limit: int | None = 10_000,
                    **kwargs) -> QueryObject:
    """Create a QueryObject for a chart's query context.

    :param orderby: (metric, ascending) pairs. Defaults to ordering by the first metric,
        descending. BUG FIX: the default previously used ``0`` where the annotation
        declares ``bool``, and unconditionally indexed ``metrics[0]``, raising
        IndexError for an empty metrics list; now uses ``False`` and guards that case.
    :param post_processing: post-processing chain; defaults to no steps.
    :param row_limit: maximum number of rows to fetch (None for no limit).
    """
    if orderby is None:
        orderby = [(metrics[0], False)] if metrics else []
    if post_processing is None:
        post_processing = []
    return QueryObject(columns=columns, metrics=metrics, filters=filters, orderby=orderby,
                       post_processing=post_processing, row_limit=row_limit, **kwargs)
+
+
def mk_query_context(datasource: DatasourceIdentifier, queries: list[QueryObject], form_data: FormData,
                     **kwargs) -> QueryContext:
    """Create a QueryContext bundling the datasource, its queries and the chart's form data."""
    context_fields = dict(datasource=datasource, queries=queries, form_data=form_data)
    return QueryContext(**context_fields, **kwargs)
diff --git a/mitm_tooling/transformation/superset/factories/utils.py b/mitm_tooling/transformation/superset/factories/utils.py
new file mode 100644
index 0000000..d98c6f0
--- /dev/null
+++ b/mitm_tooling/transformation/superset/factories/utils.py
@@ -0,0 +1,7 @@
+import uuid
+
+import pydantic
+
+
def mk_uuid() -> pydantic.UUID4:
    """Return a freshly generated random (version 4) UUID."""
    return uuid.uuid4()
diff --git a/mitm_tooling/transformation/superset/superset_representation.py b/mitm_tooling/transformation/superset/superset_representation.py
index 7fe29aa..3f60b2b 100644
--- a/mitm_tooling/transformation/superset/superset_representation.py
+++ b/mitm_tooling/transformation/superset/superset_representation.py
@@ -1,6 +1,8 @@
 import os.path
 import uuid
 import zipfile
+from typing import TypedDict, Unpack
+
 import sqlalchemy as sa
 import yaml
 from pydantic import AnyUrl
@@ -14,14 +16,21 @@ from mitm_tooling.representation.sql_representation import MITMData, mk_sqlite,
 
 from mitm_tooling.data_types import MITMDataType
 
-from .dataset_definition import SupersetTableDef, SupersetColumnDef, SupersetDatabaseDef, SupersetDef, SupersetDefFile
+from .definitions import SupersetDatasetDef, SupersetColumn, SupersetDatabaseDef, SupersetAssetsDef, \
+    SupersetDefFile, SupersetMetadataDef, SupersetMetric, GenericDataType
+from .factories.dataset import mk_dataset_def
+from .factories.datasource import mk_datasource
+
+
+class KWArguments(TypedDict, total=False):
+    sqlalchemy_uri: AnyUrl
 
 
 def tentative_superset_mount_url(db_name: str) -> AnyUrl:
     return AnyUrl(f'sqlite:////mounted-files/{db_name}.sqlite?check_same_thread=false')
 
 
-def write_superset_def_as_zip(target: ByteSink, superset_def: SupersetDef):
+def write_superset_def_as_zip(target: ByteSink, superset_def: SupersetAssetsDef):
     folder_structure = superset_def.to_folder_structure()
     with use_bytes_io(target, expected_file_ext='.zip', mode='wb', create_file_if_necessary=True) as f:
         with zipfile.ZipFile(f, 'w', zipfile.ZIP_DEFLATED) as zf:
@@ -30,7 +39,7 @@ def write_superset_def_as_zip(target: ByteSink, superset_def: SupersetDef):
                     fn = f'{arg.filename}.yaml'
                     if prefix:
                         fn = os.path.join(prefix, fn)
-                    dump = arg.model_dump(by_alias=True, mode='python')
+                    dump = arg.model_dump(by_alias=True, mode='python', exclude_none=True)
                     s = yaml.dump(dump, default_flow_style=False)
 
                     zf.writestr(fn, s)
@@ -55,47 +64,37 @@ def write_superset_def_as_zip(target: ByteSink, superset_def: SupersetDef):
             mk_node(folder_structure)
 
 
-def write_superset_def(output_path: FilePath, superset_def: SupersetDef):
+def write_superset_def(output_path: FilePath, superset_def: SupersetAssetsDef):
     write_superset_def_as_zip(output_path, superset_def)
 
 
-def infer_superset_dataset_def(sqlite_file_path: FilePath) -> SupersetDef:
+def infer_superset_dataset_def(sqlite_file_path: FilePath, **kwargs: Unpack[KWArguments]) -> SupersetAssetsDef:
     engine = create_sa_engine(AnyUrl(f'sqlite:///{str(sqlite_file_path)}'))
     meta, _ = connect_and_reflect(engine)
     db_meta = DBMetaInfo.from_sa_meta(meta, default_schema=SQL_REPRESENTATION_DEFAULT_SCHEMA)
 
+    database_uuid = uuid.uuid4()
     datasets = []
     for schema_name, schema_tables in db_meta.db_structure.items():
-        for table_name, table in schema_tables.items():
-            cols = []
-            for c in table.columns:
-                dt = table.column_properties[c].mitm_data_type
-
-                cols.append(
-                    SupersetColumnDef(column_name=c,
-                                      is_dttm=dt is MITMDataType.Datetime,
-                                      groupby=dt not in {MITMDataType.Json,
-                                                         MITMDataType.Numeric,
-                                                         MITMDataType.Datetime},
-                                      type=(dt.sa_sql_type or MITMDataType.Text.sa_sql_type).compile(
-                                          dialect=engine.dialect)
-                                      ))
-            datasets.append(
-                SupersetTableDef(table_name=table_name, schema_name=schema_name, uuid=uuid.uuid4(), columns=cols))
+        for table_name, tm in schema_tables.items():
+            datasets.append(mk_dataset_def(tm, database_uuid, dialect=engine.dialect))
 
     db_name = os.path.splitext(os.path.basename(sqlite_file_path))[0]
-    return SupersetDef(
-        database=SupersetDatabaseDef(database_name=db_name,
-                                     sqlalchemy_uri=tentative_superset_mount_url(db_name),
-                                     uuid=uuid.uuid4()),
+    sqlalchemy_uri = kwargs.get('sqlalchemy_uri', tentative_superset_mount_url(db_name))
+    return SupersetAssetsDef(
+        databases=[mk_datasource(name=db_name,
+                                 sqlalchemy_uri=sqlalchemy_uri,
+                                 uuid=database_uuid)],
         datasets=datasets)
 
 
-def mk_inferred_superset_dataset_def(output_path: FilePath, sqlite_file_path: FilePath):
-    write_superset_def(output_path, infer_superset_dataset_def(sqlite_file_path))
+def mk_inferred_superset_dataset_def(output_path: FilePath, sqlite_file_path: FilePath, **kwargs: Unpack[KWArguments]):
+    dataset_def = infer_superset_dataset_def(sqlite_file_path, **kwargs)
+    write_superset_def(output_path, dataset_def)
 
 
 def mk_superset_dataset_def(mitm_data: MITMData, sqlite_file_path: str | None = ':memory:',
-                            definition_file_path: str | None = 'superset_definition.zip'):
+                            definition_file_path: str | None = 'superset_definition.zip',
+                            **kwargs: Unpack[KWArguments]):
     engine, sql_rep_schema = mk_sqlite(mitm_data, file_path=sqlite_file_path)
-    mk_inferred_superset_dataset_def(definition_file_path, sqlite_file_path)
+    mk_inferred_superset_dataset_def(definition_file_path, sqlite_file_path, **kwargs)
diff --git a/test/something.py b/test/something.py
index 069b720..4285c17 100644
--- a/test/something.py
+++ b/test/something.py
@@ -2,7 +2,6 @@ import os
 import unittest
 
 
-
 class MyTestCase(unittest.TestCase):
     def test_something(self):
         import mitm_tooling
@@ -20,10 +19,11 @@ class MyTestCase(unittest.TestCase):
             HeaderEntry(concept='segment_data', kind='SD', type_name='annotation_info', attributes=['y'],
                         attribute_dtypes=[MITMDataType.Json]),
         ])
-        meta, tables = mk_db_schema(h)
-        print(meta)
+        sql_rep = mk_db_schema(h)
+        print(sql_rep.meta)
         print()
-        print(tables)
+        print(sql_rep.concept_tables)
+        print(sql_rep.type_tables)
         print()
 
     def test_writing_sqlite(self):
@@ -45,12 +45,26 @@ class MyTestCase(unittest.TestCase):
         from mitm_tooling.io import importing
         from mitm_tooling.definition import MITM
         syn = importing.read_zip('synthetic.maed', MITM.MAED)
-        os.remove('synthetic.sqlite')
+        if os.path.exists('synthetic.sqlite'):
+            os.remove('synthetic.sqlite')
         mk_sqlite(syn, 'synthetic.sqlite')
 
+    def test_with_synthetic_variation(self):
+        from mitm_tooling.representation import mk_sqlite
+        from mitm_tooling.io import importing
+        from mitm_tooling.definition import MITM
+        syn = importing.read_zip('synthetic-variation.maed', MITM.MAED)
+        if os.path.exists('synthetic-variation.sqlite'):
+            os.remove('synthetic-variation.sqlite')
+        mk_sqlite(syn, 'synthetic-variation.sqlite')
+
     def test_superset(self):
         from mitm_tooling.transformation.superset import mk_inferred_superset_dataset_def
-        mk_inferred_superset_dataset_def('superset_import.zip', 'synthetic.sqlite')
+        mk_inferred_superset_dataset_def('superset_import', 'synthetic.sqlite')
+
+    def test_superset_variation(self):
+        from mitm_tooling.transformation.superset import mk_inferred_superset_dataset_def
+        mk_inferred_superset_dataset_def('superset_import_variation', 'synthetic-variation.sqlite')
 
 
 if __name__ == '__main__':
-- 
GitLab