diff --git a/mitm_tooling/extraction/sql/data_models/__init__.py b/mitm_tooling/extraction/sql/data_models/__init__.py
index dfb226d78013b6c733300f94f53033691d08aefc..134a447aabfd06c76826bdbd77bcf442c5430a72 100644
--- a/mitm_tooling/extraction/sql/data_models/__init__.py
+++ b/mitm_tooling/extraction/sql/data_models/__init__.py
@@ -1,5 +1,5 @@
 # noinspection PyUnresolvedReferences
-from .db_meta import Queryable, TableMetaInfo, DBMetaInfo, ForeignKeyConstraint, ExplicitTableSelection, \
+from .db_meta import Queryable, ColumnProperties, TableMetaInfo, DBMetaInfo, ForeignKeyConstraint, ExplicitTableSelection, \
     ExplicitColumnSelection, ExplicitSelectionUtils, ColumnName
 # noinspection PyUnresolvedReferences
 from .db_probe import TableProbe, DBProbe, SampleSummary
diff --git a/mitm_tooling/transformation/superset/dataset_definition.py b/mitm_tooling/transformation/superset/dataset_definition.py
deleted file mode 100644
index 5eb9c405187cba11d3ed0e330f9ff85562720e80..0000000000000000000000000000000000000000
--- a/mitm_tooling/transformation/superset/dataset_definition.py
+++ /dev/null
@@ -1,140 +0,0 @@
-from abc import ABC, abstractmethod
-from datetime import datetime, tzinfo
-from typing import Any, Annotated, Literal
-
-import pydantic
-from uuid import UUID
-
-from pydantic import Field
-
-from mitm_tooling.data_types import MITMDataType
-
-BetterUUID = Annotated[
-    UUID,
-    pydantic.BeforeValidator(lambda x: UUID(x) if isinstance(x, str) else x),
-    pydantic.PlainSerializer(lambda x: str(x)),
-    pydantic.Field(
-        description="Better annotation for UUID, parses from string format. Serializes to string format."
-    ),
-]
-
-
-class SupersetDefFile(pydantic.BaseModel, ABC):
-    model_config = pydantic.ConfigDict(arbitrary_types_allowed=True)
-
-    @property
-    @abstractmethod
-    def filename(self) -> str:
-        pass
-
-
-class SupersetDatabaseDef(SupersetDefFile):
-    database_name: str
-    sqlalchemy_uri: pydantic.AnyUrl
-    uuid: BetterUUID
-    cache_timeout: str | None = None
-    expose_in_sqllab: bool = True
-    allow_run_async: bool = False
-    allow_ctas: bool = False
-    allow_cvas: bool = False
-    allow_dml: bool = False
-    allow_file_upload: bool = False
-    extra: dict[str, Any] = pydantic.Field(default_factory=lambda: {
-        'allows_virtual_table_explore': True
-    })
-    impersonate_user: bool = False
-    version: str = '1.0.0'
-    ssh_tunnel: None = None
-
-    @property
-    def filename(self):
-        return self.database_name
-
-
-class SupersetMetricDef(pydantic.BaseModel):
-    metric_name: str
-    verbose_name: str
-    expression: str
-    metric_type: str | None = None
-    description: str | None = None
-    d3format: str | None = None
-    currency: str | None = None
-    extra: dict[str, Any] = Field(default_factory=dict)
-    warning_text: str | None = None
-
-
-class SupersetColumnDef(pydantic.BaseModel):
-    column_name: str
-    verbose_name: str | None = None
-    is_dttm: bool = False
-    is_active: bool = True
-    type: str = str(MITMDataType.Text.sa_sql_type)
-    advanced_data_type: str | None = None
-    groupby: bool = True
-    filterable: bool = True
-    expression: str | None = None
-    description: str | None = None
-    python_date_format: str = None
-    extra: dict[str, Any] = pydantic.Field(default_factory=dict)
-
-
-class SupersetDatasetDef(SupersetDefFile):
-    model_config = pydantic.ConfigDict(populate_by_name=True)
-
-    table_name: str
-    schema_name: str = pydantic.Field(alias='schema')
-    uuid: BetterUUID
-    database_uuid: BetterUUID
-    main_dttm_col: str | None = None
-    description: str | None = None
-    default_endpoint: str | None = None
-    offset: int = 0
-    cache_timeout: str | None = None
-    catalog: str | None = None
-    sql: str | None = None
-    params: Any = None
-    template_params: Any = None
-    filter_select_enabled: bool = True
-    fetch_values_predicate: str | None = None
-    extra: dict[str, Any] = pydantic.Field(default_factory=dict)
-    normalize_columns: bool = False
-    always_filter_main_dttm: bool = False
-    metrics: list[SupersetMetricDef] = pydantic.Field(default_factory=list)
-    columns: list[SupersetColumnDef] = pydantic.Field(default_factory=list)
-    version: str = '1.0.0'
-
-    @property
-    def filename(self):
-        return self.table_name
-
-
-StrDatetime = Annotated[datetime,
-pydantic.BeforeValidator(lambda x: datetime.fromisoformat(x) if isinstance(x, str) else x),
-pydantic.PlainSerializer(lambda x: str(x)),
-pydantic.Field(
-    description="Better annotation for datetime, parses from string format. Serializes to string format."
-)]
-
-MetadataType = Literal['Database', 'SqlaTable', 'Slice']
-
-
-class SupersetMetadataDef(SupersetDefFile):
-    version: str = '1.0.0'
-    type: MetadataType = 'SqlaTable'
-    timestamp: StrDatetime = pydantic.Field(default_factory=datetime.utcnow)
-
-    @property
-    def filename(self) -> str:
-        return 'metadata'
-
-
-class SupersetDef(pydantic.BaseModel):
-    database: SupersetDatabaseDef
-    datasets: list[SupersetDatasetDef]
-    metadata: SupersetMetadataDef = pydantic.Field(default_factory=SupersetMetadataDef)
-
-    def to_folder_structure(self) -> dict[str, Any]:
-        db_name = self.database.database_name
-        folder = {'.': self.metadata, 'databases': [{db_name: self.database}],
-                  'datasets': {db_name: list(self.datasets)}}
-        return {'my_import': folder}
diff --git a/mitm_tooling/transformation/superset/definitions/__init__.py b/mitm_tooling/transformation/superset/definitions/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..9226df6ada7db584d8851ae1567f6372c800234f
--- /dev/null
+++ b/mitm_tooling/transformation/superset/definitions/__init__.py
@@ -0,0 +1,5 @@
+from .constants import *
+from .core import *
+from .post_processing import *
+from .charts import *
+from .high_level import *
\ No newline at end of file
diff --git a/mitm_tooling/transformation/superset/definitions/charts.py b/mitm_tooling/transformation/superset/definitions/charts.py
new file mode 100644
index 0000000000000000000000000000000000000000..6d99e1bd6a4773318f06d5453c0003e087a017e9
--- /dev/null
+++ b/mitm_tooling/transformation/superset/definitions/charts.py
@@ -0,0 +1,86 @@
+from abc import ABC, abstractmethod
+from datetime import datetime, tzinfo, UTC
+from enum import StrEnum, IntEnum
+from typing import Any, Annotated, Literal, Self, Union
+
+import pydantic
+from uuid import UUID
+
+from pydantic import Field, AnyUrl
+
+from mitm_tooling.data_types import MITMDataType
+from .core import *
+from .constants import *
+
+
+class ChartParams(FormData):
+    datasource: str | DatasourceIdentifier
+    viz_type: SupersetVizType
+    groupby: list[str] = pydantic.Field(default_factory=list)
+    adhoc_filters: list[SupersetAdhocFilter] = pydantic.Field(default_factory=list)
+    row_limit: int = 10000
+    sort_by_metric: bool = True
+    color_scheme: ColorScheme = 'supersetColors'
+    show_legend: bool = True
+    legendType: str = 'scroll'
+    legendOrientation: str = 'top'
+    extra_form_data: dict[str, Any] = pydantic.Field(default_factory=dict)
+    slice_id: SupersetId | None = None
+    dashboards: list[SupersetId] = pydantic.Field(default_factory=list)
+
+
+class PieChartParams(ChartParams):
+    viz_type: Literal[SupersetVizType.PIE] = SupersetVizType.PIE
+    metric: SupersetAdhocMetric
+    show_labels_threshold: int = 5
+    show_labels: bool = True
+    labels_outside: bool = True
+    outerRadius: int = 70
+    innerRadius: int = 30
+    label_type: str = 'key'
+    number_format: str = 'SMART_NUMBER'
+    date_format: str = 'smart_date'
+
+
+class TimeSeriesChartParams(ChartParams):
+    metrics: list[SupersetAdhocMetric]
+    x_axis: ColumnName
+    x_axis_sort_asc: bool = True
+    x_axis_sort_series: str = 'name'
+    x_axis_sort_series_ascending: bool = True
+    x_axis_time_format: str = 'smart_date'
+    x_axis_title_margin: int = 15
+    y_axis_format: str = 'SMART_NUMBER'
+    y_axis_bounds: tuple[float | None, float | None] = (None, None)
+    y_axis_title_margin: int = 15
+    y_axis_title_position: str = 'Left'
+    truncateXAxis: bool = True
+    truncate_metric: bool = True
+    show_empty_columns: bool = True
+    comparison_type: str = 'values'
+    rich_tooltip: bool = True
+    showTooltipTotal: bool = True
+    showTooltipPercentage: bool = True
+    tooltipTimeFormat: str = 'smart_date'
+    sort_series_type: str = 'sum'
+    orientation: str = 'vertical'
+    only_total: bool = True
+    order_desc: bool = True
+    time_grain_sqla: TimeGrain | None = None
+    annotation_layers: AnnotationLayers = pydantic.Field(default_factory=list)
+
+    #
+    # forecastEnabled: bool = False
+    # forecastPeriods: int = 10
+    # forecastInterval: float = 0.8
+
+
+class TimeSeriesBarParams(TimeSeriesChartParams):
+    viz_type: Literal[SupersetVizType.TIMESERIES_BAR] = SupersetVizType.TIMESERIES_BAR
+
+
+class TimeSeriesLineParams(TimeSeriesChartParams):
+    viz_type: Literal[SupersetVizType.TIMESERIES_LINE] = SupersetVizType.TIMESERIES_LINE
+    opacity: float = 0.2
+    markerSize: int = 6
+    seriesType: str = 'line'
diff --git a/mitm_tooling/transformation/superset/definitions/constants.py b/mitm_tooling/transformation/superset/definitions/constants.py
new file mode 100644
index 0000000000000000000000000000000000000000..f55f4e9652b716500f7ddedad45055518c951128
--- /dev/null
+++ b/mitm_tooling/transformation/superset/definitions/constants.py
@@ -0,0 +1,183 @@
+from abc import ABC, abstractmethod
+from datetime import datetime, tzinfo, UTC
+from enum import StrEnum, IntEnum
+from typing import Any, Annotated, Literal, Self, Union
+
+import pydantic
+from uuid import UUID
+
+from pydantic import Field, AnyUrl
+
+from mitm_tooling.data_types import MITMDataType
+from mitm_tooling.representation.intermediate_representation import ColumnName
+
+StrUUID = Annotated[
+    UUID,
+    pydantic.BeforeValidator(lambda x: UUID(x) if isinstance(x, str) else x),
+    pydantic.PlainSerializer(lambda x: str(x)),
+    pydantic.Field(
+        description="Better annotation for UUID. Parses from string format, serializes to string format."
+    )
+]
+
+StrUrl = Annotated[
+    AnyUrl,
+    pydantic.BeforeValidator(lambda x: AnyUrl(x) if isinstance(x, str) else x),
+    pydantic.PlainSerializer(lambda x: str(x)),
+    pydantic.Field(
+        description="Better annotation for AnyUrl. Parses from string format, serializes to string format."
+    )
+]
+
+StrDatetime = Annotated[
+    datetime,
+    pydantic.BeforeValidator(lambda x: datetime.fromisoformat(x) if isinstance(x, str) else x),
+    pydantic.PlainSerializer(lambda x: str(x)),
+    pydantic.Field(
+        description="Better annotation for datetime. Parses from string format, serializes to string format."
+    )
+]
+
+SupersetId = int
+
+FilterValue = Union[bool, StrDatetime, float, int, str]
+FilterValues = Union[FilterValue, list[FilterValue], tuple[FilterValue, ...]]
+
+ColorScheme = Literal['blueToGreen', 'supersetColors']
+
+
+class GenericDataType(IntEnum):
+    NUMERIC = 0
+    STRING = 1
+    TEMPORAL = 2
+    BOOLEAN = 3
+
+    @classmethod
+    def from_mitm_dt(cls, dt: MITMDataType) -> Self:
+        if dt in {MITMDataType.Numeric, MITMDataType.Integer}:
+            return cls.NUMERIC
+        elif dt in {MITMDataType.Datetime}:
+            return cls.TEMPORAL
+        elif dt in {MITMDataType.Boolean}:
+            return cls.BOOLEAN
+        else:
+            return cls.STRING
+
+
+class AnnotationType(StrEnum):
+    Event = 'EVENT'
+    Formula = 'FORMULA'
+    Interval = 'INTERVAL'
+    Timeseries = 'TIME_SERIES'
+
+
+class AnnotationSource(StrEnum):
+    Line = 'line'
+    Native = 'NATIVE'
+    Table = 'table'
+    Undefined = ''
+
+
+class SupersetVizType(StrEnum):
+    PIE = 'pie'
+    TIMESERIES_BAR = 'echarts_timeseries_bar'
+    TIMESERIES_LINE = 'echarts_timeseries_line'
+
+
+class ExpressionType(StrEnum):
+    SIMPLE = 'SIMPLE'
+    SQL = 'SQL'
+
+
+class SupersetAggregate(StrEnum):
+    COUNT = 'COUNT'
+    SUM = 'SUM'
+    MIN = 'MIN'
+    MAX = 'MAX'
+    AVG = 'AVG'
+
+
+class FilterOperator(StrEnum):
+    EQUALS = "=="
+    NOT_EQUALS = "!="
+    GREATER_THAN = ">"
+    LESS_THAN = "<"
+    GREATER_THAN_OR_EQUALS = ">="
+    LESS_THAN_OR_EQUALS = "<="
+    LIKE = "LIKE"
+    NOT_LIKE = "NOT LIKE"
+    ILIKE = "ILIKE"
+    IS_NULL = "IS NULL"
+    IS_NOT_NULL = "IS NOT NULL"
+    IN = "IN"
+    NOT_IN = "NOT IN"
+    IS_TRUE = "IS TRUE"
+    IS_FALSE = "IS FALSE"
+    TEMPORAL_RANGE = "TEMPORAL_RANGE"
+
+
+class FilterStringOperators(StrEnum):
+    EQUALS = "EQUALS"
+    NOT_EQUALS = "NOT_EQUALS"
+    LESS_THAN = "LESS_THAN"
+    GREATER_THAN = "GREATER_THAN"
+    LESS_THAN_OR_EQUAL = "LESS_THAN_OR_EQUAL"
+    GREATER_THAN_OR_EQUAL = "GREATER_THAN_OR_EQUAL"
+    IN = "IN"
+    NOT_IN = "NOT_IN"
+    ILIKE = "ILIKE"
+    LIKE = "LIKE"
+    IS_NOT_NULL = "IS_NOT_NULL"
+    IS_NULL = "IS_NULL"
+    LATEST_PARTITION = "LATEST_PARTITION"
+    IS_TRUE = "IS_TRUE"
+    IS_FALSE = "IS_FALSE"
+
+    @classmethod
+    def from_operator(cls, operator: FilterOperator) -> Self | None:
+        return getattr(cls, operator.name, None)  # e.g. TEMPORAL_RANGE has no string-operator counterpart
+
+
+class TimeGrain(StrEnum):
+    SECOND = "PT1S"
+    FIVE_SECONDS = "PT5S"
+    THIRTY_SECONDS = "PT30S"
+    MINUTE = "PT1M"
+    FIVE_MINUTES = "PT5M"
+    TEN_MINUTES = "PT10M"
+    FIFTEEN_MINUTES = "PT15M"
+    THIRTY_MINUTES = "PT30M"
+    HALF_HOUR = "PT0.5H"
+    HOUR = "PT1H"
+    SIX_HOURS = "PT6H"
+    DAY = "P1D"
+    WEEK = "P1W"
+    WEEK_STARTING_SUNDAY = "1969-12-28T00:00:00Z/P1W"
+    WEEK_STARTING_MONDAY = "1969-12-29T00:00:00Z/P1W"
+    WEEK_ENDING_SATURDAY = "P1W/1970-01-03T00:00:00Z"
+    WEEK_ENDING_SUNDAY = "P1W/1970-01-04T00:00:00Z"
+    MONTH = "P1M"
+    QUARTER = "P3M"
+    QUARTER_YEAR = "P0.25Y"
+    YEAR = "P1Y"
+
+
+class ChartDataResultFormat(StrEnum):
+    CSV = "csv"
+    JSON = "json"
+    XLSX = "xlsx"
+
+    @classmethod
+    def table_like(cls) -> set[Self]:
+        return {cls.CSV, cls.XLSX}
+
+
+class ChartDataResultType(StrEnum):
+    COLUMNS = "columns"
+    FULL = "full"
+    QUERY = "query"
+    RESULTS = "results"
+    SAMPLES = "samples"
+    TIMEGRAINS = "timegrains"
+    POST_PROCESSED = "post_processed"
+    DRILL_DETAIL = "drill_detail"
diff --git a/mitm_tooling/transformation/superset/definitions/core.py b/mitm_tooling/transformation/superset/definitions/core.py
new file mode 100644
index 0000000000000000000000000000000000000000..dc0fab3a6f6a19cc44817145cb301d1c9306ac91
--- /dev/null
+++ b/mitm_tooling/transformation/superset/definitions/core.py
@@ -0,0 +1,193 @@
+from abc import ABC, abstractmethod
+from datetime import datetime, tzinfo, UTC
+from enum import StrEnum, IntEnum
+from typing import Any, Annotated, Literal, Self, Union
+
+import pydantic
+from uuid import UUID
+
+from pydantic import Field, AnyUrl
+
+from mitm_tooling.data_types import MITMDataType
+from .constants import *
+
+
+class SupersetPostProcessing(pydantic.BaseModel, ABC):
+    @pydantic.computed_field()
+    @property
+    def operation(self) -> str:
+        raise NotImplementedError()
+
+
+class DatasourceIdentifier(pydantic.BaseModel):
+    id: SupersetId
+    type: Literal['table', 'annotation'] = 'table'
+
+    dataset_uuid: StrUUID = pydantic.Field(exclude=True)
+
+    @property
+    def datasource_uid(self):
+        return f'{self.id}__{self.type}'
+
+
+class SupersetColumn(pydantic.BaseModel):
+    column_name: str
+    verbose_name: str | None = None
+    id: SupersetId | None = None
+    is_dttm: bool = False
+    is_active: bool = True
+    type: str = str(MITMDataType.Text.sa_sql_type)
+    type_generic: GenericDataType = GenericDataType.STRING
+    advanced_data_type: str | None = None
+    groupby: bool = True
+    filterable: bool = True
+    expression: str | None = None
+    description: str | None = None
+    python_date_format: str | None = None
+    extra: dict[str, Any] = pydantic.Field(default_factory=dict)
+
+
+class IdentifiedSupersetColumn(SupersetColumn):
+    id: SupersetId
+
+
+class SupersetMetric(pydantic.BaseModel):
+    metric_name: str
+    verbose_name: str
+    expression: str
+    metric_type: str | None = None
+    description: str | None = None
+    d3format: str | None = None
+    currency: str | None = None
+    extra: dict[str, Any] = Field(default_factory=dict)
+    warning_text: str | None = None
+
+
+class SupersetAdhocFilter(pydantic.BaseModel):
+    clause: str = 'WHERE'
+    subject: ColumnName
+    operator: FilterOperator
+    operatorId: FilterStringOperators | None = None
+    comparator: str | None = 'No filter'
+    expressionType: ExpressionType = ExpressionType.SIMPLE
+    isExtra: bool = False
+    isNew: bool = False
+    sqlExpression: str | None = None
+
+
+class SupersetAdhocMetric(pydantic.BaseModel):
+    label: str
+    column: SupersetColumn
+    expressionType: ExpressionType = ExpressionType.SIMPLE
+    aggregate: SupersetAggregate = SupersetAggregate.COUNT
+    sqlExpression: str | None = None
+    datasourceWarning: bool = False
+    hasCustomLabel: bool = False
+    optionName: str | None = None
+
+
+class SupersetAdhocColumn(pydantic.BaseModel):
+    label: str
+    sqlExpression: str
+    columnType: str = 'BASE_AXIS'
+    expressionType: str = 'SQL'
+    timeGrain: TimeGrain | None = None
+
+
+OrderBy = tuple[SupersetAdhocMetric | str, bool]
+
+
+class AnnotationOverrides(pydantic.BaseModel):
+    time_range: str | None = None
+
+
+class AnnotationLayer(pydantic.BaseModel):
+    name: str
+    value: int
+    annotationType: AnnotationType
+    sourceType: AnnotationSource = AnnotationSource.Table
+    opacity: str = ''
+    overrides: AnnotationOverrides
+    hideLine: bool = False
+    show: bool = False
+    showLabel: bool = False
+    showMarkers: bool = False
+    style: str = 'solid'
+    width: int = 1
+
+
+class TimeAnnotationLayer(AnnotationLayer):
+    annotationType: Literal[AnnotationType.Event, AnnotationType.Interval] = AnnotationType.Event
+    titleColumn: str
+    timeColumn: str = 'time'
+    intervalEndColumn: str = ''
+    color: str | None = None
+    descriptionColumns: list[str] = pydantic.Field(default_factory=list)
+
+
+class QueryObjectFilterClause(pydantic.BaseModel):
+    col: ColumnName
+    op: FilterOperator
+    val: FilterValues | None = None
+    grain: str | None = None
+    isExtra: bool | None = None
+
+    @classmethod
+    def from_adhoc_filter(cls, adhoc_filter: SupersetAdhocFilter) -> Self:
+        return cls(col=adhoc_filter.subject, op=adhoc_filter.operator, val=adhoc_filter.comparator)
+
+
+class QueryObjectExtras(pydantic.BaseModel):
+    having: str = ''
+    where: str = ''
+    time_grain_sqla: TimeGrain | None = None
+
+
+AnnotationLayers = Annotated[list[AnnotationLayer] | None, pydantic.SerializeAsAny]
+PostProcessingList = Annotated[list[SupersetPostProcessing | dict[str, Any]], pydantic.SerializeAsAny]
+
+
+class QueryObject(pydantic.BaseModel):
+    annotation_layers: AnnotationLayers = pydantic.Field(default_factory=list)
+    applied_time_extras: dict[str, str] = pydantic.Field(default_factory=dict)
+    columns: list[ColumnName | SupersetAdhocColumn] = pydantic.Field(default_factory=list)
+    datasource: DatasourceIdentifier | None = None
+    extras: QueryObjectExtras = pydantic.Field(default_factory=QueryObjectExtras)
+    filters: list[QueryObjectFilterClause] = pydantic.Field(default_factory=list)
+    metrics: list[SupersetAdhocMetric] | None = None
+    granularity: str | None = None
+    from_dttm: StrDatetime | None = None
+    to_dttm: StrDatetime | None = None
+    inner_from_dttm: StrDatetime | None = None
+    inner_to_dttm: StrDatetime | None = None
+    is_rowcount: bool = False
+    is_timeseries: bool | None = None
+    order_desc: bool = True
+    orderby: list[OrderBy] = pydantic.Field(default_factory=list)
+    post_processing: PostProcessingList = pydantic.Field(default_factory=list)
+    result_type: ChartDataResultType | None = None
+    row_limit: int | None = None
+    row_offset: int | None = None
+    series_columns: list[ColumnName] = pydantic.Field(default_factory=list)
+    series_limit: int = 0
+    series_limit_metric: SupersetAdhocMetric | None = None
+    time_offsets: list[str] = pydantic.Field(default_factory=list)
+    time_shift: str | None = None
+    time_range: str | None = None
+    url_params: dict[str, str] | None = pydantic.Field(default_factory=dict)
+
+
+class FormData(pydantic.BaseModel):
+    pass
+
+
+class QueryContext(pydantic.BaseModel):
+    model_config = pydantic.ConfigDict(arbitrary_types_allowed=True)
+
+    datasource: DatasourceIdentifier
+    queries: list[QueryObject] = pydantic.Field(default_factory=list)
+    form_data: Annotated[FormData | dict[str, Any] | None, pydantic.SerializeAsAny, pydantic.Field(default=None)]
+    result_type: ChartDataResultType = ChartDataResultType.FULL
+    result_format: ChartDataResultFormat = ChartDataResultFormat.JSON
+    force: bool = False
+    custom_cache_timeout: int | None = None
diff --git a/mitm_tooling/transformation/superset/definitions/high_level.py b/mitm_tooling/transformation/superset/definitions/high_level.py
new file mode 100644
index 0000000000000000000000000000000000000000..1f499b0b76d37436afe84256955511642fbf542e
--- /dev/null
+++ b/mitm_tooling/transformation/superset/definitions/high_level.py
@@ -0,0 +1,147 @@
+from abc import ABC, abstractmethod
+from collections import defaultdict
+from datetime import datetime, tzinfo, UTC
+from enum import StrEnum, IntEnum
+from typing import Any, Annotated, Literal, Self, Union
+
+import pydantic
+from uuid import UUID
+
+from pydantic import Field, AnyUrl
+
+from mitm_tooling.data_types import MITMDataType
+from .constants import *
+from .core import *
+from .charts import *
+from .post_processing import *
+
+
+class MetadataType(StrEnum):
+    Database = 'Database'
+    SqlaTable = 'SqlaTable'
+    Slice = 'Slice'
+    Chart = 'Chart'
+
+
+class SupersetDefFile(pydantic.BaseModel, ABC):
+    model_config = pydantic.ConfigDict(arbitrary_types_allowed=True)
+
+    @property
+    @abstractmethod
+    def filename(self) -> str:
+        pass
+
+
+class SupersetMetadataDef(SupersetDefFile):
+    version: str = '1.0.0'
+    type: MetadataType = MetadataType.SqlaTable
+    timestamp: StrDatetime = pydantic.Field(default_factory=lambda: datetime.now(UTC))
+
+    @property
+    def filename(self) -> str:
+        return 'metadata'
+
+
+class SupersetDatabaseDef(SupersetDefFile):
+    database_name: str
+    sqlalchemy_uri: StrUrl
+    uuid: StrUUID
+    cache_timeout: str | None = None
+    expose_in_sqllab: bool = True
+    allow_run_async: bool = False
+    allow_ctas: bool = False
+    allow_cvas: bool = False
+    allow_dml: bool = False
+    allow_file_upload: bool = False
+    extra: dict[str, Any] = pydantic.Field(default_factory=lambda: {
+        'allows_virtual_table_explore': True
+    })
+    impersonate_user: bool = False
+    version: str = '1.0.0'
+    ssh_tunnel: None = None
+
+    @property
+    def filename(self):
+        return self.database_name
+
+
+class SupersetDatasetDef(SupersetDefFile):
+    model_config = pydantic.ConfigDict(populate_by_name=True)
+
+    table_name: str
+    schema_name: str = pydantic.Field(alias='schema')
+    uuid: StrUUID
+    database_uuid: StrUUID
+    main_dttm_col: str | None = None
+    description: str | None = None
+    default_endpoint: str | None = None
+    offset: int = 0
+    cache_timeout: str | None = None
+    catalog: str | None = None
+    sql: str | None = None
+    params: Any = None
+    template_params: Any = None
+    filter_select_enabled: bool = True
+    fetch_values_predicate: str | None = None
+    extra: dict[str, Any] = pydantic.Field(default_factory=dict)
+    normalize_columns: bool = False
+    always_filter_main_dttm: bool = False
+    metrics: list[SupersetMetric] = pydantic.Field(default_factory=list)
+    columns: list[SupersetColumn] = pydantic.Field(default_factory=list)
+    version: str = '1.0.0'
+
+    @property
+    def filename(self):
+        return self.table_name
+
+
+class SupersetChartDef(SupersetDefFile):
+    uuid: StrUUID
+    slice_name: str
+    viz_type: SupersetVizType
+    dataset_uuid: StrUUID
+    description: str | None = None
+    certified_by: str | None = None
+    certification_details: str | None = None
+    params: Annotated[ChartParams | dict[str, Any], pydantic.SerializeAsAny, pydantic.Field(default_factory=dict)]
+    query_context: Annotated[pydantic.Json | QueryContext | None, pydantic.PlainSerializer(
+        lambda x: x.model_dump_json(by_alias=True, exclude_none=True) if isinstance(x, pydantic.BaseModel) else x,
+        return_type=pydantic.Json), pydantic.Field(default=None)]
+    cache_timeout: int | None = None
+    version: str = '1.0.0'
+    is_managed_externally: bool = False
+    external_url: StrUrl | None = None
+
+    @property
+    def filename(self) -> str:
+        return f'{self.slice_name}_{self.dataset_uuid}'
+
+
+class SupersetDashboardDef(SupersetDefFile):
+
+    @property
+    def filename(self) -> str:
+        return 'dashboard'
+
+
+class SupersetAssetsDef(pydantic.BaseModel):
+    databases: list[SupersetDatabaseDef] | None = None
+    datasets: list[SupersetDatasetDef] | None = None
+    charts: list[SupersetChartDef] | None = None
+    dashboards: list[SupersetDashboardDef] | None = None
+    metadata: SupersetMetadataDef = pydantic.Field(default_factory=SupersetMetadataDef)
+
+    def to_folder_structure(self) -> dict[str, Any]:
+        folder = {'.': self.metadata}
+        dbs = {}
+        if self.databases:
+            dbs |= {db.uuid: db.database_name for db in self.databases}
+            folder['databases'] = [db for db in self.databases]
+        if self.datasets:
+            db_dss = defaultdict(list)
+            for ds in self.datasets:
+                db_dss[dbs[ds.database_uuid]].append(ds)
+            folder['datasets'] = db_dss
+        if self.charts:
+            folder['charts'] = self.charts
+        return {'my_import': folder}
diff --git a/mitm_tooling/transformation/superset/definitions/post_processing.py b/mitm_tooling/transformation/superset/definitions/post_processing.py
new file mode 100644
index 0000000000000000000000000000000000000000..38612f70cd9002a6404eef4cf12adf6dbc482111
--- /dev/null
+++ b/mitm_tooling/transformation/superset/definitions/post_processing.py
@@ -0,0 +1,43 @@
+import pydantic
+
+from .core import *
+
+
+class PivotOperator(pydantic.BaseModel):
+    operator: str = 'mean'
+
+
+class PivotOptions(pydantic.BaseModel):
+    aggregates: list[dict[ColumnName, PivotOperator]]
+    columns: list[ColumnName] = pydantic.Field(default_factory=list)
+    index: list[ColumnName] = pydantic.Field(default_factory=list)
+    drop_missing_columns: bool = False
+
+
+class Pivot(SupersetPostProcessing):
+    @property
+    def operation(self) -> str:
+        return 'pivot'
+
+    options: PivotOptions
+
+
+class RenameOptions(pydantic.BaseModel):
+    columns: dict[ColumnName, ColumnName | None] = pydantic.Field(default_factory=dict)
+    level: int = 0
+    inplace: bool | None = True
+
+
+class Rename(SupersetPostProcessing):
+    @property
+    def operation(self) -> str:
+        return 'rename'
+
+    options: RenameOptions
+
+
+class Flatten(SupersetPostProcessing):
+    @property
+    def operation(self) -> str:
+        return 'flatten'
+
diff --git a/mitm_tooling/transformation/superset/factories/__init__.py b/mitm_tooling/transformation/superset/factories/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/mitm_tooling/transformation/superset/factories/charts.py b/mitm_tooling/transformation/superset/factories/charts.py
new file mode 100644
index 0000000000000000000000000000000000000000..63a28c8d69ac28de8da021abf683f2ad12bad422
--- /dev/null
+++ b/mitm_tooling/transformation/superset/factories/charts.py
@@ -0,0 +1,108 @@
+from pydantic import UUID4
+
+from mitm_tooling.data_types import MITMDataType
+from .core import mk_empty_adhoc_time_filter, mk_adhoc_metric, mk_pivot_post_processing, mk_time_avg_post_processing, \
+    mk_adhoc_column
+from .query import mk_query_object, mk_query_object_filter_clause, mk_query_context, \
+    mk_empty_query_object_time_filter_clause
+from .utils import mk_uuid
+from ..definitions import SupersetChartDef, PieChartParams, DatasourceIdentifier, ColumnName, SupersetAggregate, \
+    SupersetVizType, QueryContext, TimeSeriesBarParams, TimeGrain, QueryObjectFilterClause, SupersetAdhocFilter, \
+    TimeSeriesLineParams, QueryObjectExtras
+
+
+def mk_pie_chart(name: str, datasource_identifier: DatasourceIdentifier, col: ColumnName, dt: MITMDataType,
+                 groupby_cols: list[ColumnName], uuid: UUID4 | None = None) -> SupersetChartDef:
+    metric = mk_adhoc_metric(col, agg=SupersetAggregate.COUNT, dt=dt)
+    params = PieChartParams(datasource=datasource_identifier,
+                            metric=metric,
+                            groupby=groupby_cols,
+                            adhoc_filters=[mk_empty_adhoc_time_filter()])
+
+    qo = mk_query_object([col], metrics=[metric], filters=[mk_empty_query_object_time_filter_clause()])
+    qc = mk_query_context(datasource=datasource_identifier, queries=[qo], form_data=params)
+
+    return SupersetChartDef(slice_name=name,
+                            viz_type=SupersetVizType.PIE,
+                            dataset_uuid=datasource_identifier.dataset_uuid,
+                            params=params,
+                            query_context=qc,
+                            uuid=uuid or mk_uuid())
+
+
+def mk_time_series_bar_chart(name: str,
+                             datasource_identifier: DatasourceIdentifier,
+                             y_col: ColumnName,
+                             y_dt: MITMDataType, x_col: ColumnName,
+                             groupby_cols: list[ColumnName],
+                             filters: list[SupersetAdhocFilter] | None = None,
+                             uuid: UUID4 | None = None,
+                             time_grain: TimeGrain | None = None) -> SupersetChartDef:
+    metric = mk_adhoc_metric(y_col, agg=SupersetAggregate.COUNT, dt=y_dt)
+    adhoc_filters = [mk_empty_adhoc_time_filter()]
+    if filters:
+        adhoc_filters.extend(filters)
+    params = TimeSeriesBarParams(datasource=datasource_identifier,
+                                 metrics=[metric],
+                                 groupby=groupby_cols,
+                                 adhoc_filters=adhoc_filters,
+                                 x_axis=x_col,
+                                 time_grain_sqla=time_grain
+                                 )
+
+    pp = mk_pivot_post_processing(x_col, cols=[y_col], aggregations={metric.label: 'mean'},
+                                  renames={metric.label: None})
+    adhoc_x = mk_adhoc_column(x_col, timeGrain=time_grain)
+    cols = [adhoc_x] + list(dict.fromkeys([y_col] + groupby_cols))  # adhoc columns are not hashable, so dedupe only the names
+    qo = mk_query_object(columns=cols,
+                         metrics=[metric],
+                         filters=[QueryObjectFilterClause.from_adhoc_filter(af) for af in adhoc_filters],
+                         post_processing=pp,
+                         series_columns=[y_col])
+    qc = mk_query_context(datasource=datasource_identifier, queries=[qo], form_data=params)
+
+    return SupersetChartDef(slice_name=name,
+                            viz_type=SupersetVizType.TIMESERIES_BAR,
+                            dataset_uuid=datasource_identifier.dataset_uuid,
+                            params=params,
+                            query_context=qc,
+                            uuid=uuid or mk_uuid())
+
+
+def mk_count_time_series_chart(name: str,
+                               datasource_identifier: DatasourceIdentifier,
+                               groupby_cols: list[ColumnName],
+                               time_col: ColumnName = 'time',
+                               filters: list[SupersetAdhocFilter] | None = None,
+                               uuid: UUID4 | None = None,
+                               time_grain: TimeGrain | None = None) -> SupersetChartDef:
+    metric = mk_adhoc_metric(time_col, agg=SupersetAggregate.COUNT, dt=MITMDataType.Datetime)
+    adhoc_filters = [mk_empty_adhoc_time_filter()]
+    if filters:
+        adhoc_filters.extend(filters)
+    params = TimeSeriesLineParams(datasource=datasource_identifier,
+                                  metrics=[metric],
+                                  groupby=groupby_cols,
+                                  adhoc_filters=adhoc_filters,
+                                  x_axis=time_col,
+                                  time_grain_sqla=time_grain
+                                  )
+
+    pp = mk_pivot_post_processing(time_col, cols=groupby_cols, aggregations={metric.label: 'mean'},
+                                  renames={metric.label: None})
+    adhoc_time_col = mk_adhoc_column(time_col, timeGrain=time_grain)
+    cols = [adhoc_time_col] + list(dict.fromkeys(groupby_cols))  # adhoc columns are not hashable, so dedupe only the names
+    qo = mk_query_object(columns=cols,
+                         metrics=[metric],
+                         filters=[QueryObjectFilterClause.from_adhoc_filter(af) for af in adhoc_filters],
+                         post_processing=pp,
+                         series_columns=groupby_cols,
+                         extras=QueryObjectExtras(time_grain_sqla=time_grain))
+    qc = mk_query_context(datasource=datasource_identifier, queries=[qo], form_data=params)
+
+    return SupersetChartDef(slice_name=name,
+                            viz_type=SupersetVizType.TIMESERIES_LINE,
+                            dataset_uuid=datasource_identifier.dataset_uuid,
+                            params=params,
+                            query_context=qc,
+                            uuid=uuid or mk_uuid())
diff --git a/mitm_tooling/transformation/superset/factories/core.py b/mitm_tooling/transformation/superset/factories/core.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e08279acd72fdf805653965fad21d7ac2ac4433
--- /dev/null
+++ b/mitm_tooling/transformation/superset/factories/core.py
@@ -0,0 +1,90 @@
+import uuid
+from typing import overload
+
+import pydantic
+
+from ..definitions import *
+import sqlalchemy as sa
+
+
+def mk_pivot_post_processing(index_col: ColumnName, cols: list[ColumnName], aggregations: dict[ColumnName, str],
+                             renames: dict[ColumnName, ColumnName] | None = None) -> list[
+    SupersetPostProcessing]:
+    pp = [Pivot(options=PivotOptions(aggregates=[{c: PivotOperator(operator=m)} for c, m in aggregations.items()],
+                                     columns=cols,
+                                     index=[index_col]))]
+    if renames:
+        pp.append(Rename(options=RenameOptions(columns={c: rn for c, rn in renames.items()})))
+    pp.append(Flatten())
+    return pp
+
+
+def mk_count_pivot_post_processing(cols: list[ColumnName], agg_cols: list[ColumnName]) -> list[SupersetPostProcessing]:
+    return mk_pivot_post_processing('time', cols, aggregations={f'AVG({c})': 'mean' for c in agg_cols})
+
+
+def mk_time_avg_post_processing(cols: list[ColumnName], agg_cols: list[ColumnName]) -> list[SupersetPostProcessing]:
+    return mk_pivot_post_processing('time', cols, aggregations={f'AVG({c})': 'mean' for c in agg_cols})
+
+
+def mk_adhoc_metric(col: ColumnName, agg: SupersetAggregate = SupersetAggregate.AVG,
+                    dt: MITMDataType = MITMDataType.Numeric, col_id: int | None = None,
+                    **kwargs) -> SupersetAdhocMetric:
+    return SupersetAdhocMetric(label=f'{agg}({col})', aggregate=agg,
+                               column=mk_column(col, dt, col_id), **kwargs)
+
+
+def mk_adhoc_metrics(cols: list[ColumnName], agg: SupersetAggregate = SupersetAggregate.AVG,
+                     dt: MITMDataType = MITMDataType.Numeric, **kwargs) -> list[
+    SupersetAdhocMetric]:
+    return [mk_adhoc_metric(c, agg=agg, dt=dt, **kwargs) for c in cols]
+
+
+def mk_metric(col: ColumnName, agg: SupersetAggregate, **kwargs) -> SupersetMetric:
+    name = f'{agg}({col})'
+    return SupersetMetric(metric_name=name, verbose_name=name, expression=name, **kwargs)
+
+
+def mk_metrics(cols: list[ColumnName], agg: SupersetAggregate = SupersetAggregate.AVG, **kwargs) -> list[
+    SupersetMetric]:
+    return [mk_metric(c, agg, **kwargs) for c in cols]
+
+
+@overload
+def mk_column(col: ColumnName, dt: MITMDataType, dialect: sa.Dialect | None = None, **kwargs) -> SupersetColumn:
+    pass
+
+
+@overload
+def mk_column(col: ColumnName, dt: MITMDataType, col_id: SupersetId,
+              dialect: sa.Dialect | None = None, **kwargs) -> IdentifiedSupersetColumn:
+    pass
+
+
+def mk_column(col: ColumnName, dt: MITMDataType, col_id: SupersetId | None = None,
+              dialect: sa.Dialect | None = None, **kwargs) -> SupersetColumn:
+    args = dict(column_name=col,
+                is_dttm=dt is MITMDataType.Datetime,
+                groupby=dt not in {MITMDataType.Json,
+                                   MITMDataType.Numeric},
+                type=(dt.sa_sql_type or MITMDataType.Text.sa_sql_type).compile(
+                    dialect=dialect),
+                type_generic=GenericDataType.from_mitm_dt(dt)) | kwargs
+    if col_id is not None:
+        return IdentifiedSupersetColumn(**args, id=col_id)
+    else:
+        return SupersetColumn(**args)
+
+
+def mk_adhoc_column(col: ColumnName, **kwargs) -> SupersetAdhocColumn:
+    return SupersetAdhocColumn(label=col, sqlExpression=col, **kwargs)
+
+
+def mk_adhoc_filter(col: ColumnName, op: FilterOperator, comp: str | None = 'No filter',
+                    **kwargs) -> SupersetAdhocFilter:
+    return SupersetAdhocFilter(subject=col, operator=op, operatorId=FilterStringOperators.from_operator(op),
+                               comparator=comp, **kwargs)
+
+
+def mk_empty_adhoc_time_filter() -> SupersetAdhocFilter:
+    return mk_adhoc_filter('time', FilterOperator.TEMPORAL_RANGE)
diff --git a/mitm_tooling/transformation/superset/factories/dataset.py b/mitm_tooling/transformation/superset/factories/dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..e9ab09d4fa326eff018cb38630930b8ebc652201
--- /dev/null
+++ b/mitm_tooling/transformation/superset/factories/dataset.py
@@ -0,0 +1,27 @@
+import pydantic
+import sqlalchemy as sa
+from mitm_tooling.transformation.superset.definitions import SupersetDatasetDef, SupersetMetric
+from mitm_tooling.data_types import MITMDataType
+from mitm_tooling.extraction.sql.data_models import TableMetaInfo
+from .core import mk_column, mk_metric
+from .utils import mk_uuid
+from ..definitions import SupersetAggregate
+
+
+def mk_dataset_def(tm: TableMetaInfo, database_uuid: pydantic.UUID4, dialect: sa.Dialect | None = None,
+                   uuid: pydantic.UUID4 | None = None) -> SupersetDatasetDef:
+    cols = []
+    metrics = [mk_metric('*', SupersetAggregate.COUNT)]  # yields the COUNT(*) default metric
+    for c in tm.columns:
+        dt = tm.column_properties[c].mitm_data_type
+        cols.append(
+            mk_column(c, dt, dialect=dialect),
+        )
+        if dt in {MITMDataType.Numeric, MITMDataType.Integer}:
+            metrics.extend((
+                mk_metric(c, SupersetAggregate.AVG),
+                mk_metric(c, SupersetAggregate.SUM),
+            ))
+
+    return SupersetDatasetDef(table_name=tm.name, schema=tm.schema_name, uuid=uuid or mk_uuid(),
+                              database_uuid=database_uuid, columns=cols, metrics=metrics)
diff --git a/mitm_tooling/transformation/superset/factories/datasource.py b/mitm_tooling/transformation/superset/factories/datasource.py
new file mode 100644
index 0000000000000000000000000000000000000000..8b2bf5e4a856de85e7b4ce1f104913a01a25a490
--- /dev/null
+++ b/mitm_tooling/transformation/superset/factories/datasource.py
@@ -0,0 +1,9 @@
+import pydantic
+from pydantic import AnyUrl, UUID4
+
+from .utils import mk_uuid
+from ..definitions import SupersetDatabaseDef
+
+
+def mk_datasource(name: str, sqlalchemy_uri: AnyUrl, uuid: UUID4 | None = None) -> SupersetDatabaseDef:
+    return SupersetDatabaseDef(database_name=name, sqlalchemy_uri=sqlalchemy_uri, uuid=uuid or mk_uuid())
diff --git a/mitm_tooling/transformation/superset/factories/query.py b/mitm_tooling/transformation/superset/factories/query.py
new file mode 100644
index 0000000000000000000000000000000000000000..8ec1d00b9a6a6c6382562d629973f645bf54fc70
--- /dev/null
+++ b/mitm_tooling/transformation/superset/factories/query.py
@@ -0,0 +1,33 @@
+
+from ..definitions import QueryObject, QueryContext, ColumnName, DatasourceIdentifier, SupersetAdhocMetric, \
+    SupersetAdhocFilter, QueryObjectFilterClause, FormData, FilterValues, FilterOperator, SupersetPostProcessing
+from .core import mk_adhoc_metric, mk_adhoc_metrics
+
+
+def mk_query_object_filter_clause(col: ColumnName, op: FilterOperator,
+                                  val: FilterValues | None = None, **kwargs) -> QueryObjectFilterClause:
+    return QueryObjectFilterClause(col=col, op=op, val=val, **kwargs)
+
+
+def mk_empty_query_object_time_filter_clause() -> QueryObjectFilterClause:
+    return mk_query_object_filter_clause('time', FilterOperator.TEMPORAL_RANGE)
+
+
+def mk_query_object(columns: list[ColumnName],
+                    metrics: list[SupersetAdhocMetric],
+                    filters: list[QueryObjectFilterClause],
+                    orderby: list[tuple[SupersetAdhocMetric, bool]] | None = None,
+                    post_processing: list[SupersetPostProcessing] | None = None,
+                    row_limit: int | None = 10_000,
+                    **kwargs) -> QueryObject:
+    if orderby is None:
+        orderby = [(metrics[0], False)]  # order by the first metric, descending
+    if post_processing is None:
+        post_processing = []
+    return QueryObject(columns=columns, metrics=metrics, filters=filters, orderby=orderby, post_processing=post_processing,
+                       row_limit=row_limit, **kwargs)
+
+
+def mk_query_context(datasource: DatasourceIdentifier, queries: list[QueryObject], form_data: FormData,
+                     **kwargs) -> QueryContext:
+    return QueryContext(datasource=datasource, queries=queries, form_data=form_data, **kwargs)
diff --git a/mitm_tooling/transformation/superset/factories/utils.py b/mitm_tooling/transformation/superset/factories/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..d98c6f0eb4b3a62b0351911a1d4eef85ebf64558
--- /dev/null
+++ b/mitm_tooling/transformation/superset/factories/utils.py
@@ -0,0 +1,7 @@
+import uuid
+
+import pydantic
+
+
+def mk_uuid() -> pydantic.UUID4:
+    return uuid.uuid4()
diff --git a/mitm_tooling/transformation/superset/superset_representation.py b/mitm_tooling/transformation/superset/superset_representation.py
index 44d2368512f5c665fda9e22284f19651d2d21755..3f60b2bdd794b9ce075489c5d0a0905c35c81ea7 100644
--- a/mitm_tooling/transformation/superset/superset_representation.py
+++ b/mitm_tooling/transformation/superset/superset_representation.py
@@ -1,6 +1,8 @@
 import os.path
 import uuid
 import zipfile
+from typing import TypedDict, Unpack
+
 import sqlalchemy as sa
 import yaml
 from pydantic import AnyUrl
@@ -14,15 +16,21 @@ from mitm_tooling.representation.sql_representation import MITMData, mk_sqlite,
 
 from mitm_tooling.data_types import MITMDataType
 
-from .dataset_definition import SupersetDatasetDef, SupersetColumnDef, SupersetDatabaseDef, SupersetDef, \
-    SupersetDefFile, SupersetMetadataDef, SupersetMetricDef
+from .definitions import SupersetDatasetDef, SupersetColumn, SupersetDatabaseDef, SupersetAssetsDef, \
+    SupersetDefFile, SupersetMetadataDef, SupersetMetric, GenericDataType
+from .factories.dataset import mk_dataset_def
+from .factories.datasource import mk_datasource
+
+
+class KWArguments(TypedDict, total=False):
+    sqlalchemy_uri: AnyUrl
 
 
 def tentative_superset_mount_url(db_name: str) -> AnyUrl:
     return AnyUrl(f'sqlite:////mounted-files/{db_name}.sqlite?check_same_thread=false')
 
 
-def write_superset_def_as_zip(target: ByteSink, superset_def: SupersetDef):
+def write_superset_def_as_zip(target: ByteSink, superset_def: SupersetAssetsDef):
     folder_structure = superset_def.to_folder_structure()
     with use_bytes_io(target, expected_file_ext='.zip', mode='wb', create_file_if_necessary=True) as f:
         with zipfile.ZipFile(f, 'w', zipfile.ZIP_DEFLATED) as zf:
@@ -31,7 +39,7 @@ def write_superset_def_as_zip(target: ByteSink, superset_def: SupersetDef):
                 fn = f'{arg.filename}.yaml'
                 if prefix:
                     fn = os.path.join(prefix, fn)
-                dump = arg.model_dump(by_alias=True, mode='python')
+                dump = arg.model_dump(by_alias=True, mode='python', exclude_none=True)
                 s = yaml.dump(dump, default_flow_style=False)
                 zf.writestr(fn, s)
 
@@ -56,11 +64,11 @@ def write_superset_def_as_zip(target: ByteSink, superset_def: SupersetDef):
     mk_node(folder_structure)
 
 
-def write_superset_def(output_path: FilePath, superset_def: SupersetDef):
+def write_superset_def(output_path: FilePath, superset_def: SupersetAssetsDef):
     write_superset_def_as_zip(output_path, superset_def)
 
 
-def infer_superset_dataset_def(sqlite_file_path: FilePath) -> SupersetDef:
+def infer_superset_dataset_def(sqlite_file_path: FilePath, **kwargs: Unpack[KWArguments]) -> SupersetAssetsDef:
     engine = create_sa_engine(AnyUrl(f'sqlite:///{str(sqlite_file_path)}'))
     meta, _ = connect_and_reflect(engine)
     db_meta = DBMetaInfo.from_sa_meta(meta, default_schema=SQL_REPRESENTATION_DEFAULT_SCHEMA)
@@ -68,51 +76,25 @@ def infer_superset_dataset_def(sqlite_file_path: FilePath) -> SupersetDef:
     database_uuid = uuid.uuid4()
     datasets = []
     for schema_name, schema_tables in db_meta.db_structure.items():
-        for table_name, table in schema_tables.items():
-            cols = []
-            metrics = [SupersetMetricDef(metric_name='COUNT(*)', verbose_name='Count', expression='COUNT(*)')]
-            for c in table.columns:
-                dt = table.column_properties[c].mitm_data_type
-                cols.append(
-                    SupersetColumnDef(column_name=c,
-                                      is_dttm=dt is MITMDataType.Datetime,
-                                      groupby=dt not in {MITMDataType.Json,
-                                                         MITMDataType.Numeric},
-                                      type=(dt.sa_sql_type or MITMDataType.Text.sa_sql_type).compile(
-                                          dialect=engine.dialect)
-                                      ))
-                if dt in {MITMDataType.Numeric, MITMDataType.Integer}:
-                    metrics.extend((
-                        SupersetMetricDef(metric_name=f'AVG({c})', verbose_name=f'AVG({c})', expression=f'AVG({c})'),
-                        SupersetMetricDef(metric_name=f'SUM({c})', verbose_name=f'SUM({c})', expression=f'SUM({c})')
-                    ))
-
-            datasets.append(
-                SupersetDatasetDef(table_name=table_name, schema_name=schema_name, uuid=uuid.uuid4(),
-                                   database_uuid=database_uuid, columns=cols, metrics=metrics))
+        for table_name, tm in schema_tables.items():
+            datasets.append(mk_dataset_def(tm, database_uuid, dialect=engine.dialect))
     db_name = os.path.splitext(os.path.basename(sqlite_file_path))[0]
-    return SupersetDef(
-        database=SupersetDatabaseDef(database_name=db_name,
-                                     sqlalchemy_uri=tentative_superset_mount_url(db_name),
-                                     uuid=database_uuid),
+    sqlalchemy_uri = kwargs.get('sqlalchemy_uri', tentative_superset_mount_url(db_name))
+    return SupersetAssetsDef(
+        databases=[mk_datasource(name=db_name,
+                                 sqlalchemy_uri=sqlalchemy_uri,
+                                 uuid=database_uuid)],
         datasets=datasets)
 
 
-def mk_inferred_superset_dataset_def(output_path: FilePath, sqlite_file_path: FilePath):
-    dataset_def = infer_superset_dataset_def(sqlite_file_path)
+def mk_inferred_superset_dataset_def(output_path: FilePath, sqlite_file_path: FilePath, **kwargs: Unpack[KWArguments]):
+    dataset_def = infer_superset_dataset_def(sqlite_file_path, **kwargs)
     write_superset_def(output_path, dataset_def)
 
 
-def mk_inferred_superset_defs(output_path_base: FilePath, sqlite_file_path: FilePath):
-    dataset_def = infer_superset_dataset_def(sqlite_file_path)
-    a = dataset_def.model_copy(update={'metadata': SupersetMetadataDef(type='Database')})
-    b = dataset_def.model_copy(update={'metadata': SupersetMetadataDef(type='SqlaTable')})
-    write_superset_def(output_path_base + '_db.zip', a)
-    write_superset_def(output_path_base + '_ds.zip', b)
-
-
 def mk_superset_dataset_def(mitm_data: MITMData, sqlite_file_path: str | None = ':memory:',
-                            definition_file_path: str | None = 'superset_definition.zip'):
+                            definition_file_path: str | None = 'superset_definition.zip',
+                            **kwargs: Unpack[KWArguments]):
     engine, sql_rep_schema = mk_sqlite(mitm_data, file_path=sqlite_file_path)
-    mk_inferred_superset_dataset_def(definition_file_path, sqlite_file_path)
+    mk_inferred_superset_dataset_def(definition_file_path, sqlite_file_path, **kwargs)
diff --git a/test/something.py b/test/something.py
index 41ca5d18a652bdeb9aafb1c54c0fe2e85be73521..b5dd7e3fed0818513cc46fc4718ece56e81c9439 100644
--- a/test/something.py
+++ b/test/something.py
@@ -46,13 +46,26 @@ class MyTestCase(unittest.TestCase):
         from mitm_tooling.io import importing
         from mitm_tooling.definition import MITM
         syn = importing.read_zip('synthetic.maed', MITM.MAED)
-        os.remove('synthetic.sqlite')
+        if os.path.exists('synthetic.sqlite'):
+            os.remove('synthetic.sqlite')
         mk_sqlite(syn, 'synthetic.sqlite')
 
+    def test_with_synthetic_variation(self):
+        from mitm_tooling.representation import mk_sqlite
+        from mitm_tooling.io import importing
+        from mitm_tooling.definition import MITM
+        syn = importing.read_zip('synthetic-variation.maed', MITM.MAED)
+        if os.path.exists('synthetic-variation.sqlite'):
+            os.remove('synthetic-variation.sqlite')
+        mk_sqlite(syn, 'synthetic-variation.sqlite')
+
     def test_superset(self):
-        from mitm_tooling.transformation.superset import mk_inferred_superset_dataset_def, mk_inferred_superset_defs
+        from mitm_tooling.transformation.superset import mk_inferred_superset_dataset_def
         mk_inferred_superset_dataset_def('superset_import', 'synthetic.sqlite')
-        mk_inferred_superset_defs('superset_import', 'synthetic.sqlite')
+
+    def test_superset_variation(self):
+        from mitm_tooling.transformation.superset import mk_inferred_superset_dataset_def
+        mk_inferred_superset_dataset_def('superset_import_variation', 'synthetic-variation.sqlite')
 
 
 if __name__ == '__main__':
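
Usage sketch (not part of the patch): a minimal example of how the new definitions and factories compose, assuming a `synthetic.sqlite` like the one built in the tests above. The column name 'kind' and the numeric datasource id are placeholders; Superset only assigns the real numeric id once the dataset has been imported.

    from mitm_tooling.data_types import MITMDataType
    from mitm_tooling.transformation.superset.superset_representation import (
        infer_superset_dataset_def, write_superset_def)
    from mitm_tooling.transformation.superset.definitions import DatasourceIdentifier
    from mitm_tooling.transformation.superset.factories.charts import mk_pie_chart

    # Reflect the SQLite file into a SupersetAssetsDef: one database plus one dataset per table.
    assets = infer_superset_dataset_def('synthetic.sqlite')

    # Charts reference a dataset by UUID plus the post-import numeric id; id=1 is a placeholder.
    ds = assets.datasets[0]
    chart = mk_pie_chart(name=f'{ds.table_name} by kind',
                         datasource_identifier=DatasourceIdentifier(id=1, dataset_uuid=ds.uuid),
                         col='kind', dt=MITMDataType.Text, groupby_cols=['kind'])
    assets.charts = [chart]

    # to_folder_structure() lays out metadata/databases/datasets/charts; the writer zips it for import.
    write_superset_def('superset_import.zip', assets)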