Skip to content
Snippets Groups Projects
Commit a64af7e5 authored by Leah Tacke genannt Unterberg's avatar Leah Tacke genannt Unterberg
Browse files

starting to add mitm db

parent f18ec5be
Branches
No related tags found
No related merge requests found
Showing
with 2916 additions and 74 deletions
......@@ -22,9 +22,6 @@
# unique random secure passwords and SECRET_KEY.
# -----------------------------------------------------------------------
x-superset-user: &superset-user root
x-superset-depends-on: &superset-depends-on
- db
- redis
x-superset-volumes: &superset-volumes
# /app/pythonpath_docker will be appended to the PYTHONPATH in the final container
- ./docker:/app/docker
......@@ -193,7 +190,6 @@ services:
required: true
- path: docker/.env-local # optional override
required: false
depends_on: *superset-depends-on
volumes: *superset-volumes
superset-worker:
......@@ -274,6 +270,7 @@ services:
healthcheck:
test: ["CMD-SHELL", "celery inspect ping -A superset.tasks.celery_app:app -d celery@$$HOSTNAME"]
#superset-mitm-service:
# build:
# context: "C:/Users/leah/PycharmProjects/superset-mitm-service"
......
......@@ -74,3 +74,10 @@ SUPERSET_LOG_LEVEL=info
MITM_API_HOST=superset-mitm-service
MITM_API_PORT=8180
GLOBAL_ASYNC_QUERIES_JWT_SECRET=c7ced009fa8f8fe00319cdfb137ed97ee7373243daff44f18b50a77d57cd1fb173b2195d4c683a1d4bef15383a423fb274250e5ed2b1b6ed40114952c2fff79a41e92e91e86739894c7c1fe40ad64fcba07e6da23fa76c2b36fa2bcb49db192b061f9bd99627cdf8dfb9b4ffcde33005eade4431cd662eb0265112132e0c478545b62860fa6c1a5a0e3c3d55aa4c121c648becbea68d2ea353fa2165ad543cf3ddc5fa783ec1b53eb738f4ad1cc76fffe526cdfdff070ee409eb8186334ecefffaf385da1f780e6a347169fb0252f606e9f3b9f9b759a5a02de2ecadec531149981898d12dbd8f661bba828a1cf1d4d713848a42ee7b811d0f44870ef7e53fd3
MITM_DATABASE_DIALECT=
MITM_DATABASE_USER=
MITM_DATABASE_PASSWORD=
MITM_DATABASE_HOST=
MITM_DATABASE_PORT=
MITM_DATABASE_DB=
......@@ -100,9 +100,6 @@ dependencies = [
"wtforms>=2.3.3, <4",
"wtforms-json",
"xlsxwriter>=3.0.7, <3.1",
# NEW MITM RELATED
#"requests>=2.0.0"
#"mitm-tooling>=0.3.1"
]
[project.optional-dependencies]
......@@ -207,6 +204,10 @@ development = [
"sqloxide",
"statsd",
]
# NEW MITM RELATED
#"requests>=2.0.0"
#"mitm-tooling>=0.3.1"
mitm = ["pydantic>=2.0"]
[project.urls]
homepage = "https://superset.apache.org/"
......
......@@ -49,6 +49,7 @@ cffi==1.17.1
# via
# cryptography
# pynacl
# trio
charset-normalizer==3.4.0
# via requests
click==8.1.7
......@@ -72,6 +73,7 @@ click-repl==0.3.0
colorama==0.4.6
# via
# apache-superset (pyproject.toml)
# click
# flask-appbuilder
cron-descriptor==1.4.5
# via apache-superset (pyproject.toml)
......@@ -148,8 +150,7 @@ greenlet==3.0.3
# via
# apache-superset (pyproject.toml)
# shillelagh
gunicorn==23.0.0
# via apache-superset (pyproject.toml)
# sqlalchemy
h11==0.14.0
# via wsproto
hashids==1.3.1
......@@ -237,7 +238,6 @@ packaging==24.2
# apache-superset (pyproject.toml)
# apispec
# deprecation
# gunicorn
# limits
# marshmallow
# marshmallow-sqlalchemy
......@@ -396,6 +396,8 @@ vine==5.1.0
# amqp
# celery
# kombu
waitress==3.0.2
# via apache-superset (pyproject.toml)
wcwidth==0.2.13
# via prompt-toolkit
websocket-client==1.8.0
......
......@@ -16,4 +16,5 @@
# specific language governing permissions and limitations
# under the License.
#
-e .[development,bigquery,cors,druid,gevent,gsheets,mysql,postgres,presto,prophet,trino,thumbnails]
-e .[development,bigquery,cors,druid,gevent,gsheets,mysql,postgres,presto,prophet,trino,thumbnails,mitm]
datamodel-code-generator>=0.28.0
......@@ -10,6 +10,8 @@ amqp==5.3.1
# via
# -c requirements/base.txt
# kombu
annotated-types==0.7.0
# via pydantic
apispec==6.3.0
# via
# -c requirements/base.txt
......@@ -18,6 +20,8 @@ apsw==3.46.0.0
# via
# -c requirements/base.txt
# shillelagh
argcomplete==3.5.3
# via datamodel-code-generator
attrs==24.2.0
# via
# -c requirements/base.txt
......@@ -42,6 +46,8 @@ billiard==4.2.1
# via
# -c requirements/base.txt
# celery
black==25.1.0
# via datamodel-code-generator
blinker==1.9.0
# via
# -c requirements/base.txt
......@@ -80,7 +86,9 @@ cffi==1.17.1
# via
# -c requirements/base.txt
# cryptography
# gevent
# pynacl
# trio
cfgv==3.4.0
# via pre-commit
charset-normalizer==3.4.0
......@@ -91,6 +99,7 @@ click==8.1.7
# via
# -c requirements/base.txt
# apache-superset
# black
# celery
# click-didyoumean
# click-option-group
......@@ -120,7 +129,10 @@ colorama==0.4.6
# via
# -c requirements/base.txt
# apache-superset
# click
# flask-appbuilder
# pytest
# tqdm
contourpy==1.0.7
# via matplotlib
coverage==7.6.8
......@@ -141,6 +153,8 @@ cryptography==43.0.3
# pyopenssl
cycler==0.12.1
# via matplotlib
datamodel-code-generator==0.28.1
# via -r requirements/development.in
db-dtypes==1.3.1
# via pandas-gbq
defusedxml==0.7.1
......@@ -251,6 +265,8 @@ freezegun==1.5.1
# via apache-superset
future==1.0.0
# via pyhive
genson==1.3.0
# via datamodel-code-generator
geographiclib==2.0
# via
# -c requirements/base.txt
......@@ -306,6 +322,7 @@ greenlet==3.0.3
# apache-superset
# gevent
# shillelagh
# sqlalchemy
grpcio==1.68.0
# via
# apache-superset
......@@ -313,10 +330,6 @@ grpcio==1.68.0
# grpcio-status
grpcio-status==1.60.1
# via google-api-core
gunicorn==23.0.0
# via
# -c requirements/base.txt
# apache-superset
h11==0.14.0
# via
# -c requirements/base.txt
......@@ -351,12 +364,16 @@ importlib-resources==6.4.5
# -c requirements/base.txt
# limits
# prophet
inflect==5.6.2
# via datamodel-code-generator
iniconfig==2.0.0
# via pytest
isodate==0.7.2
# via
# -c requirements/base.txt
# apache-superset
isort==6.0.1
# via datamodel-code-generator
itsdangerous==2.2.0
# via
# -c requirements/base.txt
......@@ -365,6 +382,7 @@ itsdangerous==2.2.0
jinja2==3.1.5
# via
# -c requirements/base.txt
# datamodel-code-generator
# flask
# flask-babel
jsonpath-ng==1.7.0
......@@ -439,6 +457,8 @@ msgspec==0.18.6
# via
# -c requirements/base.txt
# flask-session
mypy-extensions==1.0.0
# via black
mysqlclient==2.2.6
# via apache-superset
nh3==0.2.19
......@@ -487,11 +507,12 @@ packaging==24.2
# -c requirements/base.txt
# apache-superset
# apispec
# black
# datamodel-code-generator
# db-dtypes
# deprecation
# docker
# google-cloud-bigquery
# gunicorn
# limits
# marshmallow
# marshmallow-sqlalchemy
......@@ -522,6 +543,8 @@ parsedatetime==2.6
# apache-superset
pathable==0.4.3
# via jsonschema-spec
pathspec==0.12.1
# via black
pgsanity==0.2.9
# via
# -c requirements/base.txt
......@@ -533,6 +556,7 @@ pillow==10.3.0
platformdirs==3.9.1
# via
# -c requirements/base.txt
# black
# requests-cache
# virtualenv
pluggy==1.5.0
......@@ -595,6 +619,12 @@ pycparser==2.22
# via
# -c requirements/base.txt
# cffi
pydantic==2.10.6
# via
# apache-superset
# datamodel-code-generator
pydantic-core==2.27.2
# via pydantic
pydata-google-auth==1.9.0
# via pandas-gbq
pydruid==0.6.9
......@@ -677,6 +707,8 @@ pytz==2024.2
# flask-babel
# pandas
# trino
pywin32==308
# via docker
pyxlsb==1.0.10
# via
# -c requirements/base.txt
......@@ -686,6 +718,7 @@ pyyaml==6.0.2
# -c requirements/base.txt
# apache-superset
# apispec
# datamodel-code-generator
# jsonschema-spec
# pre-commit
redis==4.6.0
......@@ -800,6 +833,8 @@ tabulate==0.8.10
# via
# -c requirements/base.txt
# apache-superset
tomli==2.2.1
# via datamodel-code-generator
tqdm==4.67.1
# via
# cmdstanpy
......@@ -822,6 +857,8 @@ typing-extensions==4.12.2
# apache-superset
# flask-limiter
# limits
# pydantic
# pydantic-core
# selenium
# shillelagh
tzdata==2024.2
......@@ -830,6 +867,7 @@ tzdata==2024.2
# celery
# kombu
# pandas
# tzlocal
tzlocal==5.2
# via trino
url-normalize==1.4.3
......@@ -851,6 +889,10 @@ vine==5.1.0
# kombu
virtualenv==20.29.2
# via pre-commit
waitress==3.0.2
# via
# -c requirements/base.txt
# apache-superset
wcwidth==0.2.13
# via
# -c requirements/base.txt
......
# StarRocks in shared-data mode (see run_mode in the FE command) with MinIO as
# the S3-compatible object store.
services:
  # Object store. The entrypoint creates /minio_data/starrocks before starting
  # the server; the FE is configured with aws_s3_path = starrocks.
  # Port 9000 is the endpoint the FE uses (aws_s3_endpoint = minio:9000);
  # port 9001 is the console address passed via --console-address.
  minio:
    container_name: minio
    environment:
      MINIO_ROOT_USER: miniouser
      MINIO_ROOT_PASSWORD: miniopassword
    image: minio/minio:latest
    ports:
      - "9001:9001"
      - "9000:9000"
    entrypoint: sh
    command: '-c ''mkdir -p /minio_data/starrocks && minio server /minio_data --console-address ":9001"'''
    healthcheck:
      test: ["CMD", "mc", "ready", "local"]
      interval: 5s
      timeout: 5s
      retries: 5

  minio_mc:
    # This service is short lived, it does this:
    # - starts up
    # - checks to see if the MinIO service `minio` is ready
    # - creates a MinIO Access Key that the StarRocks services will use
    # - exits
    image: minio/mc:latest
    entrypoint:
      - sh
      - -c
      - |
        until mc ls minio > /dev/null 2>&1; do
          sleep 0.5
        done
        mc alias set myminio http://minio:9000 miniouser miniopassword
        mc admin user svcacct add --access-key AAAAAAAAAAAAAAAAAAAA \
        --secret-key BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB \
        myminio \
        miniouser
    depends_on:
      minio:
        condition: service_healthy

  # Frontend node. The command appends the shared-data / S3 settings to fe.conf
  # and then starts the FE. The access key and secret key written here are the
  # same values minio_mc registers as a service account.
  starrocks-fe:
    image: starrocks/fe-ubuntu:3.3-latest
    hostname: starrocks-fe
    container_name: starrocks-fe
    user: root
    command:
      - /bin/bash
      - -c
      - |
        echo "# enable shared data, set storage type, set endpoint" >> /opt/starrocks/fe/conf/fe.conf
        echo "run_mode = shared_data" >> /opt/starrocks/fe/conf/fe.conf
        echo "cloud_native_storage_type = S3" >> /opt/starrocks/fe/conf/fe.conf
        echo "aws_s3_endpoint = minio:9000" >> /opt/starrocks/fe/conf/fe.conf
        echo "# set the path in MinIO" >> /opt/starrocks/fe/conf/fe.conf
        echo "aws_s3_path = starrocks" >> /opt/starrocks/fe/conf/fe.conf
        echo "# credentials for MinIO object read/write" >> /opt/starrocks/fe/conf/fe.conf
        echo "aws_s3_access_key = AAAAAAAAAAAAAAAAAAAA" >> /opt/starrocks/fe/conf/fe.conf
        echo "aws_s3_secret_key = BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB" >> /opt/starrocks/fe/conf/fe.conf
        echo "aws_s3_use_instance_profile = false" >> /opt/starrocks/fe/conf/fe.conf
        echo "aws_s3_use_aws_sdk_default_behavior = false" >> /opt/starrocks/fe/conf/fe.conf
        echo "# Set this to false if you do not want default" >> /opt/starrocks/fe/conf/fe.conf
        echo "# storage created in the object storage using" >> /opt/starrocks/fe/conf/fe.conf
        echo "# the details provided above" >> /opt/starrocks/fe/conf/fe.conf
        echo "enable_load_volume_from_conf = true" >> /opt/starrocks/fe/conf/fe.conf
        /opt/starrocks/fe/bin/start_fe.sh --host_type FQDN
    ports:
      - 8030:8030
      - 9020:9020
      - 9030:9030
    healthcheck:
      test: 'mysql -u root -h starrocks-fe -P 9030 -e "show frontends\G" |grep "Alive: true"'
      interval: 10s
      timeout: 5s
      retries: 3
    depends_on:
      minio:
        condition: service_healthy

  # Compute node. The command sleeps 15s, raises ulimits, registers
  # starrocks-cn:9050 with the FE via ALTER SYSTEM ADD COMPUTE NODE, then
  # starts the CN. Its healthcheck queries the FE (-h starrocks-fe).
  starrocks-cn:
    image: starrocks/cn-ubuntu:3.3-latest
    command:
      - /bin/bash
      - -c
      - |
        sleep 15s;
        ulimit -u 65535;
        ulimit -n 65535;
        mysql --connect-timeout 2 -h starrocks-fe -P9030 -uroot -e "ALTER SYSTEM ADD COMPUTE NODE \"starrocks-cn:9050\";"
        /opt/starrocks/cn/bin/start_cn.sh
    environment:
      - HOST_TYPE=FQDN
    ports:
      - 8040:8040
    hostname: starrocks-cn
    container_name: starrocks-cn
    user: root
    depends_on:
      starrocks-fe:
        condition: service_healthy
        restart: true
      minio:
        condition: service_healthy
    healthcheck:
      test: 'mysql -u root -h starrocks-fe -P 9030 -e "SHOW COMPUTE NODES\G" |grep "Alive: true"'
      interval: 10s
      timeout: 5s
      retries: 3
......@@ -48,6 +48,7 @@ from pandas._libs.parsers import STR_NA_VALUES
from sqlalchemy.engine.url import URL
from sqlalchemy.orm.query import Query
from docker.pythonpath_dev.superset_config_docker import MITM_DATABASE_URI
from superset.advanced_data_type.plugins.internet_address import internet_address
from superset.advanced_data_type.plugins.internet_port import internet_port
from superset.advanced_data_type.types import AdvancedDataType
......@@ -560,6 +561,10 @@ DEFAULT_FEATURE_FLAGS: dict[str, bool] = {
"SLACK_ENABLE_AVATARS": False,
# Allow users to optionally specify date formats in email subjects, which will be parsed if enabled. # noqa: E501
"DATE_FORMAT_IN_EMAIL_SUBJECT": False,
###################################
# NEW
###################################
"MITM_SUPPORT" : False,
}
# ------------------------------
......@@ -1922,8 +1927,8 @@ CATALOGS_SIMPLIFIED_MIGRATION: bool = False
#
# Base URL used to reach the MitM service API.
# NOTE(review): 0.0.0.0 is a bind-all address rather than a destination host,
# and connecting to it is platform-dependent; confirm this default is intended.
MITM_API_BASEURL = 'http://0.0.0.0:8180/'
# MitM API timeout in whole seconds (5 minutes == 300). This was previously
# assigned twice with two equivalent timedelta spellings; one is kept.
MITM_API_TIMEOUT = int(timedelta(minutes=5).total_seconds())
# Database URI for MitM data; the default is an in-memory SQLite database.
MITM_DATABASE_URI = 'sqlite:///:memory:'
# -------------------------------------------------------------------
# * WARNING: STOP EDITING HERE *
......
......@@ -42,4 +42,7 @@ class CallMitMService(BaseSupersetApi):
def get_result(self, cache_key: str):
    """Return the cached result of an external service job.

    :param cache_key: key passed to ``retrieve_job_result_json``
    :returns: a 200 response carrying the decoded payload, or a 404 response
        when ``retrieve_job_result_json`` returns ``None``
    """
    # cache_key = request.args['cache_key']
    data = external_service_call_manager.retrieve_job_result_json(cache_key)
    if data is None:
        return self.response(404)
    if isinstance(data, dict):
        return self.response(200, **data)
    # The decoded payload can also be a JSON scalar (str/int/float), which
    # cannot be unpacked into keyword arguments; wrap it under one key instead
    # of failing with a TypeError.
    return self.response(200, result=data)
set windows-powershell := true

# Generate schema.py (pydantic v2 BaseModel classes) from openapi.yaml using datamodel-codegen.
api-schema:
    datamodel-codegen --input openapi.yaml --input-file-type openapi --output schema.py --output-model-type pydantic_v2.BaseModel --use-union-operator --use-field-description --target-python-version 3.11 --use-standard-collections --use-subclass-enum --capitalise-enum-members
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -723,6 +723,7 @@ class SupersetAppInitializer: # pylint: disable=too-many-public-methods
# NEW
def configure_api_forwarding(self) -> None:
    """Initialise the external service call manager factory on the app.

    Does nothing unless the ``MITM_SUPPORT`` feature flag is enabled.
    """
    if not feature_flag_manager.is_feature_enabled("MITM_SUPPORT"):
        return
    external_service_call_manager_factory.init_app(self.superset_app)
......
from __future__ import annotations
import builtins
import logging
import textwrap
from ast import literal_eval
from contextlib import closing, contextmanager, nullcontext, suppress
from copy import deepcopy
from datetime import datetime
from functools import lru_cache
from inspect import signature
from typing import Any, Callable, cast, TYPE_CHECKING
import numpy
import pandas as pd
import sqlalchemy as sqla
import sshtunnel
from flask import g, request
import sqlalchemy as sa
from flask_appbuilder import Model
from sqlalchemy import (
Boolean,
Column,
create_engine,
DateTime,
ForeignKey,
Integer,
MetaData,
String,
Table as SqlaTable,
Text,
)
from sqlalchemy.engine import Connection, Dialect, Engine
from sqlalchemy.engine.reflection import Inspector
from sqlalchemy.engine.url import URL
from sqlalchemy.exc import NoSuchModuleError
from sqlalchemy.ext.hybrid import hybrid_property
from sqlalchemy.orm import relationship
from sqlalchemy.pool import NullPool
from sqlalchemy.schema import UniqueConstraint
from sqlalchemy.sql import ColumnElement, expression, Select
from superset import app, db, db_engine_specs, is_feature_enabled
from superset.commands.database.exceptions import DatabaseInvalidError
from superset.constants import LRU_CACHE_MAX_SIZE, PASSWORD_MASK
from superset.databases.utils import make_url_safe
from superset.db_engine_specs.base import MetricType, TimeGrain
from superset.extensions import (
cache_manager,
encrypted_field_factory,
event_logger,
security_manager,
ssh_manager_factory,
from superset.models.helpers import AuditMixinNullable, ImportExportMixin
metadata = Model.metadata # pylint: disable=no-member
# Association table: each row pairs one `mitm_datasets` row with one `tables`
# row; a given pair may appear only once.
mitm_dataset_tables = sa.Table(
    "mitm_dataset_tables",
    metadata,
    sa.Column("id", sa.Integer, primary_key=True),
    sa.Column(
        "mitm_dataset_id",
        sa.Integer,
        sa.ForeignKey("mitm_datasets.id", ondelete="CASCADE"),
    ),
    sa.Column(
        "table_id",
        sa.Integer,
        sa.ForeignKey("tables.id", ondelete="CASCADE"),
    ),
    sa.UniqueConstraint("mitm_dataset_id", "table_id"),
)
# Association table: each row pairs one `mitm_datasets` row with one `slices`
# row; a given pair may appear only once.
mitm_dataset_slices = sa.Table(
    "mitm_dataset_slices",
    metadata,
    sa.Column("id", sa.Integer, primary_key=True),
    sa.Column(
        "mitm_dataset_id",
        sa.Integer,
        sa.ForeignKey("mitm_datasets.id", ondelete="CASCADE"),
    ),
    sa.Column(
        "slice_id",
        sa.Integer,
        sa.ForeignKey("slices.id", ondelete="CASCADE"),
    ),
    sa.UniqueConstraint("mitm_dataset_id", "slice_id"),
)
# Association table: each row pairs one `mitm_datasets` row with one
# `dashboards` row; a given pair may appear only once.
mitm_dataset_dashboards = sa.Table(
    "mitm_dataset_dashboards",
    metadata,
    sa.Column("id", sa.Integer, primary_key=True),
    sa.Column(
        "mitm_dataset_id",
        sa.Integer,
        sa.ForeignKey("mitm_datasets.id", ondelete="CASCADE"),
    ),
    sa.Column(
        "dashboard_id",
        sa.Integer,
        sa.ForeignKey("dashboards.id", ondelete="CASCADE"),
    ),
    sa.UniqueConstraint("mitm_dataset_id", "dashboard_id"),
)
from superset.models.helpers import AuditMixinNullable, ImportExportMixin, UUIDMixin
class MitMDataset(Model, AuditMixinNullable,
......@@ -59,17 +51,20 @@ class MitMDataset(Model, AuditMixinNullable,
"""An ORM object that stores MitM Dataset related information"""
__tablename__ = 'mitm_datasets'
type = 'table'
id = Column(Integer, primary_key=True)
dataset_name = Column(String(255), nullable=False)
mitm = Column(String(255), nullable=False)
mitm = Column(String(127), nullable=False)
database_id = Column(Integer, ForeignKey('dbs.id', ondelete='CASCADE'),
nullable=False)
database = relationship('Database', foreign_keys=[database_id])
tables = relationship('SqlaTable', secondary='mitm_dataset_tables')
slices = relationship('Slice', secondary='mitm_dataset_slices')
dashboards = relationship('Dashboard', secondary='mitm_dataset_dashboards')
export_fields = ['id', 'dataset_name', 'mitm', 'database_id']
export_parent = ['database']
export_children = ['tables', 'slices', 'dashboards']
@property
def database_name(self) -> str:
......
......@@ -11,6 +11,7 @@ from celery.result import AsyncResult
from flask import Request, Flask
from werkzeug.datastructures import Headers
from superset import is_feature_enabled
from superset.exceptions import CacheLoadError
from superset.utils import json
......@@ -153,20 +154,20 @@ class ExternalServiceCallManager:
self.update_job(job_metadata, status='error', errors=errors or [])
def retrieve_job_result(self, cache_key: str) -> Any | None:
try:
return self.cache.get(cache_key)
except CacheLoadError as ex:
logger.error(f'Retrieval of job result failed: {ex.message}')
raise
if v := self.cache.get(cache_key):
return v
else:
logger.error(f'Retrieval of job result failed: {cache_key}')
def retrieve_job_result_json(self, cache_key: str) -> str | int | float | \
dict[str, Any] | None:
v = self.retrieve_job_result(cache_key)
if v := self.cache.get(cache_key):
try:
if v is not None:
return json.loads(v, encoding='utf-8')
except JSONDecodeError as ex:
logger.error(f'JSON decoding of retrieved job result failed: {ex.msg}')
raise
logger.error(
f'JSON decoding of job result failed: {cache_key} with {ex.msg}')
class ExternalServiceCallManagerFactory:
......@@ -174,6 +175,7 @@ class ExternalServiceCallManagerFactory:
self._external_service_call_manager: ExternalServiceCallManager = None # type: ignore
def init_app(self, app: Flask) -> None:
    """Create the ``ExternalServiceCallManager`` and initialise it with ``app``.

    :param app: the Flask application passed on to the manager's ``init_app``
    """
    # A bare `is_feature_enabled` expression statement used to sit here; it
    # only evaluated the name and discarded it, so it has been removed.
    self._external_service_call_manager = ExternalServiceCallManager()
    self._external_service_call_manager.init_app(app)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment