diff --git a/MITM_SUPPORT_INFO.md b/MITM_SUPPORT_INFO.md index 75fc52a3f908e99910e14f4276f80233ab96e8fc..1643914cda1a906b9818279d6711472b997036b8 100644 --- a/MITM_SUPPORT_INFO.md +++ b/MITM_SUPPORT_INFO.md @@ -119,6 +119,7 @@ To make passing around large results more feasible, there are some new (streamin ## Architecture Overview The following figure gives a partial overview over the main components of the discussed architecture. +Additions are colored in green.  ## Kubernetes Helm Chart diff --git a/docker-compose.yml b/docker-compose.yml index 0d9ce495c426da6dc0d3ed7251546d862b67300d..cb1a7dc6f0af1cda4d305a82e014b4053c200f6e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -32,9 +32,7 @@ x-superset-volumes: &superset-volumes - ./tests:/app/tests x-common-build: &common-build context: . - target: ${SUPERSET_BUILD_TARGET:-dev} # can use `dev` (default) or `lean` - cache_from: - - apache/superset-cache:3.10-slim-bookworm + target: mitm-superset # ${SUPERSET_BUILD_TARGET:-mitm-superset} # can use `dev` (default) or `lean`. for mitm support, use mitm-support args: DEV_MODE: "true" INCLUDE_CHROMIUM: ${INCLUDE_CHROMIUM:-false} diff --git a/helm/superset/templates/deployment-beat.yaml b/helm/superset/templates/deployment-beat.yaml index ff298f478d4d32d131741d9580a819d062184194..8c5be60dad822a16776dd889eab5c9db06372738 100644 --- a/helm/superset/templates/deployment-beat.yaml +++ b/helm/superset/templates/deployment-beat.yaml @@ -61,6 +61,8 @@ spec: labels: app: "{{ template "superset.name" . }}-celerybeat" release: {{ .Release.Name }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/name: "{{ template "superset.name" . }}-celerybeat" {{- if .Values.extraLabels }} {{- toYaml .Values.extraLabels | nindent 8 }} {{- end }} diff --git a/helm/superset/templates/deployment-flower.yaml b/helm/superset/templates/deployment-flower.yaml index 179df9309dd4928a1929350e83209bf8e94a979a..853ed0d0cb003aba3fc2d7f84d327e70f65b53bf 100644 --- a/helm/superset/templates/deployment-flower.yaml +++ b/helm/superset/templates/deployment-flower.yaml @@ -50,6 +50,8 @@ spec: labels: app: "{{ template "superset.name" . }}-flower" release: {{ .Release.Name }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/name: "{{ template "superset.name" . }}-flower" {{- if .Values.extraLabels }} {{- toYaml .Values.extraLabels | nindent 8 }} {{- end }} diff --git a/helm/superset/templates/deployment-worker.yaml b/helm/superset/templates/deployment-worker.yaml index 96ee915f290efa5e1bede4c74a7ab7bf053139ce..b5bedfa5f2520f179ffe14f3001d8bb079f4b257 100644 --- a/helm/superset/templates/deployment-worker.yaml +++ b/helm/superset/templates/deployment-worker.yaml @@ -67,6 +67,8 @@ spec: labels: app: {{ template "superset.name" . }}-worker release: {{ .Release.Name }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/name: "{{ template "superset.name" . }}-worker" {{- if .Values.extraLabels }} {{- toYaml .Values.extraLabels | nindent 8 }} {{- end }} diff --git a/helm/superset/templates/deployment-ws.yaml b/helm/superset/templates/deployment-ws.yaml index 868b84309ffbbbc0d0cdb3da34c6a6675a5a07be..64702f5b06e6dff2f91e0b438e25e16bf60fe9a5 100644 --- a/helm/superset/templates/deployment-ws.yaml +++ b/helm/superset/templates/deployment-ws.yaml @@ -53,6 +53,8 @@ spec: labels: app: "{{ template "superset.name" . }}-ws" release: {{ .Release.Name }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/name: "{{ template "superset.name" . }}-ws" {{- if .Values.extraLabels }} {{- toYaml .Values.extraLabels | nindent 8 }} {{- end }} diff --git a/helm/superset/templates/deployment.yaml b/helm/superset/templates/deployment.yaml index 444ec103da66ce6d4e1f3c2f1d7a552e69934da3..b4cb6a6186962b6b06264ccc40ad3ac5a190c88c 100644 --- a/helm/superset/templates/deployment.yaml +++ b/helm/superset/templates/deployment.yaml @@ -69,6 +69,8 @@ spec: labels: app: {{ template "superset.name" . }} release: {{ .Release.Name }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/name: {{ template "superset.name" . }} {{- if .Values.extraLabels }} {{- toYaml .Values.extraLabels | nindent 8 }} {{- end }} diff --git a/helm/superset/values.yaml b/helm/superset/values.yaml index 5b68a10c02a0162ec937900b68b6ea3d8be2b7e2..3d8536b93796222f4ec5522480da52e638897a3e 100644 --- a/helm/superset/values.yaml +++ b/helm/superset/values.yaml @@ -66,10 +66,12 @@ envFromSecret: '{{ template "superset.fullname" . }}-env' envFromSecrets: [] # -- Extra environment variables that will be passed into pods -extraEnv: - SUPERSET_FEATURE_MITM_SUPPORT: "true" - SUPERSET_FEATURE_ENABLE_TEMPLATE_PROCESSING: "true" - SUPERSET_FEATURE_GLOBAL_ASYNC_QUERIES: "true" +extraEnv: {} + # in theory, feature flags can be set via env vars + # SUPERSET_FEATURE_MITM_SUPPORT: "true" + # SUPERSET_FEATURE_ENABLE_TEMPLATE_PROCESSING: "true" + # SUPERSET_FEATURE_GLOBAL_ASYNC_QUERIES: "true" + # Different gunicorn settings, refer to the gunicorn documentation # https://docs.gunicorn.org/en/stable/settings.html# # These variables are used as Flags at the gunicorn startup @@ -102,6 +104,8 @@ extraEnvRaw: [] # -- Extra environment variables to pass as secrets extraSecretEnv: + # set as placeholder, so that application actually starts up for testing + # of course, these must be overwritten in an actual deployment SUPERSET_SECRET_KEY: 'nOBaaOEq/Ria4ciFqoA+4bD0sOo9STz3Iy5JLPAIBe1fuZ/G1vSCkij0' GLOBAL_ASYNC_QUERIES_JWT_SECRET: '227C7xppPiFraqrc+TTLwre0WZMCDOR6XdVaTOwGoMHt8/b+nYEqDWpx' # MAPBOX_API_KEY: ... @@ -148,6 +152,9 @@ extraVolumeMounts: [] # WARNING: the order is not guaranteed # Files can be passed as helm --set-file configOverrides.my-override=my-file.py configOverrides: + feature_flags: | + FEATURE_FLAGS = {'ENABLE_TEMPLATE_PROCESSING': True, 'GLOBAL_ASYNC_QUERIES': True, + 'MITM_SUPPORT': True} celery: | import os from celery.schedules import crontab @@ -305,19 +312,19 @@ ingress: # hosts: # - chart-example.local -resources: {} +resources: # We usually recommend not to specify default resources and to leave this as a conscious # choice for the user. This also increases chances charts run on environments with little # resources, such as Minikube. If you do want to specify resources, uncomment the following # lines, adjust them as necessary, and remove the curly braces after 'resources:'. # The limits below will apply to all Superset components. To set individual resource limitations refer to the pod specific values below. # The pod specific values will overwrite anything that is set here. - # limits: - # cpu: 100m - # memory: 128Mi - # requests: - # cpu: 100m - # memory: 128Mi + limits: + cpu: 250m + memory: 1Gi + requests: + cpu: 100m + memory: 128Mi # -- Custom hostAliases for all superset pods ## https://kubernetes.io/docs/tasks/network/customize-hosts-file-for-pods/ @@ -407,9 +414,12 @@ supersetNode: # -- TopologySpreadConstrains to be added to supersetNode deployments topologySpreadConstraints: [] # -- Annotations to be added to supersetNode pods - podAnnotations: {} + podAnnotations: + sidecar.istio.io/proxyCPULimit: "100m" + sidecar.istio.io/proxyMemoryLimit: "128Mi" # -- Labels to be added to supersetNode pods podLabels: {} + startupProbe: httpGet: path: /health @@ -504,17 +514,19 @@ supersetWorker: # -- TopologySpreadConstrains to be added to supersetWorker deployments topologySpreadConstraints: [] # -- Annotations to be added to supersetWorker pods - podAnnotations: {} + podAnnotations: + sidecar.istio.io/proxyCPULimit: "100m" + sidecar.istio.io/proxyMemoryLimit: "128Mi" # -- Labels to be added to supersetWorker pods podLabels: {} # -- Resource settings for the supersetWorker pods - these settings overwrite might existing values from the global resources object defined above. - resources: {} - # limits: - # cpu: 100m - # memory: 128Mi - # requests: - # cpu: 100m - # memory: 128Mi + resources: + limits: + cpu: 500m + memory: 2Gi + requests: + cpu: 150m + memory: 1Gi podSecurityContext: {} containerSecurityContext: {} strategy: {} @@ -584,7 +596,9 @@ supersetCeleryBeat: # -- TopologySpreadConstrains to be added to supersetCeleryBeat deployments topologySpreadConstraints: [] # -- Annotations to be added to supersetCeleryBeat pods - podAnnotations: {} + podAnnotations: + sidecar.istio.io/proxyCPULimit: "100m" + sidecar.istio.io/proxyMemoryLimit: "128Mi" # -- Labels to be added to supersetCeleryBeat pods podLabels: {} # -- Resource settings for the CeleryBeat pods - these settings overwrite might existing values from the global resources object defined above. @@ -677,7 +691,9 @@ supersetCeleryFlower: # -- TopologySpreadConstrains to be added to supersetCeleryFlower deployments topologySpreadConstraints: [] # -- Annotations to be added to supersetCeleryFlower pods - podAnnotations: {} + podAnnotations: + sidecar.istio.io/proxyCPULimit: "100m" + sidecar.istio.io/proxyMemoryLimit: "128Mi" # -- Labels to be added to supersetCeleryFlower pods podLabels: {} # -- Resource settings for the CeleryBeat pods - these settings overwrite might existing values from the global resources object defined above. @@ -753,7 +769,9 @@ supersetWebsockets: affinity: {} # -- TopologySpreadConstrains to be added to supersetWebsockets deployments topologySpreadConstraints: [] - podAnnotations: {} + podAnnotations: + sidecar.istio.io/proxyCPULimit: "100m" + sidecar.istio.io/proxyMemoryLimit: "128Mi" podLabels: {} strategy: {} podSecurityContext: {} @@ -794,13 +812,13 @@ init: # cause the process to be killed due to OOM if it exceeds limit # Make sure you are giving a strong password for the admin user creation( else make sure you are changing after setup) # Also change the admin email to your own custom email. - resources: {} - # limits: - # cpu: - # memory: - # requests: - # cpu: - # memory: + resources: + limits: + cpu: 500m + memory: 2Gi + requests: + cpu: 100m + memory: 1Gi # -- Command # @default -- a `superset_init.sh` command command: @@ -862,7 +880,8 @@ init: # -- Launch additional containers into init job pod extraContainers: [] ## Annotations to be added to init job pods - podAnnotations: {} + podAnnotations: + sidecar.istio.io/inject: "false" # Labels to be added to init job pods podLabels: {} podSecurityContext: {} @@ -960,6 +979,9 @@ redis: ## ## docker registry secret names (list) # pullSecrets: nil + podAnnotations: + sidecar.istio.io/proxyCPULimit: "100m" + sidecar.istio.io/proxyMemoryLimit: "128Mi" ## ## Configure persistence persistence: @@ -978,7 +1000,7 @@ superset-mitm-service: enabled: true connections: - origin: '{{ template "superset.fullname" . }}-worker:{{ .Values.service.port }}' # "maed-exporter-frontend-svc.mdata.svc.cluster.local" + origin: '{{ template "superset.name" . }}-worker:{{ .Values.service.port }}' # "maed-exporter-frontend-svc.mdata.svc.cluster.local" mitmDB: mitm_database_dialect: "postgresql" @@ -990,6 +1012,17 @@ superset-mitm-service: service: port: "8180" + podAnnotations: + sidecar.istio.io/proxyCPULimit: "100m" + sidecar.istio.io/proxyMemoryLimit: "128Mi" + + resources: + limits: + cpu: 500m + memory: 1Gi + requests: + cpu: 100m + memory: 128Mi nodeSelector: {} diff --git a/justfile b/justfile index 9d5409c3c2142c6f03f99e72d68dc1da6c4bc072..ea609b86dd07a9bf49391beb671930e7f62cacd0 100644 --- a/justfile +++ b/justfile @@ -21,6 +21,16 @@ frontcont: build-dep: wsl ./scripts/uv-pip-compile.sh +compile-requirements: + uv pip compile pyproject.toml requirements/base.in -o requirements-local/base.txt + uv pip compile pyproject.toml requirements/development.in -o requirements-local/development.txt + +install-requirements: + uv pip install -r requirements-local/base.txt + uv pip install requirements-local/python_ldap-3.4.4-cp311-cp311-win_amd64.whl + uv pip install -r requirements-local/development.txt + + rebuild-service: docker compose up -d --build --force-recreate superset-mitm-service diff --git a/kube.just b/kube.just index 213fa26814d1445f3af0ecb77d336a67744fd667..351f4ab787b3f4b9b8d45ade145c0ded5a7fb76b 100644 --- a/kube.just +++ b/kube.just @@ -1,11 +1,14 @@ -CHART_NAME := "mitm-superset" CHART_FOLDER := "superset" +CHART_NAME := "mitm-superset" +DEPENDENCY_CHART := "superset-mitm-service" + +helm-login: + helm registry login registry-1.docker.io -u leahtgu helm-package: cd helm/ && helm package {{CHART_NAME}} helm-push a: helm-package - # helm registry login registry-1.docker.io -u leahtgu cd helm/ && helm push {{a}} oci://registry-1.docker.io/leahtgu helm *args: @@ -23,32 +26,65 @@ helm-install-dry: helm-uninstall: @just helm uninstall {{CHART_NAME}} -kube-forward: - #! powershell - $POD_NAME = kubectl --context=c4c get pods --namespace mdata -l "app.kubernetes.io/name={{CHART_NAME}},app.kubernetes.io/instance={{CHART_NAME}}" -o jsonpath="{.items[0].metadata.name}" - $CONTAINER_PORT = kubectl --context=c4c get pod --namespace mdata $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}" - Write-Output "Visit http://127.0.0.1:8080 to use your application" - kubectl --context=c4c --namespace mdata port-forward $POD_NAME 8080:$CONTAINER_PORT +helm-abort: + @just helm-uninstall + @just kube delete job mitm-superset-init-db + kube *arg: kubectl --context=c4c -n mdata {{arg}} -kubeinto pod: +kubeforward name=DEPENDENCY_CHART port="8881": + #! powershell + $POD_NAME = kubectl --context=c4c get pods --namespace mdata -l "app.kubernetes.io/instance={{CHART_NAME}},app.kubernetes.io/name={{name}}" -o jsonpath="{.items[0].metadata.name}" + $CONTAINER_PORT = kubectl --context=c4c get pod --namespace mdata $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}" + Write-Output "Visit http://127.0.0.1:{{port}} to use your application" + kubectl --context=c4c --namespace mdata port-forward $POD_NAME {{port}}:$CONTAINER_PORT + +kubeforward-suff suffix="" port="8880": + #! powershell + $POD_NAME = kubectl --context=c4c get pods --namespace mdata -l "app={{CHART_NAME}}{{suffix}},release={{CHART_NAME}}" -o jsonpath="{.items[0].metadata.name}" + $CONTAINER_PORT = kubectl --context=c4c get pod --namespace mdata $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}" + Write-Output "Visit http://127.0.0.1:{{port}} to use your application" + kubectl --context=c4c --namespace mdata port-forward $POD_NAME {{port}}:$CONTAINER_PORT + +kubeinto-pod pod: kubectl --context=c4c --namespace mdata exec --stdin --tty {{pod}} -- /bin/bash -kubeinto2 svc=CHART_NAME inst=CHART_NAME: +kubeinto name=DEPENDENCY_CHART: + #! powershell + $POD_NAME = kubectl --context=c4c get pods --namespace mdata -l "app.kubernetes.io/instance={{CHART_NAME}},app.kubernetes.io/name={{name}}" -o jsonpath="{.items[0].metadata.name}" + just kubeinto-pod $POD_NAME + +kubeinto-suff suffix="": #! powershell - $POD_NAME = kubectl --context=c4c get pods --namespace mdata -l "app=mitm-superset-worker" -o jsonpath="{.items[0].metadata.name}" - kubectl --context=c4c --namespace mdata exec --stdin --tty $POD_NAME -- /bin/bash + $POD_NAME = kubectl --context=c4c get pods --namespace mdata -l "app={{CHART_NAME}}{{suffix}},release={{CHART_NAME}}" -o jsonpath="{.items[0].metadata.name}" + just kubeinto-pod $POD_NAME -kubelogs svc="{{CHART_NAME}}": +kubelogs name=DEPENDENCY_CHART *args: #! powershell - $POD_NAME = kubectl --context=c4c get pods --namespace mdata -l "app.kubernetes.io/name={{svc}},app.kubernetes.io/instance={{CHART_NAME}}" -o jsonpath="{.items[0].metadata.name}" - kubectl --context=c4c --namespace mdata logs $POD_NAME + $POD_NAME = kubectl --context=c4c get pods --namespace mdata -l "app.kubernetes.io/instance={{CHART_NAME}},app.kubernetes.io/name={{name}}" -o jsonpath="{.items[0].metadata.name}" + just kube logs $POD_NAME {{args}} + +kubelogs-suff suffix="" *args: + #! powershell + $POD_NAME = kubectl --context=c4c get pods --namespace mdata -l "app={{CHART_NAME}}{{suffix}},release={{CHART_NAME}}" -o jsonpath="{.items[0].metadata.name}" + just kube logs $POD_NAME {{args}} kube-clear-pvc: @just kube delete pvc data-mitm-superset-postgresql-0 -helm-abort: - @just helm-uninstall - @just kube delete job mitm-superset-init-db +kf-sup: + @just kubeforward-suff "" 8880 + +kf-mitm: + @just kubeforward "" 8881 + +ki-sup: + @just kubeinto-suff + +ki-work: + @just kubeinto-suff "-worker" + +ki-mitm: + @just kubeinto-mitm diff --git a/requirements/development.in b/requirements/development.in index 514536023a2e4015d9d61be5ba26f37a8ff28b5c..512975f0bab0960b1a923857c46a981f9b90d2f1 100644 --- a/requirements/development.in +++ b/requirements/development.in @@ -18,3 +18,4 @@ # -e .[development,bigquery,cors,druid,gevent,gsheets,mysql,postgres,presto,prophet,trino,thumbnails,mitm] datamodel-code-generator>=0.28.0 +debugpy diff --git a/requirements/development.txt b/requirements/development.txt index 24ed0d147d1dc926cdc0717088bf09d2af607f5a..f71a61441af94a82cd85f0fab180bc899b8d6957 100644 --- a/requirements/development.txt +++ b/requirements/development.txt @@ -153,6 +153,8 @@ datamodel-code-generator==0.28.4 # via -r requirements/development.in db-dtypes==1.4.2 # via pandas-gbq +debugpy==1.8.14 + # via -r requirements/development.in defusedxml==0.7.1 # via # -c requirements/base.txt diff --git a/superset-frontend/src/features/externalServices/hooks/asyncExternalService.ts b/superset-frontend/src/features/externalServices/hooks/asyncExternalService.ts index 983adbf04d14a2ad32a7612084f0ea9eaaba8487..0e526fec66357361d5f0a38090c92346c1473f56 100644 --- a/superset-frontend/src/features/externalServices/hooks/asyncExternalService.ts +++ b/superset-frontend/src/features/externalServices/hooks/asyncExternalService.ts @@ -2,7 +2,6 @@ import { ClientErrorObject, getClientErrorObject, JsonObject, - logging, parseErrorJson, RequestConfig, SupersetClient, @@ -63,6 +62,7 @@ export const waitForAsyncData = async (asyncResponse: AsyncJobMetadata) => const jobId = asyncResponse.job_id; const listener = async (asyncEvent: AsyncJobMetadata) => { switch (asyncEvent.status) { + // we only care about done and error here case JOB_STATUS.DONE: { if ('result_url' in asyncEvent) { let { data, status } = await fetchJsonResult(asyncEvent); @@ -84,9 +84,6 @@ export const waitForAsyncData = async (asyncResponse: AsyncJobMetadata) => reject(err); break; } - default: { - logging.warn('received event with status', asyncEvent.status); - } } removeListener(jobId); }; @@ -116,7 +113,7 @@ export const asyncCall = async (requestConfig: RequestConfig) => { export const asyncAction = async ( requestConfig: RequestConfig, ): Promise<AsyncActionResult> => { - return asyncCall(requestConfig) as Promise<AsyncActionResult>; + return await asyncCall(requestConfig) as Promise<AsyncActionResult>; }; type CallExternalServiceProps = { @@ -132,11 +129,10 @@ export const callExternalService = async ({ }: CallExternalServiceProps) => { const endpoint = extServiceCallApiEndpoint + '/' + String(extService) + '/' + path; - const contentType = - payload instanceof FormData ? 'multipart/form-data' : 'application/json'; + const headers = { ...(payload && !(payload instanceof FormData)) && {'Content-Type': 'application/json'} } return asyncCall({ endpoint, - headers: { 'Content-Type': contentType }, + headers: headers, postPayload: payload, }); }; diff --git a/superset-frontend/src/features/mitmDatasets/UploadMitMDatasetModal/index.tsx b/superset-frontend/src/features/mitmDatasets/UploadMitMDatasetModal/index.tsx index 26365430fe2cd98b2a48ac5bf348a71bbb88b5cd..e06bbd84e2e60087e8845852ccc83250481efc82 100644 --- a/superset-frontend/src/features/mitmDatasets/UploadMitMDatasetModal/index.tsx +++ b/superset-frontend/src/features/mitmDatasets/UploadMitMDatasetModal/index.tsx @@ -128,7 +128,7 @@ const UploadMitMDatasetModal: FunctionComponent< setIsLoading(true); asyncAction({ endpoint: '/api/v1/mitm_dataset/upload/', - headers: { 'Content-Type': 'multipart/form-data' }, + //headers: { 'Content-Type': 'multipart/form-data' }, postPayload: formData, }) .then(r => { diff --git a/superset-frontend/src/features/mitmDatasets/common/JsonPreviewCell.tsx b/superset-frontend/src/features/mitmDatasets/common/JsonPreviewCell.tsx index 59b56e9404f9611fb78cbdd3c075974ecd9bd110..9da9753a449e243fedec7a28dbd5600ade2d5285 100644 --- a/superset-frontend/src/features/mitmDatasets/common/JsonPreviewCell.tsx +++ b/superset-frontend/src/features/mitmDatasets/common/JsonPreviewCell.tsx @@ -2,6 +2,7 @@ import { useState } from 'react'; import { Icons } from '../../../components/Icons'; import { SupersetTheme, t } from '@superset-ui/core'; import Modal from '../../../components/Modal'; +import Button from '../../../components/Button'; export const JsonPreviewCell = ({ value }: { value: Record<string, any> }) => { const [showModal, setShowModal] = useState(false); @@ -38,7 +39,11 @@ export const JsonPreviewCell = ({ value }: { value: Record<string, any> }) => { show={showModal} onHide={() => setShowModal(false)} title={t('MitM Header Details')} - footer={<button onClick={() => setShowModal(false)}>{t('Close')}</button>} + footer={ + <Button buttonStyle={"secondary"} onClick={() => setShowModal(false)}> + {t('Close')} + </Button> + } width="600px" > <pre diff --git a/superset/commands/mitm/mitm_dataset/importers/v1/__init__.py b/superset/commands/mitm/mitm_dataset/importers/v1/__init__.py index 0dc8128ba6d2453c80f4f0b1406d2907e4bd4cfc..d94fae2606a521aa2d31c46b1591307935e46f74 100644 --- a/superset/commands/mitm/mitm_dataset/importers/v1/__init__.py +++ b/superset/commands/mitm/mitm_dataset/importers/v1/__init__.py @@ -3,7 +3,7 @@ from collections import defaultdict from copy import deepcopy from sqlalchemy import delete, insert - +import logging from superset.charts.schemas import ImportV1ChartSchema from superset.commands.importers.v1 import ImportModelsCommand from superset.commands.importers.v1.assets import ImportAssetsCommand @@ -21,6 +21,7 @@ from superset.models.slice import Slice from .utils import * from ...utils import get_asset_by_uuid, EXPORT_PREFIX_PATH, handle_serialized_fields +logger = logging.getLogger(__name__) class ImportMitMDatasetsCommand(ImportModelsCommand): dao = MitMDatasetDAO @@ -64,6 +65,8 @@ class ImportMitMDatasetsCommand(ImportModelsCommand): config = insert_ids(config, uuid_id_maps) handle_serialized_fields(config) + logger.info("Importing mitm_dataset '%s'", config) + mitm_dataset = import_mitm_dataset(config, overwrite=overwrite) mitm_dataset_ids[str(mitm_dataset.uuid)] = mitm_dataset.id for rel_obj_type, relation_table in zip(['table', 'slice', 'dashboard'], diff --git a/superset/customization/ext_service_call/api.py b/superset/customization/ext_service_call/api.py index 74d29d5f485d59dee07b901707082f607473ad18..6e20022d89b0ece9d7029d9d8f5ce9e31fd47865 100644 --- a/superset/customization/ext_service_call/api.py +++ b/superset/customization/ext_service_call/api.py @@ -2,11 +2,9 @@ from flask import request from flask_appbuilder import expose from flask_appbuilder.security.decorators import protect from werkzeug import Response -from werkzeug.utils import send_file from superset.exceptions import SupersetException from superset.extensions import external_service_call_manager, event_logger -from superset.utils.core import get_user_id from superset.views.base_api import BaseSupersetApi @@ -25,16 +23,13 @@ class CallExternalService(BaseSupersetApi): 'get_chunked_json_result': 'read', 'get_streamed_raw_result': 'read'} - @expose('/call/<string:ext_service>/<path:subpath>', methods=('POST',)) @event_logger.log_this @protect(allow_browser_login=True) # @permission_name('post') def call_service(self, ext_service: str, subpath: str = '', **kwargs): - from superset.customization.external_service_support.forwardable_request import \ - ForwardableRequestBase, mk_forwardable_request - from superset.customization.external_service_support.service_registry import \ - ExternalService + from superset.customization.external_service_support.interface import \ + parse_channel_id_from_request, mk_forwardable_request, ExternalService try: ext_service = ExternalService(ext_service) except KeyError as exc: @@ -44,8 +39,9 @@ class CallExternalService(BaseSupersetApi): request, file_handling='raw') + async_channel_id = parse_channel_id_from_request(request) job_metadata = external_service_call_manager.submit_async_service_call( - ext_service, forwardable_request) + ext_service, forwardable_request, override_channel_id=async_channel_id) return self.response( 202, diff --git a/superset/customization/external_service_support/common.py b/superset/customization/external_service_support/common.py index ae39726ac7c86cbbe61c78ae5c47bf5c8778067f..931f14f30af98c68ba5566b9598d707c91f7c046 100644 --- a/superset/customization/external_service_support/common.py +++ b/superset/customization/external_service_support/common.py @@ -1,12 +1,16 @@ from __future__ import annotations + +import logging from collections.abc import Callable from typing import Literal, TypedDict, Generator, Optional, Protocol, ParamSpec, \ Concatenate - +from flask import Request from superset.utils.core import get_user_id - +import jwt from superset.security.guest_token import GuestToken +logger = logging.getLogger(__name__) + AsyncResultStatusValue = Literal['pending', 'running', 'error', 'done'] @@ -49,6 +53,9 @@ def mk_job_metadata( job_id=job_id, **kwargs) +def parse_channel_id_from_request(req: Request) -> str: + from superset.extensions import async_query_manager + return async_query_manager.parse_channel_id_from_request(req) def with_user_metadata(job_metadata: AsyncJobMetadata, override_user_id: int | None = None) -> AsyncJobMetadata: diff --git a/superset/customization/external_service_support/interface.py b/superset/customization/external_service_support/interface.py new file mode 100644 index 0000000000000000000000000000000000000000..384a86c808d978f8810a1b6940b5e82b65f76fb1 --- /dev/null +++ b/superset/customization/external_service_support/interface.py @@ -0,0 +1,16 @@ +# pylint: disable=unused-import +# pyright: reportUnusedImport=false +# noqa: F401 + +# noinspection PyUnresolvedReferences +from .service_registry import ExternalService, AsyncForwardableRequestHandler, AsyncInternalHandler +# noinspection PyUnresolvedReferences +from .forwardable_request import mk_forwardable_request, ForwardableRequestBase, FormDataRequest, DatalessForwardableRequest, RawDataRequest, JsonRequest +# noinspection PyUnresolvedReferences +from .common import mk_job_metadata, parse_channel_id_from_request, AsyncJobMetadata, AsyncResultStatus +# noinspection PyUnresolvedReferences +from .service_registry import ExternalService, AsyncForwardableRequestHandler, AsyncInternalHandler +# noinspection PyUnresolvedReferences +from .forwardable_request import mk_forwardable_request, ForwardableRequestBase, FormDataRequest,DatalessForwardableRequest,RawDataRequest,JsonRequest +# noinspection PyUnresolvedReferences +from .common import mk_job_metadata, parse_channel_id_from_request, AsyncJobMetadata, AsyncResultStatus diff --git a/superset/customization/external_service_support/service_call_manager.py b/superset/customization/external_service_support/service_call_manager.py index 54eaac0597572d765f075e02949f9b9d913d63fb..a93357f12ed620c0db0f4e27744313c218d7a3fd 100644 --- a/superset/customization/external_service_support/service_call_manager.py +++ b/superset/customization/external_service_support/service_call_manager.py @@ -5,13 +5,14 @@ import uuid from json import JSONDecodeError from typing import Any, TYPE_CHECKING, Generator, Iterator, Callable, ContextManager +import flask import requests from flask import Flask from flask_caching import Cache from superset.utils import json from .common import AsyncResultStatus, AsyncResultStatusValue, \ - AsyncEventError, AsyncJobMetadata, mk_job_metadata, with_user_metadata + AsyncEventError, AsyncJobMetadata, mk_job_metadata, with_user_metadata, parse_channel_id_from_request if TYPE_CHECKING: from superset.async_events.async_query_manager import AsyncQueryManager @@ -31,14 +32,15 @@ class ExternalServiceCallManager: self.cache: Cache | None = None self._external_services: dict[ ExternalService, ExternalServiceProperties] | None = None - self._sync_service_call_handler : SyncForwardableRequestHandler | None = None - self._async_service_call_handler : AsyncForwardableRequestHandler | None = None + self._sync_service_call_handler: SyncForwardableRequestHandler | None = None + self._async_service_call_handler: AsyncForwardableRequestHandler | None = None def init_app(self, app: Flask): from superset import cache_manager from superset.extensions import async_query_manager from .service_registry import register_services - from .service_call_handlers import get_sync_service_call_handler, get_async_service_call_handler + from .service_call_handlers import get_sync_service_call_handler, \ + get_async_service_call_handler self._async_query_manager: AsyncQueryManager = async_query_manager self.cache: Cache = cache_manager.cache self._external_services: dict[ @@ -85,10 +87,12 @@ class ExternalServiceCallManager: cache_key: str | None = None, stream_into_cache: bool = False, override_user_id: int | None = None, + override_channel_id: str | None = None, **kwargs) -> AsyncJobMetadata: from superset.commands.mitm.external_service import \ ExecuteForwardableRequestAsyncCommand - job_metadata = self._init_job(str(service), override_user_id=override_user_id) + job_metadata = self._init_job(channel_id=override_channel_id or str(service), + override_user_id=override_user_id) complete_request = self.complete_request(service, forwardable_request) async_result = ExecuteForwardableRequestAsyncCommand(job_metadata, @@ -105,15 +109,20 @@ class ExternalServiceCallManager: forwardable_request: ForwardableRequestBase, custom_handler: AsyncForwardableRequestHandler | None = None, override_user_id: int | None = None, + override_channel_id: str | None = None, **kwargs) -> AsyncJobMetadata: - job_metadata = self._init_job(str(service), override_user_id=override_user_id) + job_metadata = self._init_job(channel_id=override_channel_id or str(service), + override_user_id=override_user_id) service_properties = self._external_services[service] complete_request = self.complete_request(service, forwardable_request) handler: AsyncForwardableRequestHandler = custom_handler or self._async_service_call_handler - async_result = handler(job_metadata, complete_request, result_base_url=service_properties.result_endpoint_url, **kwargs) + async_result = handler(job_metadata, + complete_request, + result_base_url=service_properties.result_endpoint_url, + **kwargs) job_metadata['task_id'] = async_result.id return job_metadata @@ -122,8 +131,10 @@ class ExternalServiceCallManager: service: ExternalService, handler: AsyncInternalHandler, override_user_id: int | None = None, + override_channel_id: str | None = None, **kwargs) -> AsyncJobMetadata: - job_metadata = self._init_job(str(service), override_user_id=override_user_id) + job_metadata = self._init_job(channel_id=override_channel_id or str(service), + override_user_id=override_user_id) async_result = handler(job_metadata, **kwargs) job_metadata['task_id'] = async_result.id return job_metadata @@ -131,7 +142,8 @@ class ExternalServiceCallManager: def complete_internally_handled(self, service: ExternalService, job_metadata: AsyncJobMetadata, - result_url_builder: Callable[[str], str] | None = None): + result_url_builder: Callable[ + [str], str] | None = None): service_properties = self._external_services[service] result_url = None if result_url_builder is not None and service_properties.result_endpoint_url is not None: @@ -175,8 +187,7 @@ class ExternalServiceCallManager: Callable[[], ContextManager[ Generator[ Any, None, None]]] | None: - from superset.commands.mitm.caching.cache_data import ReadCacheCommand, \ - ReadStreamedCacheCommand, CacheCommandException + from superset.commands.mitm.caching.cache_data import ReadStreamedCacheCommand, CacheCommandException try: start_read = ReadStreamedCacheCommand(base_cache_key=base_cache_key, cache_instance=self.cache).run( @@ -188,7 +199,7 @@ class ExternalServiceCallManager: def read_job_result(self, cache_key: str, delete_after: bool = True) -> Any | None: from superset.commands.mitm.caching.cache_data import ReadCacheCommand, \ - ReadStreamedCacheCommand, CacheCommandException + CacheCommandException try: v = ReadCacheCommand(cache_key=cache_key, cache_instance=self.cache).run(delete_after=delete_after) @@ -220,7 +231,9 @@ class ExternalServiceCallManager: was_streamed: bool = False, delete_after: bool = False) -> str | int | float | \ dict[str, Any] | None: - if v := self.retrieve_job_result(cache_key, was_streamed=was_streamed, delete_after=delete_after): + if v := self.retrieve_job_result(cache_key, + was_streamed=was_streamed, + delete_after=delete_after): try: if v is not None: return json.loads(v, encoding='utf-8') diff --git a/superset/customization/mitm_datasets/api.py b/superset/customization/mitm_datasets/api.py index c92414b0b946eeb1d7b591cb751e1a83839ccba8..620feeff71ad817f3e7d0b4d8ded05b97bed5aab 100644 --- a/superset/customization/mitm_datasets/api.py +++ b/superset/customization/mitm_datasets/api.py @@ -1,4 +1,5 @@ from __future__ import annotations + import logging from datetime import datetime from io import BytesIO @@ -14,35 +15,35 @@ from flask_appbuilder.models.sqla.interface import SQLAInterface from flask_appbuilder.security.decorators import protect from flask_babel import _ +from superset.commands.importers.exceptions import NoValidFilesFoundError +from superset.commands.importers.v1.utils import get_contents_from_bundle +from superset.commands.mitm.caching.cache_data import WriteCacheCommand, \ + StreamIntoCacheCommand +from superset.commands.mitm.caching.utils import random_cache_key +from superset.commands.mitm.exceptions import * +from superset.commands.mitm.mitm_dataset.create import CreateMitMDatasetCommand +from superset.commands.mitm.mitm_dataset.delete import DeleteMitMDatasetsCommand +from superset.commands.mitm.mitm_dataset.export import ExportMitMDatasetsCommand +from superset.commands.mitm.mitm_dataset.importers.v1 import ImportMitMDatasetsCommand +from superset.commands.mitm.mitm_dataset.update import UpdateMitMDatasetCommand +from superset.commands.mitm.mitm_service.schemas.mitm_service_schema import \ + ValidationError +from superset.commands.mitm.mitm_service.utils import test_access from superset.constants import RouteMethod, MODEL_API_RW_METHOD_PERMISSION_MAP from superset.daos.mitm_dataset import MitMDatasetDAO from superset.databases.filters import DatabaseFilter from superset.models.mitm import MitMDataset, mitm_dataset_tables, \ mitm_dataset_dashboards, mitm_dataset_slices +from superset.utils import json from superset.views.base_api import BaseSupersetModelRestApi, RelatedFieldFilter, \ statsd_metrics, requires_json, requires_form_data from superset.views.filters import BaseFilterRelatedUsers, FilterRelatedOwners from .filters import MitMDatasetFilter from .schemas import MitMDatasetPostSchema, MitMDatasetPutSchema, get_export_ids_schema, \ get_delete_ids_schema, RelatedDashboardSchema, \ - RelatedSliceSchema, RelatedTableSchema, MitMDatasetShowSchema, \ - RelatedDatabaseSchema, UserSchema -from ... import event_logger -from ...commands.importers.exceptions import NoValidFilesFoundError -from ...commands.importers.v1.utils import get_contents_from_bundle -from ...commands.mitm.caching.cache_data import WriteCacheCommand -from ...commands.mitm.caching.utils import random_cache_key -from ...commands.mitm.exceptions import * -from ...commands.mitm.mitm_dataset.create import CreateMitMDatasetCommand -from ...commands.mitm.mitm_dataset.delete import DeleteMitMDatasetsCommand -from ...commands.mitm.mitm_dataset.export import ExportMitMDatasetsCommand -from ...commands.mitm.mitm_dataset.importers.v1 import ImportMitMDatasetsCommand -from ...commands.mitm.mitm_dataset.update import UpdateMitMDatasetCommand -from ...commands.mitm.mitm_service.schemas.mitm_service_schema import ValidationError -from ...commands.mitm.mitm_service.utils import test_access -from ...utils import json -from ...utils.core import get_user_id -from ...views.error_handling import handle_api_exception + RelatedSliceSchema, RelatedTableSchema, RelatedDatabaseSchema, UserSchema +from superset import event_logger +from superset.views.error_handling import handle_api_exception logger = logging.getLogger(__name__) @@ -129,7 +130,8 @@ class MitMDatasetRestApi(BaseSupersetModelRestApi): 'dashboards.uuid', 'dashboards.dashboard_title', ] - show_select_columns = show_columns + ['slices.datasource_id', 'slices.datasource_type'] # this is necessary due to eager query manipulation in the slice model + show_select_columns = show_columns + ['slices.datasource_id', + 'slices.datasource_type'] # this is necessary due to eager query manipulation in the slice model list_columns = [ 'id', 'dataset_name', @@ -152,7 +154,9 @@ class MitMDatasetRestApi(BaseSupersetModelRestApi): 'dashboards.id', 'dashboards.dashboard_title', ] - list_select_columns = list_columns + ['changed_on', 'created_on', 'changed_by_fk'] + ['slices.datasource_id', 'slices.datasource_type'] + list_select_columns = list_columns + ['changed_on', 'created_on', + 'changed_by_fk'] + ['slices.datasource_id', + 'slices.datasource_type'] add_columns = [ 'dataset_name', @@ -176,7 +180,8 @@ class MitMDatasetRestApi(BaseSupersetModelRestApi): 'changed_by', ] - allowed_rel_fields = {'database', 'creator', 'owners', 'tables', 'slices', 'dashboards', + allowed_rel_fields = {'database', 'creator', 'owners', 'tables', 'slices', + 'dashboards', 'created_by', 'changed_by'} base_related_field_filters = { 'owners': [['id', BaseFilterRelatedUsers, lambda: []]], @@ -324,10 +329,13 @@ class MitMDatasetRestApi(BaseSupersetModelRestApi): return self.response(404, message='ID not found') owners = self.owner_schema.dump(model.owners, many=True) - response = { 'owners': {'count': len(owners), 'result': owners}} + response = {'owners': {'count': len(owners), 'result': owners}} rel_assets = MitMDatasetDAO.get_related_assets(pk) - for rel_obj_type, schema in [('tables', self.related_table_schema), ('slices', self.related_slice_schema), ('dashboards', self.related_dashboard_schema)]: - objs = self.related_database_schema.dump(rel_assets[rel_obj_type], many=True) + for rel_obj_type, schema in [('tables', self.related_table_schema), + ('slices', self.related_slice_schema), + ('dashboards', self.related_dashboard_schema)]: + objs = self.related_database_schema.dump(rel_assets[rel_obj_type], + many=True) response[rel_obj_type] = {'count': len(objs), 'result': objs} return self.response( @@ -554,40 +562,33 @@ class MitMDatasetRestApi(BaseSupersetModelRestApi): ) @requires_form_data def upload(self) -> Response: - logger.info('Received upload request') - logger.info(f"Request headers: {request.headers}") - logger.info(f"Request form data: {request.form}") - logger.info(f"Request files: {[file.filename for file in request.files.values()]}") - # logger.info('Content Length: %s', request.content_length) - from flask import current_app - logger.info('Max Content Length: %s', current_app.config['MAX_CONTENT_LENGTH']) - dataset_name = request.form.get('dataset_name') mitm_name = request.form.get('mitm') - zip = request.form.get('mitm_zip') mitm_zip = request.files.get('mitm_zip') - logger.info('Received following form data: %s', request.form) - - if not mitm_zip and not zip:# or mitm_zip.mimetype != 'application/zip': + if not mitm_zip: # or mitm_zip.mimetype != 'application/zip': return self.response_400(message='mitm_zip is missing or is not its mimetype is not application/zip') - mitm_zip = mitm_zip or zip + # TODO switch to streaming cache write mitm_bytes = mitm_zip.read() ck = random_cache_key() WriteCacheCommand(ck, mitm_bytes).run() - from superset.customization.external_service_support.service_registry import \ - ExternalService + from superset.extensions import external_service_call_manager + from superset.customization.external_service_support.interface import \ + ExternalService, parse_channel_id_from_request from superset.tasks.mitm.upload_mitm_dataset import upload_mitm_dataset_task def handler(jm): - return upload_mitm_dataset_task.delay(jm, dataset_name=dataset_name, mitm_zip_cache_key=ck, + return upload_mitm_dataset_task.delay(jm, + dataset_name=dataset_name, + mitm_zip_cache_key=ck, mitm_name=mitm_name) - from superset.extensions import external_service_call_manager job_metadata = external_service_call_manager.submit_async_internally_handled( - ExternalService.MITM, handler) + ExternalService.MITM, + handler, + override_channel_id=parse_channel_id_from_request(request)) return self.response(202, **job_metadata) @@ -603,20 +604,21 @@ class MitMDatasetRestApi(BaseSupersetModelRestApi): if not test_access(pk): return self.response_404() - from superset.customization.external_service_support.service_registry import \ - ExternalService from superset.extensions import external_service_call_manager + from superset.customization.external_service_support.interface import \ + ExternalService, parse_channel_id_from_request from superset.tasks.mitm.download_mitm_dataset import download_mitm_dataset_task def handler(jm): return download_mitm_dataset_task.delay(jm, pk) job_metadata = external_service_call_manager.submit_async_internally_handled( - ExternalService.MITM, handler) + ExternalService.MITM, + handler, + override_channel_id=parse_channel_id_from_request(request)) return self.response(202, **job_metadata) - def generate_visualizations(self, pk: int): ... diff --git a/superset/customization/mitm_service/api.py b/superset/customization/mitm_service/api.py index c4ab87301c46a70dafc354bc9c19856485e18191..85be70be8cfd0f71f045662772b58be629912858 100644 --- a/superset/customization/mitm_service/api.py +++ b/superset/customization/mitm_service/api.py @@ -32,8 +32,8 @@ class CallMitMService(BaseSupersetApi): if not test_access(pk): return self.response_404() - from superset.customization.external_service_support.service_registry import \ - ExternalService + from superset.customization.external_service_support.interface import \ + parse_channel_id_from_request, ExternalService from superset.tasks.mitm.drop_mitm_dataset import drop_mitm_dataset_task def handler(jm): @@ -41,6 +41,6 @@ class CallMitMService(BaseSupersetApi): pk) job_metadata = external_service_call_manager.submit_async_internally_handled( - ExternalService.MITM, handler, user_id=get_user_id()) + ExternalService.MITM, handler, override_channel_id=parse_channel_id_from_request(request)) return self.response(202, **job_metadata) diff --git a/superset/tasks/mitm/upload_mitm_dataset.py b/superset/tasks/mitm/upload_mitm_dataset.py index 8f3654f68c2a2f72d6fd0a228bcc6f090d440bd4..d4d25a27b01b82d5d69fdbb32d7fee6f46cbba1b 100644 --- a/superset/tasks/mitm/upload_mitm_dataset.py +++ b/superset/tasks/mitm/upload_mitm_dataset.py @@ -32,7 +32,7 @@ def upload_mitm_dataset_task(job_metadata: AsyncJobMetadata, with override_user(load_user_from_job_metadata(job_metadata), force=False): try: mitm_zip = ReadCacheCommand(mitm_zip_cache_key).run() - + # TODO switch to cache_key UploadMitMDatasetCommand(mitm_name, dataset_name, mitm_zip=mitm_zip).run() external_service_call_manager.call_completed(job_metadata)