Unverified commit 88964b7d authored by Maxime Beauchemin, committed by GitHub

Deprecate auto-generated metrics (#5461)

* [WiP] deprecate auto-generated metrics & fix examples

Picking up some leftover pieces that are no longer needed since the
introduction of MetricsControl

* db merge

* db merge

* fix migration

* Creating metrics required for tests
Parent 65f7f292
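For context on the new pattern: metrics that used to be auto-generated from per-column flags are now declared explicitly on the datasource, as the updated example loaders below do. A minimal sketch of that pattern, assuming an existing `SqlaTable` instance named `tbl` with a numeric `num` column:

```python
from superset import db
from superset.connectors.sqla.models import SqlMetric

# Explicitly register the aggregate that the old per-column `sum` flag
# used to generate. The idempotency check mirrors the example loaders
# touched in this change.
if not any(m.metric_name == 'sum__num' for m in tbl.metrics):
    tbl.metrics.append(SqlMetric(
        metric_name='sum__num',
        expression='SUM(num)',
    ))
db.session.merge(tbl)
db.session.commit()
```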
......@@ -350,13 +350,8 @@ class BaseColumn(AuditMixinNullable, ImportMixin):
verbose_name = Column(String(1024))
is_active = Column(Boolean, default=True)
type = Column(String(32))
groupby = Column(Boolean, default=False)
count_distinct = Column(Boolean, default=False)
sum = Column(Boolean, default=False)
avg = Column(Boolean, default=False)
max = Column(Boolean, default=False)
min = Column(Boolean, default=False)
filterable = Column(Boolean, default=False)
groupby = Column(Boolean, default=True)
filterable = Column(Boolean, default=True)
description = Column(Text)
is_dttm = None
......
......@@ -27,4 +27,4 @@ class DatasourceModelView(SupersetModelView):
raise SupersetException(Markup(
'Cannot delete a datasource that has slices attached to it.'
"Here's the list of associated charts: " +
''.join([o.slice_link for o in obj.slices])))
''.join([o.slice_name for o in obj.slices])))
......@@ -237,12 +237,6 @@ class DruidCluster(Model, AuditMixinNullable, ImportMixin):
if col_obj.type == 'STRING':
col_obj.groupby = True
col_obj.filterable = True
if col_obj.type == 'hyperUnique' or col_obj.type == 'thetaSketch':
col_obj.count_distinct = True
if col_obj.is_num:
col_obj.sum = True
col_obj.min = True
col_obj.max = True
datasource.refresh_metrics()
session.commit()
......@@ -280,8 +274,7 @@ class DruidColumn(Model, BaseColumn):
export_fields = (
'datasource_id', 'column_name', 'is_active', 'type', 'groupby',
'count_distinct', 'sum', 'avg', 'max', 'min', 'filterable',
'description', 'dimension_spec_json', 'verbose_name',
'filterable', 'description', 'dimension_spec_json', 'verbose_name',
)
update_from_object_fields = export_fields
export_parent = 'datasource'
......@@ -306,77 +299,6 @@ class DruidColumn(Model, BaseColumn):
metric_type='count',
json=json.dumps({'type': 'count', 'name': 'count'}),
)
# Somehow we need to reassign this for UDAFs
if self.type in ('DOUBLE', 'FLOAT'):
corrected_type = 'DOUBLE'
else:
corrected_type = self.type
if self.sum and self.is_num:
mt = corrected_type.lower() + 'Sum'
name = 'sum__' + self.column_name
metrics[name] = DruidMetric(
metric_name=name,
metric_type='sum',
verbose_name='SUM({})'.format(self.column_name),
json=json.dumps({
'type': mt, 'name': name, 'fieldName': self.column_name}),
)
if self.avg and self.is_num:
mt = corrected_type.lower() + 'Avg'
name = 'avg__' + self.column_name
metrics[name] = DruidMetric(
metric_name=name,
metric_type='avg',
verbose_name='AVG({})'.format(self.column_name),
json=json.dumps({
'type': mt, 'name': name, 'fieldName': self.column_name}),
)
if self.min and self.is_num:
mt = corrected_type.lower() + 'Min'
name = 'min__' + self.column_name
metrics[name] = DruidMetric(
metric_name=name,
metric_type='min',
verbose_name='MIN({})'.format(self.column_name),
json=json.dumps({
'type': mt, 'name': name, 'fieldName': self.column_name}),
)
if self.max and self.is_num:
mt = corrected_type.lower() + 'Max'
name = 'max__' + self.column_name
metrics[name] = DruidMetric(
metric_name=name,
metric_type='max',
verbose_name='MAX({})'.format(self.column_name),
json=json.dumps({
'type': mt, 'name': name, 'fieldName': self.column_name}),
)
if self.count_distinct:
name = 'count_distinct__' + self.column_name
if self.type == 'hyperUnique' or self.type == 'thetaSketch':
metrics[name] = DruidMetric(
metric_name=name,
verbose_name='COUNT(DISTINCT {})'.format(self.column_name),
metric_type=self.type,
json=json.dumps({
'type': self.type,
'name': name,
'fieldName': self.column_name,
}),
)
else:
metrics[name] = DruidMetric(
metric_name=name,
verbose_name='COUNT(DISTINCT {})'.format(self.column_name),
metric_type='count_distinct',
json=json.dumps({
'type': 'cardinality',
'name': name,
'fieldNames': [self.column_name]}),
)
return metrics
def refresh_metrics(self):
......
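With the bulk of `get_metrics` removed, only the `count` metric is still created automatically; other Druid aggregations now have to be defined explicitly (in the UI or in code). A sketch of an explicit replacement for the old auto-generated sum metric, assuming a numeric column named `num`:

```python
import json

from superset.connectors.druid.models import DruidMetric

# Hypothetical explicit equivalent of the removed `sum__<column>`
# auto-generation; `doubleSum` matches what the old code emitted for
# DOUBLE/FLOAT columns.
sum_num = DruidMetric(
    metric_name='sum__num',
    metric_type='sum',
    verbose_name='SUM(num)',
    json=json.dumps({
        'type': 'doubleSum', 'name': 'sum__num', 'fieldName': 'num'}),
)
```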
......@@ -50,11 +50,9 @@ class DruidColumnInlineView(CompactCRUDMixin, SupersetModelView): # noqa
edit_columns = [
'column_name', 'verbose_name', 'description', 'dimension_spec_json', 'datasource',
'groupby', 'filterable', 'count_distinct', 'sum', 'min', 'max']
'groupby', 'filterable']
add_columns = edit_columns
list_columns = [
'column_name', 'verbose_name', 'type', 'groupby', 'filterable', 'count_distinct',
'sum', 'min', 'max']
list_columns = ['column_name', 'verbose_name', 'type', 'groupby', 'filterable']
can_delete = False
page_size = 500
label_columns = {
......@@ -63,12 +61,6 @@ class DruidColumnInlineView(CompactCRUDMixin, SupersetModelView): # noqa
'datasource': _('Datasource'),
'groupby': _('Groupable'),
'filterable': _('Filterable'),
'count_distinct': _('Count Distinct'),
'sum': _('Sum'),
'min': _('Min'),
'max': _('Max'),
'verbose_name': _('Verbose Name'),
'description': _('Description'),
}
description_columns = {
'filterable': _(
......
......@@ -102,7 +102,7 @@ class TableColumn(Model, BaseColumn):
export_fields = (
'table_id', 'column_name', 'verbose_name', 'is_dttm', 'is_active',
'type', 'groupby', 'count_distinct', 'sum', 'avg', 'max', 'min',
'type', 'groupby',
'filterable', 'expression', 'description', 'python_date_format',
'database_expression',
)
......@@ -185,43 +185,6 @@ class TableColumn(Model, BaseColumn):
self.type or '', dttm)
return s or "'{}'".format(dttm.strftime('%Y-%m-%d %H:%M:%S.%f'))
def get_metrics(self):
# TODO deprecate, this is not needed since MetricsControl
metrics = []
M = SqlMetric # noqa
quoted = self.column_name
if self.sum:
metrics.append(M(
metric_name='sum__' + self.column_name,
metric_type='sum',
expression='SUM({})'.format(quoted),
))
if self.avg:
metrics.append(M(
metric_name='avg__' + self.column_name,
metric_type='avg',
expression='AVG({})'.format(quoted),
))
if self.max:
metrics.append(M(
metric_name='max__' + self.column_name,
metric_type='max',
expression='MAX({})'.format(quoted),
))
if self.min:
metrics.append(M(
metric_name='min__' + self.column_name,
metric_type='min',
expression='MIN({})'.format(quoted),
))
if self.count_distinct:
metrics.append(M(
metric_name='count_distinct__' + self.column_name,
metric_type='count_distinct',
expression='COUNT(DISTINCT {})'.format(quoted),
))
return {m.metric_name: m for m in metrics}
class SqlMetric(Model, BaseMetric):
......@@ -878,7 +841,6 @@ class SqlaTable(Model, BaseDatasource):
self.columns.append(dbcol)
if not any_date_col and dbcol.is_time:
any_date_col = col.name
metrics += dbcol.get_metrics().values()
metrics.append(M(
metric_name='count',
......
......@@ -23,7 +23,7 @@ import pandas as pd
from sqlalchemy import DateTime, String
from superset import db, security_manager
from superset.connectors.sqla.models import TableColumn
from superset.connectors.sqla.models import SqlMetric, TableColumn
from superset.utils.core import get_or_create_main_db
from .helpers import (
config,
......@@ -71,6 +71,12 @@ def load_birth_names():
expression="CASE WHEN state = 'CA' THEN num ELSE 0 END",
))
if not any(col.metric_name == 'sum__num' for col in obj.metrics):
obj.metrics.append(SqlMetric(
metric_name='sum__num',
expression='SUM(num)',
))
db.session.merge(obj)
db.session.commit()
obj.fetch_metadata()
......
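The same "append the metric only if it is missing" check recurs in the other example loaders below. A hypothetical helper (not part of this change) that captures the pattern:

```python
from superset.connectors.sqla.models import SqlMetric


def ensure_metric(table, metric_name, expression):
    """Append a SqlMetric to `table` unless one with that name exists.

    Hypothetical convenience wrapper around the pattern used by the
    example data loaders in this commit.
    """
    if not any(m.metric_name == metric_name for m in table.metrics):
        table.metrics.append(SqlMetric(
            metric_name=metric_name,
            expression=expression,
        ))
```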
......@@ -21,6 +21,7 @@ import pandas as pd
from sqlalchemy import BigInteger, Date, String
from superset import db
from superset.connectors.sqla.models import SqlMetric
from superset.utils import core as utils
from .helpers import (
DATA_FOLDER,
......@@ -67,6 +68,11 @@ def load_country_map_data():
obj = TBL(table_name='birth_france_by_region')
obj.main_dttm_col = 'dttm'
obj.database = utils.get_or_create_main_db()
if not any(col.metric_name == 'avg__2004' for col in obj.metrics):
obj.metrics.append(SqlMetric(
metric_name='avg__2004',
expression='AVG(2004)',
))
db.session.merge(obj)
db.session.commit()
obj.fetch_metadata()
......@@ -79,7 +85,16 @@ def load_country_map_data():
'where': '',
'viz_type': 'country_map',
'entity': 'DEPT_ID',
'metric': 'avg__2004',
'metric': {
'expressionType': 'SIMPLE',
'column': {
'type': 'INT',
'column_name': '2004',
},
'aggregate': 'AVG',
'label': 'Boys',
'optionName': 'metric_112342',
},
'row_limit': 500000,
}
......
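The `metric` field switches from a saved metric name to an ad-hoc metric definition embedded in the form data. A brief annotated restatement of that structure (the values are the ones from the diff above; `optionName` is just an arbitrary unique identifier for the control):

```python
# Ad-hoc (SIMPLE) metric: aggregate a physical column directly from the
# chart's form data instead of referencing a saved SqlMetric by name.
adhoc_metric = {
    'expressionType': 'SIMPLE',     # aggregate(column) form
    'column': {
        'type': 'INT',
        'column_name': '2004',      # column being aggregated
    },
    'aggregate': 'AVG',             # aggregation function
    'label': 'Boys',                # label shown on the chart
    'optionName': 'metric_112342',  # arbitrary unique identifier
}
```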
......@@ -24,6 +24,7 @@ import pandas as pd
from sqlalchemy import Float, String
from superset import db
from superset.connectors.sqla.models import SqlMetric
from superset.utils import core as utils
from .helpers import DATA_FOLDER, merge_slice, misc_dash_slices, Slice, TBL
......@@ -51,6 +52,13 @@ def load_energy():
tbl = TBL(table_name=tbl_name)
tbl.description = 'Energy consumption'
tbl.database = utils.get_or_create_main_db()
if not any(col.metric_name == 'sum__value' for col in tbl.metrics):
tbl.metrics.append(SqlMetric(
metric_name='sum__value',
expression='SUM(value)',
))
db.session.merge(tbl)
db.session.commit()
tbl.fetch_metadata()
......
......@@ -88,9 +88,9 @@ def load_misc_dashboard():
"children": [],
"id": "CHART-r19KVMNCE7",
"meta": {
"chartId": 3978,
"chartId": 3971,
"height": 34,
"sliceName": "Calendar Heatmap multiformat 7",
"sliceName": "Calendar Heatmap multiformat 0",
"width": 4
},
"type": "CHART"
......
......@@ -45,8 +45,11 @@ def load_multi_line():
'viz_type': 'line_multi',
'line_charts': [ids[0]],
'line_charts_2': [ids[1]],
'since': '1960-01-01',
'since': '1970',
'until': '1995',
'prefix_metric_with_slice_name': True,
'show_legend': False,
'x_axis_format': '%Y',
}),
)
......
......@@ -89,8 +89,8 @@ def load_multiformat_time_series():
'metrics': ['count'],
'granularity_sqla': col.column_name,
'row_limit': config.get('ROW_LIMIT'),
'since': '1 year ago',
'until': 'now',
'since': '2015',
'until': '2016',
'where': '',
'viz_type': 'cal_heatmap',
'domain_granularity': 'month',
......@@ -98,11 +98,11 @@ def load_multiformat_time_series():
}
slc = Slice(
slice_name='Calendar Heatmap multiformat ' + str(i),
slice_name=f'Calendar Heatmap multiformat {i}',
viz_type='cal_heatmap',
datasource_type='table',
datasource_id=tbl.id,
params=get_slice_json(slice_data),
)
merge_slice(slc)
misc_dash_slices.add(slc.slice_name)
misc_dash_slices.add('Calendar Heatmap multiformat 0')
......@@ -25,6 +25,7 @@ import pandas as pd
from sqlalchemy import DateTime, String
from superset import db
from superset.connectors.sqla.models import SqlMetric
from superset.utils import core as utils
from .helpers import (
config,
......@@ -67,6 +68,18 @@ def load_world_bank_health_n_pop():
tbl.main_dttm_col = 'year'
tbl.database = utils.get_or_create_main_db()
tbl.filter_select_enabled = True
metrics = [
'sum__SP_POP_TOTL', 'sum__SH_DYN_AIDS', 'sum__SH_DYN_AIDS',
'sum__SP_RUR_TOTL_ZS', 'sum__SP_DYN_LE00_IN',
]
for m in metrics:
if not any(col.metric_name == m for col in tbl.metrics):
tbl.metrics.append(SqlMetric(
metric_name=m,
expression=f'{m[:3]}({m[5:]})',
))
db.session.merge(tbl)
db.session.commit()
tbl.fetch_metadata()
......
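Each expression above is derived from the metric name by slicing: the first three characters give the aggregate and everything after the `__` separator gives the column. A quick worked example of that derivation:

```python
# How the loader turns a metric name into its SQL expression.
m = 'sum__SP_POP_TOTL'
expression = f'{m[:3]}({m[5:]})'  # 'sum' + '(' + 'SP_POP_TOTL' + ')'
assert expression == 'sum(SP_POP_TOTL)'
```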
"""remove_aggs
Revision ID: 7467e77870e4
Revises: c829ff0b37d0
Create Date: 2018-07-22 08:50:01.078218
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = '7467e77870e4'
down_revision = 'c829ff0b37d0'
def upgrade():
with op.batch_alter_table('table_columns') as batch_op:
batch_op.drop_column('avg')
batch_op.drop_column('max')
batch_op.drop_column('sum')
batch_op.drop_column('count_distinct')
batch_op.drop_column('min')
with op.batch_alter_table('columns') as batch_op:
batch_op.drop_column('avg')
batch_op.drop_column('max')
batch_op.drop_column('sum')
batch_op.drop_column('count_distinct')
batch_op.drop_column('min')
def downgrade():
op.add_column('table_columns', sa.Column('min', sa.Boolean(), nullable=True))
op.add_column(
'table_columns', sa.Column('count_distinct', sa.Boolean(), nullable=True))
op.add_column('table_columns', sa.Column('sum', sa.Boolean(), nullable=True))
op.add_column('table_columns', sa.Column('max', sa.Boolean(), nullable=True))
op.add_column('table_columns', sa.Column('avg', sa.Boolean(), nullable=True))
op.add_column('columns', sa.Column('min', sa.Boolean(), nullable=True))
op.add_column('columns', sa.Column('count_distinct', sa.Boolean(), nullable=True))
op.add_column('columns', sa.Column('sum', sa.Boolean(), nullable=True))
op.add_column('columns', sa.Column('max', sa.Boolean(), nullable=True))
op.add_column('columns', sa.Column('avg', sa.Boolean(), nullable=True))
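A quick way to confirm the migration took effect is to inspect the affected tables; a minimal sketch, assuming it is run from a Superset shell after `superset db upgrade`:

```python
from sqlalchemy import inspect

from superset import db

# Sanity check that the old aggregation flag columns were dropped.
insp = inspect(db.engine)
for table in ('table_columns', 'columns'):
    names = {c['name'] for c in insp.get_columns(table)}
    assert not names & {'avg', 'max', 'sum', 'min', 'count_distinct'}
```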
"""empty message
Revision ID: c829ff0b37d0
Revises: ('4451805bbaa1', '1d9e835a84f9')
Create Date: 2018-07-22 08:49:48.936117
"""
# revision identifiers, used by Alembic.
revision = 'c829ff0b37d0'
down_revision = ('4451805bbaa1', '1d9e835a84f9')
from alembic import op
import sqlalchemy as sa
def upgrade():
pass
def downgrade():
pass
"""empty message
Revision ID: fbd55e0f83eb
Revises: ('7467e77870e4', 'de021a1ca60d')
Create Date: 2018-12-22 17:26:16.113317
"""
# revision identifiers, used by Alembic.
revision = 'fbd55e0f83eb'
down_revision = ('7467e77870e4', 'de021a1ca60d')
from alembic import op
import sqlalchemy as sa
def upgrade():
pass
def downgrade():
pass
......@@ -1410,7 +1410,7 @@ class Superset(BaseSupersetView):
form_data.pop('slice_id') # don't save old slice_id
slc = models.Slice(owners=[g.user] if g.user else [])
slc.params = json.dumps(form_data)
slc.params = json.dumps(form_data, indent=2, sort_keys=True)
slc.datasource_name = datasource_name
slc.viz_type = form_data['viz_type']
slc.datasource_type = datasource_type
......
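Saved slice params are now serialized with a stable, readable layout. A small illustration of the difference with plain `json.dumps`:

```python
import json

form_data = {'viz_type': 'table', 'metrics': ['count']}

# Before: compact output, key order as provided.
json.dumps(form_data)
# -> '{"viz_type": "table", "metrics": ["count"]}'

# After: pretty-printed with sorted keys, as views.py now does.
json.dumps(form_data, indent=2, sort_keys=True)
# -> '{\n  "metrics": [\n    "count"\n  ],\n  "viz_type": "table"\n}'
```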
......@@ -372,11 +372,6 @@ class DruidTests(SupersetTestCase):
.filter(DruidMetric.metric_name.like('%__metric1'))
)
self.assertEqual(
{metric.metric_name for metric in metrics},
{'max__metric1', 'min__metric1', 'sum__metric1'},
)
for metric in metrics:
agg, _ = metric.metric_name.split('__')
......