From 88964b7dfcc8355ef4e42b1599197bdff156e8f5 Mon Sep 17 00:00:00 2001 From: Maxime Beauchemin Date: Wed, 16 Jan 2019 22:32:13 -0800 Subject: [PATCH] Deprecate auto-generated metrics (#5461) * [WiP] deprecate auto-generated metrics & fix examples Picking up some leftover pieces not needed anymore since the MetricsControl * db merge * db merge * fix migration * Creating metrics required for tests --- superset/connectors/base/models.py | 9 +-- superset/connectors/base/views.py | 2 +- superset/connectors/druid/models.py | 80 +------------------ superset/connectors/druid/views.py | 12 +-- superset/connectors/sqla/models.py | 40 +--------- superset/data/birth_names.py | 8 +- superset/data/country_map.py | 17 +++- superset/data/energy.py | 8 ++ superset/data/misc_dashboard.py | 4 +- superset/data/multi_line.py | 5 +- superset/data/multiformat_time_series.py | 8 +- superset/data/world_bank.py | 13 +++ .../versions/7467e77870e4_remove_aggs.py | 44 ++++++++++ superset/migrations/versions/c829ff0b37d0_.py | 22 +++++ superset/migrations/versions/fbd55e0f83eb_.py | 22 +++++ superset/views/core.py | 2 +- tests/druid_tests.py | 5 -- 17 files changed, 150 insertions(+), 151 deletions(-) create mode 100644 superset/migrations/versions/7467e77870e4_remove_aggs.py create mode 100644 superset/migrations/versions/c829ff0b37d0_.py create mode 100644 superset/migrations/versions/fbd55e0f83eb_.py diff --git a/superset/connectors/base/models.py b/superset/connectors/base/models.py index 8ed9e78e8..50ef6d8f5 100644 --- a/superset/connectors/base/models.py +++ b/superset/connectors/base/models.py @@ -350,13 +350,8 @@ class BaseColumn(AuditMixinNullable, ImportMixin): verbose_name = Column(String(1024)) is_active = Column(Boolean, default=True) type = Column(String(32)) - groupby = Column(Boolean, default=False) - count_distinct = Column(Boolean, default=False) - sum = Column(Boolean, default=False) - avg = Column(Boolean, default=False) - max = Column(Boolean, default=False) - min = Column(Boolean, default=False) - filterable = Column(Boolean, default=False) + groupby = Column(Boolean, default=True) + filterable = Column(Boolean, default=True) description = Column(Text) is_dttm = None diff --git a/superset/connectors/base/views.py b/superset/connectors/base/views.py index b2f203e66..d73dc191a 100644 --- a/superset/connectors/base/views.py +++ b/superset/connectors/base/views.py @@ -27,4 +27,4 @@ class DatasourceModelView(SupersetModelView): raise SupersetException(Markup( 'Cannot delete a datasource that has slices attached to it.' 
"Here's the list of associated charts: " + - ''.join([o.slice_link for o in obj.slices]))) + ''.join([o.slice_name for o in obj.slices]))) diff --git a/superset/connectors/druid/models.py b/superset/connectors/druid/models.py index 105dfba1b..577679092 100644 --- a/superset/connectors/druid/models.py +++ b/superset/connectors/druid/models.py @@ -237,12 +237,6 @@ class DruidCluster(Model, AuditMixinNullable, ImportMixin): if col_obj.type == 'STRING': col_obj.groupby = True col_obj.filterable = True - if col_obj.type == 'hyperUnique' or col_obj.type == 'thetaSketch': - col_obj.count_distinct = True - if col_obj.is_num: - col_obj.sum = True - col_obj.min = True - col_obj.max = True datasource.refresh_metrics() session.commit() @@ -280,8 +274,7 @@ class DruidColumn(Model, BaseColumn): export_fields = ( 'datasource_id', 'column_name', 'is_active', 'type', 'groupby', - 'count_distinct', 'sum', 'avg', 'max', 'min', 'filterable', - 'description', 'dimension_spec_json', 'verbose_name', + 'filterable', 'description', 'dimension_spec_json', 'verbose_name', ) update_from_object_fields = export_fields export_parent = 'datasource' @@ -306,77 +299,6 @@ class DruidColumn(Model, BaseColumn): metric_type='count', json=json.dumps({'type': 'count', 'name': 'count'}), ) - # Somehow we need to reassign this for UDAFs - if self.type in ('DOUBLE', 'FLOAT'): - corrected_type = 'DOUBLE' - else: - corrected_type = self.type - - if self.sum and self.is_num: - mt = corrected_type.lower() + 'Sum' - name = 'sum__' + self.column_name - metrics[name] = DruidMetric( - metric_name=name, - metric_type='sum', - verbose_name='SUM({})'.format(self.column_name), - json=json.dumps({ - 'type': mt, 'name': name, 'fieldName': self.column_name}), - ) - - if self.avg and self.is_num: - mt = corrected_type.lower() + 'Avg' - name = 'avg__' + self.column_name - metrics[name] = DruidMetric( - metric_name=name, - metric_type='avg', - verbose_name='AVG({})'.format(self.column_name), - json=json.dumps({ - 'type': mt, 'name': name, 'fieldName': self.column_name}), - ) - - if self.min and self.is_num: - mt = corrected_type.lower() + 'Min' - name = 'min__' + self.column_name - metrics[name] = DruidMetric( - metric_name=name, - metric_type='min', - verbose_name='MIN({})'.format(self.column_name), - json=json.dumps({ - 'type': mt, 'name': name, 'fieldName': self.column_name}), - ) - if self.max and self.is_num: - mt = corrected_type.lower() + 'Max' - name = 'max__' + self.column_name - metrics[name] = DruidMetric( - metric_name=name, - metric_type='max', - verbose_name='MAX({})'.format(self.column_name), - json=json.dumps({ - 'type': mt, 'name': name, 'fieldName': self.column_name}), - ) - if self.count_distinct: - name = 'count_distinct__' + self.column_name - if self.type == 'hyperUnique' or self.type == 'thetaSketch': - metrics[name] = DruidMetric( - metric_name=name, - verbose_name='COUNT(DISTINCT {})'.format(self.column_name), - metric_type=self.type, - json=json.dumps({ - 'type': self.type, - 'name': name, - 'fieldName': self.column_name, - }), - ) - else: - metrics[name] = DruidMetric( - metric_name=name, - verbose_name='COUNT(DISTINCT {})'.format(self.column_name), - metric_type='count_distinct', - json=json.dumps({ - 'type': 'cardinality', - 'name': name, - 'fieldNames': [self.column_name]}), - ) return metrics def refresh_metrics(self): diff --git a/superset/connectors/druid/views.py b/superset/connectors/druid/views.py index 319104396..703ea399c 100644 --- a/superset/connectors/druid/views.py +++ b/superset/connectors/druid/views.py @@ 
-50,11 +50,9 @@ class DruidColumnInlineView(CompactCRUDMixin, SupersetModelView): # noqa edit_columns = [ 'column_name', 'verbose_name', 'description', 'dimension_spec_json', 'datasource', - 'groupby', 'filterable', 'count_distinct', 'sum', 'min', 'max'] + 'groupby', 'filterable'] add_columns = edit_columns - list_columns = [ - 'column_name', 'verbose_name', 'type', 'groupby', 'filterable', 'count_distinct', - 'sum', 'min', 'max'] + list_columns = ['column_name', 'verbose_name', 'type', 'groupby', 'filterable'] can_delete = False page_size = 500 label_columns = { @@ -63,12 +61,6 @@ class DruidColumnInlineView(CompactCRUDMixin, SupersetModelView): # noqa 'datasource': _('Datasource'), 'groupby': _('Groupable'), 'filterable': _('Filterable'), - 'count_distinct': _('Count Distinct'), - 'sum': _('Sum'), - 'min': _('Min'), - 'max': _('Max'), - 'verbose_name': _('Verbose Name'), - 'description': _('Description'), } description_columns = { 'filterable': _( diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py index 1b65be730..10591d3cb 100644 --- a/superset/connectors/sqla/models.py +++ b/superset/connectors/sqla/models.py @@ -102,7 +102,7 @@ class TableColumn(Model, BaseColumn): export_fields = ( 'table_id', 'column_name', 'verbose_name', 'is_dttm', 'is_active', - 'type', 'groupby', 'count_distinct', 'sum', 'avg', 'max', 'min', + 'type', 'groupby', 'filterable', 'expression', 'description', 'python_date_format', 'database_expression', ) @@ -185,43 +185,6 @@ class TableColumn(Model, BaseColumn): self.type or '', dttm) return s or "'{}'".format(dttm.strftime('%Y-%m-%d %H:%M:%S.%f')) - def get_metrics(self): - # TODO deprecate, this is not needed since MetricsControl - metrics = [] - M = SqlMetric # noqa - quoted = self.column_name - if self.sum: - metrics.append(M( - metric_name='sum__' + self.column_name, - metric_type='sum', - expression='SUM({})'.format(quoted), - )) - if self.avg: - metrics.append(M( - metric_name='avg__' + self.column_name, - metric_type='avg', - expression='AVG({})'.format(quoted), - )) - if self.max: - metrics.append(M( - metric_name='max__' + self.column_name, - metric_type='max', - expression='MAX({})'.format(quoted), - )) - if self.min: - metrics.append(M( - metric_name='min__' + self.column_name, - metric_type='min', - expression='MIN({})'.format(quoted), - )) - if self.count_distinct: - metrics.append(M( - metric_name='count_distinct__' + self.column_name, - metric_type='count_distinct', - expression='COUNT(DISTINCT {})'.format(quoted), - )) - return {m.metric_name: m for m in metrics} - class SqlMetric(Model, BaseMetric): @@ -878,7 +841,6 @@ class SqlaTable(Model, BaseDatasource): self.columns.append(dbcol) if not any_date_col and dbcol.is_time: any_date_col = col.name - metrics += dbcol.get_metrics().values() metrics.append(M( metric_name='count', diff --git a/superset/data/birth_names.py b/superset/data/birth_names.py index 05cbb34b0..379fdc806 100644 --- a/superset/data/birth_names.py +++ b/superset/data/birth_names.py @@ -23,7 +23,7 @@ import pandas as pd from sqlalchemy import DateTime, String from superset import db, security_manager -from superset.connectors.sqla.models import TableColumn +from superset.connectors.sqla.models import SqlMetric, TableColumn from superset.utils.core import get_or_create_main_db from .helpers import ( config, @@ -71,6 +71,12 @@ def load_birth_names(): expression="CASE WHEN state = 'CA' THEN num ELSE 0 END", )) + if not any(col.metric_name == 'sum__num' for col in obj.metrics): + 
obj.metrics.append(SqlMetric( + metric_name='sum__num', + expression='SUM(num)', + )) + db.session.merge(obj) db.session.commit() obj.fetch_metadata() diff --git a/superset/data/country_map.py b/superset/data/country_map.py index 207d50d60..c1c2b417b 100644 --- a/superset/data/country_map.py +++ b/superset/data/country_map.py @@ -21,6 +21,7 @@ import pandas as pd from sqlalchemy import BigInteger, Date, String from superset import db +from superset.connectors.sqla.models import SqlMetric from superset.utils import core as utils from .helpers import ( DATA_FOLDER, @@ -67,6 +68,11 @@ def load_country_map_data(): obj = TBL(table_name='birth_france_by_region') obj.main_dttm_col = 'dttm' obj.database = utils.get_or_create_main_db() + if not any(col.metric_name == 'avg__2004' for col in obj.metrics): + obj.metrics.append(SqlMetric( + metric_name='avg__2004', + expression='AVG(2004)', + )) db.session.merge(obj) db.session.commit() obj.fetch_metadata() @@ -79,7 +85,16 @@ def load_country_map_data(): 'where': '', 'viz_type': 'country_map', 'entity': 'DEPT_ID', - 'metric': 'avg__2004', + 'metric': { + 'expressionType': 'SIMPLE', + 'column': { + 'type': 'INT', + 'column_name': '2004', + }, + 'aggregate': 'AVG', + 'label': 'Boys', + 'optionName': 'metric_112342', + }, 'row_limit': 500000, } diff --git a/superset/data/energy.py b/superset/data/energy.py index d8ca3256c..c04eb46c4 100644 --- a/superset/data/energy.py +++ b/superset/data/energy.py @@ -24,6 +24,7 @@ import pandas as pd from sqlalchemy import Float, String from superset import db +from superset.connectors.sqla.models import SqlMetric from superset.utils import core as utils from .helpers import DATA_FOLDER, merge_slice, misc_dash_slices, Slice, TBL @@ -51,6 +52,13 @@ def load_energy(): tbl = TBL(table_name=tbl_name) tbl.description = 'Energy consumption' tbl.database = utils.get_or_create_main_db() + + if not any(col.metric_name == 'sum__value' for col in tbl.metrics): + tbl.metrics.append(SqlMetric( + metric_name='sum__value', + expression='SUM(value)', + )) + db.session.merge(tbl) db.session.commit() tbl.fetch_metadata() diff --git a/superset/data/misc_dashboard.py b/superset/data/misc_dashboard.py index 31028c641..bfecc5982 100644 --- a/superset/data/misc_dashboard.py +++ b/superset/data/misc_dashboard.py @@ -88,9 +88,9 @@ def load_misc_dashboard(): "children": [], "id": "CHART-r19KVMNCE7", "meta": { - "chartId": 3978, + "chartId": 3971, "height": 34, - "sliceName": "Calendar Heatmap multiformat 7", + "sliceName": "Calendar Heatmap multiformat 0", "width": 4 }, "type": "CHART" diff --git a/superset/data/multi_line.py b/superset/data/multi_line.py index 3bd312689..d6c0ac896 100644 --- a/superset/data/multi_line.py +++ b/superset/data/multi_line.py @@ -45,8 +45,11 @@ def load_multi_line(): 'viz_type': 'line_multi', 'line_charts': [ids[0]], 'line_charts_2': [ids[1]], - 'since': '1960-01-01', + 'since': '1970', + 'until': '1995', 'prefix_metric_with_slice_name': True, + 'show_legend': False, + 'x_axis_format': '%Y', }), ) diff --git a/superset/data/multiformat_time_series.py b/superset/data/multiformat_time_series.py index edf68e8bc..5dec85ab0 100644 --- a/superset/data/multiformat_time_series.py +++ b/superset/data/multiformat_time_series.py @@ -89,8 +89,8 @@ def load_multiformat_time_series(): 'metrics': ['count'], 'granularity_sqla': col.column_name, 'row_limit': config.get('ROW_LIMIT'), - 'since': '1 year ago', - 'until': 'now', + 'since': '2015', + 'until': '2016', 'where': '', 'viz_type': 'cal_heatmap', 'domain_granularity': 'month', 
@@ -98,11 +98,11 @@ def load_multiformat_time_series(): } slc = Slice( - slice_name='Calendar Heatmap multiformat ' + str(i), + slice_name=f'Calendar Heatmap multiformat {i}', viz_type='cal_heatmap', datasource_type='table', datasource_id=tbl.id, params=get_slice_json(slice_data), ) merge_slice(slc) - misc_dash_slices.add(slc.slice_name) + misc_dash_slices.add('Calendar Heatmap multiformat 0') diff --git a/superset/data/world_bank.py b/superset/data/world_bank.py index c9d8c1f39..9ad28f028 100644 --- a/superset/data/world_bank.py +++ b/superset/data/world_bank.py @@ -25,6 +25,7 @@ import pandas as pd from sqlalchemy import DateTime, String from superset import db +from superset.connectors.sqla.models import SqlMetric from superset.utils import core as utils from .helpers import ( config, @@ -67,6 +68,18 @@ def load_world_bank_health_n_pop(): tbl.main_dttm_col = 'year' tbl.database = utils.get_or_create_main_db() tbl.filter_select_enabled = True + + metrics = [ + 'sum__SP_POP_TOTL', 'sum__SH_DYN_AIDS', 'sum__SH_DYN_AIDS', + 'sum__SP_RUR_TOTL_ZS', 'sum__SP_DYN_LE00_IN', + ] + for m in metrics: + if not any(col.metric_name == m for col in tbl.metrics): + tbl.metrics.append(SqlMetric( + metric_name=m, + expression=f'{m[:3]}({m[5:]})', + )) + db.session.merge(tbl) db.session.commit() tbl.fetch_metadata() diff --git a/superset/migrations/versions/7467e77870e4_remove_aggs.py b/superset/migrations/versions/7467e77870e4_remove_aggs.py new file mode 100644 index 000000000..5e28a11dc --- /dev/null +++ b/superset/migrations/versions/7467e77870e4_remove_aggs.py @@ -0,0 +1,44 @@ +"""remove_aggs + +Revision ID: 7467e77870e4 +Revises: c829ff0b37d0 +Create Date: 2018-07-22 08:50:01.078218 + +""" +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. +revision = '7467e77870e4' +down_revision = 'c829ff0b37d0' + + +def upgrade(): + with op.batch_alter_table('table_columns') as batch_op: + batch_op.drop_column('avg') + batch_op.drop_column('max') + batch_op.drop_column('sum') + batch_op.drop_column('count_distinct') + batch_op.drop_column('min') + + with op.batch_alter_table('columns') as batch_op: + batch_op.drop_column('avg') + batch_op.drop_column('max') + batch_op.drop_column('sum') + batch_op.drop_column('count_distinct') + batch_op.drop_column('min') + + +def downgrade(): + op.add_column('table_columns', sa.Column('min', sa.Boolean(), nullable=True)) + op.add_column( + 'table_columns', sa.Column('count_distinct', sa.Boolean(), nullable=True)) + op.add_column('table_columns', sa.Column('sum', sa.Boolean(), nullable=True)) + op.add_column('table_columns', sa.Column('max', sa.Boolean(), nullable=True)) + op.add_column('table_columns', sa.Column('avg', sa.Boolean(), nullable=True)) + + op.add_column('columns', sa.Column('min', sa.Boolean(), nullable=True)) + op.add_column('columns', sa.Column('count_distinct', sa.Boolean(), nullable=True)) + op.add_column('columns', sa.Column('sum', sa.Boolean(), nullable=True)) + op.add_column('columns', sa.Column('max', sa.Boolean(), nullable=True)) + op.add_column('columns', sa.Column('avg', sa.Boolean(), nullable=True)) diff --git a/superset/migrations/versions/c829ff0b37d0_.py b/superset/migrations/versions/c829ff0b37d0_.py new file mode 100644 index 000000000..939dddb22 --- /dev/null +++ b/superset/migrations/versions/c829ff0b37d0_.py @@ -0,0 +1,22 @@ +"""empty message + +Revision ID: c829ff0b37d0 +Revises: ('4451805bbaa1', '1d9e835a84f9') +Create Date: 2018-07-22 08:49:48.936117 + +""" + +# revision identifiers, used by Alembic. 
+revision = 'c829ff0b37d0' +down_revision = ('4451805bbaa1', '1d9e835a84f9') + +from alembic import op +import sqlalchemy as sa + + +def upgrade(): + pass + + +def downgrade(): + pass diff --git a/superset/migrations/versions/fbd55e0f83eb_.py b/superset/migrations/versions/fbd55e0f83eb_.py new file mode 100644 index 000000000..2ca1c1887 --- /dev/null +++ b/superset/migrations/versions/fbd55e0f83eb_.py @@ -0,0 +1,22 @@ +"""empty message + +Revision ID: fbd55e0f83eb +Revises: ('7467e77870e4', 'de021a1ca60d') +Create Date: 2018-12-22 17:26:16.113317 + +""" + +# revision identifiers, used by Alembic. +revision = 'fbd55e0f83eb' +down_revision = ('7467e77870e4', 'de021a1ca60d') + +from alembic import op +import sqlalchemy as sa + + +def upgrade(): + pass + + +def downgrade(): + pass diff --git a/superset/views/core.py b/superset/views/core.py index 94a18da1c..02ff66a1f 100755 --- a/superset/views/core.py +++ b/superset/views/core.py @@ -1410,7 +1410,7 @@ class Superset(BaseSupersetView): form_data.pop('slice_id') # don't save old slice_id slc = models.Slice(owners=[g.user] if g.user else []) - slc.params = json.dumps(form_data) + slc.params = json.dumps(form_data, indent=2, sort_keys=True) slc.datasource_name = datasource_name slc.viz_type = form_data['viz_type'] slc.datasource_type = datasource_type diff --git a/tests/druid_tests.py b/tests/druid_tests.py index e4109b13d..ab26a98e5 100644 --- a/tests/druid_tests.py +++ b/tests/druid_tests.py @@ -372,11 +372,6 @@ class DruidTests(SupersetTestCase): .filter(DruidMetric.metric_name.like('%__metric1')) ) - self.assertEqual( - {metric.metric_name for metric in metrics}, - {'max__metric1', 'min__metric1', 'sum__metric1'}, - ) - for metric in metrics: agg, _ = metric.metric_name.split('__') -- GitLab
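
Note on using this change: with the per-column aggregate flags (sum, avg, min, max, count_distinct) removed, metrics are no longer auto-generated and must be defined explicitly on the datasource, which is exactly what the data-loader hunks above now do. The following is a minimal sketch of that pattern, assuming a configured Superset application context; the table name 'energy_usage' and the metric 'sum__value' are illustrative examples mirroring superset/data/energy.py in this diff, not additional API.

# Sketch: explicitly register a metric that used to be auto-generated.
# Assumes a working Superset app context; table/metric names are examples.
from superset import db
from superset.connectors.sqla.models import SqlaTable, SqlMetric

# Look up an existing table already registered in Superset.
tbl = db.session.query(SqlaTable).filter_by(table_name='energy_usage').one()

# Column checkboxes no longer create 'sum__<col>' metrics; add them explicitly
# so charts referencing those metric names keep working.
if not any(m.metric_name == 'sum__value' for m in tbl.metrics):
    tbl.metrics.append(SqlMetric(
        metric_name='sum__value',
        expression='SUM(value)',
    ))

db.session.merge(tbl)
db.session.commit()

Existing charts that reference names such as sum__value continue to work as long as a metric with the same metric_name is defined, which is why the example loaders in this patch create them before the slices are merged.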