From 5eb250bb8d8b8e94d63d74b3f00457cd2b4b5e8a Mon Sep 17 00:00:00 2001 From: Leon Zhang Date: Tue, 4 Dec 2018 23:33:29 +0800 Subject: [PATCH] add heuristic rule TBL.008 support allow-collates checks --- advisor/heuristic.go | 117 ++++++++++++++---- advisor/heuristic_test.go | 39 ++++++ advisor/rules.go | 12 +- .../testdata/TestListHeuristicRules.golden | 10 ++ .../TestMergeConflictHeuristicRules.golden | 1 + common/config.go | 26 ++-- common/testdata/TestPrintConfiguration.golden | 5 +- 7 files changed, 175 insertions(+), 35 deletions(-) diff --git a/advisor/heuristic.go b/advisor/heuristic.go index 4bcab81..caf4e20 100644 --- a/advisor/heuristic.go +++ b/advisor/heuristic.go @@ -1767,9 +1767,12 @@ func (q *Query4Audit) RuleCountConst() Rule { func (q *Query4Audit) RuleSumNPE() Rule { var rule = q.RuleOK() fingerprint := query.Fingerprint(q.Query) + // TODO: https://github.com/XiaoMi/soar/issues/143 + // https://dev.mysql.com/doc/refman/8.0/en/group-by-functions.html sumReg := regexp.MustCompile(`(?i)sum\(\s*[0-9a-z?]*\s*\)`) isnullReg := regexp.MustCompile(`(?i)isnull\(sum\(\s*[0-9a-z?]*\s*\)\)`) if sumReg.MatchString(fingerprint) && !isnullReg.MatchString(fingerprint) { + // TODO: check wether column define with not null flag rule = HeuristicRules["FUN.006"] if position := isnullReg.FindIndex([]byte(q.Query)); len(position) > 0 { rule.Position = position[0] @@ -2857,7 +2860,6 @@ func (q *Query4Audit) RuleColumnWithCharset() Rule { } } } - } } } @@ -2868,18 +2870,18 @@ func (q *Query4Audit) RuleColumnWithCharset() Rule { // RuleTableCharsetCheck TBL.005 func (q *Query4Audit) RuleTableCharsetCheck() Rule { var rule = q.RuleOK() + var allow bool + var hasCharset bool switch q.Stmt.(type) { - case *sqlparser.DDL: + case *sqlparser.DDL, *sqlparser.DBDDL: for _, tiStmt := range q.TiStmt { switch node := tiStmt.(type) { case *tidb.CreateTableStmt: - var allow bool - var hasCharset bool for _, opt := range node.Options { if opt.Tp == tidb.TableOptionCharset { hasCharset = true - for _, ch := range common.Config.TableAllowCharsets { + for _, ch := range common.Config.AllowCharsets { if strings.TrimSpace(strings.ToLower(ch)) == strings.TrimSpace(strings.ToLower(opt.StrValue)) { allow = true break @@ -2888,22 +2890,27 @@ func (q *Query4Audit) RuleTableCharsetCheck() Rule { } } - // 未指定字符集使用MySQL默认配置字符集,我们认为MySQL的配置是被优化过的。 - if hasCharset && !allow { - rule = HeuristicRules["TBL.005"] - break + case *tidb.CreateDatabaseStmt: + for _, opt := range node.Options { + if opt.Tp == tidb.DatabaseOptionCharset { + hasCharset = true + for _, ch := range common.Config.AllowCharsets { + if strings.TrimSpace(strings.ToLower(ch)) == strings.TrimSpace(strings.ToLower(opt.Value)) { + allow = true + break + } + } + } } case *tidb.AlterTableStmt: for _, spec := range node.Specs { - var allow bool - var hasCharset bool switch spec.Tp { case tidb.AlterTableOption: for _, opt := range spec.Options { if opt.Tp == tidb.TableOptionCharset { hasCharset = true - for _, ch := range common.Config.TableAllowCharsets { + for _, ch := range common.Config.AllowCharsets { if strings.TrimSpace(strings.ToLower(ch)) == strings.TrimSpace(strings.ToLower(opt.StrValue)) { allow = true break @@ -2911,16 +2918,16 @@ func (q *Query4Audit) RuleTableCharsetCheck() Rule { } } } - // 未指定字符集使用MySQL默认配置字符集,我们认为MySQL的配置是被优化过的。 - if hasCharset && !allow { - rule = HeuristicRules["TBL.005"] - break - } } } } } } + + // 未指定字符集使用MySQL默认配置字符集,我们认为MySQL的配置是被优化过的。 + if hasCharset && !allow { + rule = HeuristicRules["TBL.005"] + } return rule } @@ -2975,6 +2982,70 @@ func (q *Query4Audit) RuleForbiddenTempTable() Rule { return rule } +// RuleTableCollateCheck TBL.008 +func (q *Query4Audit) RuleTableCollateCheck() Rule { + var rule = q.RuleOK() + var allow bool + var hasCollate bool + + switch q.Stmt.(type) { + case *sqlparser.DDL, *sqlparser.DBDDL: + for _, tiStmt := range q.TiStmt { + switch node := tiStmt.(type) { + case *tidb.CreateTableStmt: + for _, opt := range node.Options { + if opt.Tp == tidb.TableOptionCollate { + hasCollate = true + for _, ch := range common.Config.AllowCollates { + if strings.TrimSpace(strings.ToLower(ch)) == strings.TrimSpace(strings.ToLower(opt.StrValue)) { + allow = true + break + } + } + } + } + + case *tidb.CreateDatabaseStmt: + for _, opt := range node.Options { + if opt.Tp == tidb.DatabaseOptionCollate { + hasCollate = true + for _, ch := range common.Config.AllowCollates { + if strings.TrimSpace(strings.ToLower(ch)) == strings.TrimSpace(strings.ToLower(opt.Value)) { + allow = true + break + } + } + } + } + + case *tidb.AlterTableStmt: + for _, spec := range node.Specs { + switch spec.Tp { + case tidb.AlterTableOption: + for _, opt := range spec.Options { + if opt.Tp == tidb.TableOptionCollate { + hasCollate = true + for _, ch := range common.Config.AllowCollates { + if strings.TrimSpace(strings.ToLower(ch)) == strings.TrimSpace(strings.ToLower(opt.StrValue)) { + allow = true + break + } + } + } + } + } + } + } + } + } + + // 未指定字符集使用MySQL默认配置COLLATE,我们认为MySQL的配置是被优化过的。 + if hasCollate && !allow { + rule = HeuristicRules["TBL.008"] + } + return rule +} + // RuleBlobDefaultValue COL.015 func (q *Query4Audit) RuleBlobDefaultValue() Rule { var rule = q.RuleOK() @@ -3157,13 +3228,13 @@ func (q *Query4Audit) RuleAllowEngine() Rule { if opt.Tp == tidb.TableOptionEngine { hasDefaultEngine = true // 使用了非推荐的存储引擎 - for _, engine := range common.Config.TableAllowEngines { + for _, engine := range common.Config.AllowEngines { if strings.EqualFold(opt.StrValue, engine) { allowedEngine = true } } - // common.Config.TableAllowEngines 为空时不给予建议 - if !allowedEngine && len(common.Config.TableAllowEngines) > 0 { + // common.Config.AllowEngines 为空时不给予建议 + if !allowedEngine && len(common.Config.AllowEngines) > 0 { rule = HeuristicRules["TBL.002"] break } @@ -3181,13 +3252,13 @@ func (q *Query4Audit) RuleAllowEngine() Rule { for _, opt := range spec.Options { if opt.Tp == tidb.TableOptionEngine { // 使用了非推荐的存储引擎 - for _, engine := range common.Config.TableAllowEngines { + for _, engine := range common.Config.AllowEngines { if strings.EqualFold(opt.StrValue, engine) { allowedEngine = true } } - // common.Config.TableAllowEngines 为空时不给予建议 - if !allowedEngine && len(common.Config.TableAllowEngines) > 0 { + // common.Config.AllowEngines 为空时不给予建议 + if !allowedEngine && len(common.Config.AllowEngines) > 0 { rule = HeuristicRules["TBL.002"] break } diff --git a/advisor/heuristic_test.go b/advisor/heuristic_test.go index 0323a8c..0744e47 100644 --- a/advisor/heuristic_test.go +++ b/advisor/heuristic_test.go @@ -2781,6 +2781,7 @@ func TestRuleTableCharsetCheck(t *testing.T) { common.Log.Debug("Entering function: %s", common.GetFunctionName()) sqls := [][]string{ { + "CREATE DATABASE sbtest /*!40100 DEFAULT CHARACTER SET latin1 */;", "create table tbl (a int) DEFAULT CHARSET=latin1;", "ALTER TABLE tbl CONVERT TO CHARACTER SET latin1;", }, @@ -2815,6 +2816,44 @@ func TestRuleTableCharsetCheck(t *testing.T) { common.Log.Debug("Exiting function: %s", common.GetFunctionName()) } +// TBL.008 +func TestRuleTableCollateCheck(t *testing.T) { + common.Log.Debug("Entering function: %s", common.GetFunctionName()) + sqls := [][]string{ + { + "CREATE DATABASE sbtest /*!40100 DEFAULT COLLATE latin1_bin */;", + "create table tbl (a int) DEFAULT COLLATE=latin1_bin;", + }, + { + "create table tlb (a int);", + "ALTER TABLE `tbl` add column a int, add column b int ;", + }, + } + for _, sql := range sqls[0] { + q, err := NewQuery4Audit(sql) + if err == nil { + rule := q.RuleTableCollateCheck() + if rule.Item != "TBL.008" { + t.Error("Rule not match:", rule.Item, "Expect : TBL.008") + } + } else { + t.Error("sqlparser.Parse Error:", err) + } + } + for _, sql := range sqls[1] { + q, err := NewQuery4Audit(sql) + if err == nil { + rule := q.RuleTableCollateCheck() + if rule.Item != "OK" { + t.Error("Rule not match:", rule.Item, "Expect : OK") + } + } else { + t.Error("sqlparser.Parse Error:", err) + } + } + common.Log.Debug("Exiting function: %s", common.GetFunctionName()) +} + // COL.015 func TestRuleBlobDefaultValue(t *testing.T) { common.Log.Debug("Entering function: %s", common.GetFunctionName()) diff --git a/advisor/rules.go b/advisor/rules.go index 5d0013c..5f3b7ce 100644 --- a/advisor/rules.go +++ b/advisor/rules.go @@ -1069,7 +1069,7 @@ func init() { Item: "TBL.002", Severity: "L4", Summary: "请为表选择合适的存储引擎", - Content: `建表或修改表的存储引擎时建议使用推荐的存储引擎,如:` + strings.Join(common.Config.TableAllowEngines, ","), + Content: `建表或修改表的存储引擎时建议使用推荐的存储引擎,如:` + strings.Join(common.Config.AllowEngines, ","), Case: "create table test(`id` int(11) NOT NULL AUTO_INCREMENT)", Func: (*Query4Audit).RuleAllowEngine, }, @@ -1093,7 +1093,7 @@ func init() { Item: "TBL.005", Severity: "L4", Summary: "请使用推荐的字符集", - Content: `表字符集只允许设置为` + strings.Join(common.Config.TableAllowCharsets, ","), + Content: `表字符集只允许设置为` + strings.Join(common.Config.AllowCharsets, ","), Case: "CREATE TABLE tbl (a int) DEFAULT CHARSET = latin1;", Func: (*Query4Audit).RuleTableCharsetCheck, }, @@ -1113,6 +1113,14 @@ func init() { Case: "CREATE TEMPORARY TABLE `work` (`time` time DEFAULT NULL) ENGINE=InnoDB;", Func: (*Query4Audit).RuleForbiddenTempTable, }, + "TBL.008": { + Item: "TBL.008", + Severity: "L4", + Summary: "请使用推荐的COLLATE", + Content: `COLLATE 只允许设置为` + strings.Join(common.Config.AllowCollates, ","), + Case: "CREATE TABLE tbl (a int) DEFAULT COLLATE = latin1_bin;", + Func: (*Query4Audit).RuleTableCharsetCheck, + }, } } diff --git a/advisor/testdata/TestListHeuristicRules.golden b/advisor/testdata/TestListHeuristicRules.golden index ba8a11c..7bb0c8f 100644 --- a/advisor/testdata/TestListHeuristicRules.golden +++ b/advisor/testdata/TestListHeuristicRules.golden @@ -1202,3 +1202,13 @@ create view v_today (today) AS SELECT CURRENT_DATE; ```sql CREATE TEMPORARY TABLE `work` (`time` time DEFAULT NULL) ENGINE=InnoDB; ``` +## 请使用推荐的COLLATE + +* **Item**:TBL.008 +* **Severity**:L4 +* **Content**:COLLATE 只允许设置为 +* **Case**: + +```sql +CREATE TABLE tbl (a int) DEFAULT COLLATE = latin1_bin; +``` diff --git a/advisor/testdata/TestMergeConflictHeuristicRules.golden b/advisor/testdata/TestMergeConflictHeuristicRules.golden index e60fc05..782290c 100644 --- a/advisor/testdata/TestMergeConflictHeuristicRules.golden +++ b/advisor/testdata/TestMergeConflictHeuristicRules.golden @@ -114,3 +114,4 @@ advisor.Rule{Item:"TBL.004", Severity:"L2", Summary:"表的初始AUTO_INCREMENT advisor.Rule{Item:"TBL.005", Severity:"L4", Summary:"请使用推荐的字符集", Content:"表字符集只允许设置为utf8,utf8mb4", Case:"CREATE TABLE tbl (a int) DEFAULT CHARSET = latin1;", Position:0, Func:func(*advisor.Query4Audit) advisor.Rule {...}} advisor.Rule{Item:"TBL.006", Severity:"L1", Summary:"不建议使用视图", Content:"不建议使用视图", Case:"create view v_today (today) AS SELECT CURRENT_DATE;", Position:0, Func:func(*advisor.Query4Audit) advisor.Rule {...}} advisor.Rule{Item:"TBL.007", Severity:"L1", Summary:"不建议使用临时表", Content:"不建议使用临时表", Case:"CREATE TEMPORARY TABLE `work` (`time` time DEFAULT NULL) ENGINE=InnoDB;", Position:0, Func:func(*advisor.Query4Audit) advisor.Rule {...}} +advisor.Rule{Item:"TBL.008", Severity:"L4", Summary:"请使用推荐的COLLATE", Content:"COLLATE 只允许设置为", Case:"CREATE TABLE tbl (a int) DEFAULT COLLATE = latin1_bin;", Position:0, Func:func(*advisor.Query4Audit) advisor.Rule {...}} diff --git a/common/config.go b/common/config.go index 2c36bd2..ff27cae 100644 --- a/common/config.go +++ b/common/config.go @@ -98,8 +98,9 @@ type Configuration struct { MaxInCount int `yaml:"max-in-count"` // IN()最大数量 MaxIdxBytesPerColumn int `yaml:"max-index-bytes-percolumn"` // 索引中单列最大字节数,默认767 MaxIdxBytes int `yaml:"max-index-bytes"` // 索引总长度限制,默认3072 - TableAllowCharsets []string `yaml:"table-allow-charsets"` // Table 允许使用的 DEFAULT CHARSET - TableAllowEngines []string `yaml:"table-allow-engines"` // Table 允许使用的 Engine + AllowCharsets []string `yaml:"allow-charsets"` // 允许使用的 DEFAULT CHARSET + AllowCollates []string `yaml:"allow-collates"` // 允许使用的 COLLATE + AllowEngines []string `yaml:"allow-engines"` // 允许使用的存储引擎 MaxIdxCount int `yaml:"max-index-count"` // 单张表允许最多索引数 MaxColCount int `yaml:"max-column-count"` // 单张表允许最大列数 MaxValueCount int `yaml:"max-value-count"` // INSERT/REPLACE 单次允许批量写入的行数 @@ -178,8 +179,9 @@ var Config = &Configuration{ ReportJavascript: "", ReportTitle: "SQL优化分析报告", BlackList: "", - TableAllowCharsets: []string{"utf8", "utf8mb4"}, - TableAllowEngines: []string{"innodb"}, + AllowCharsets: []string{"utf8", "utf8mb4"}, + AllowCollates: []string{}, + AllowEngines: []string{"innodb"}, MaxIdxCount: 10, MaxColCount: 40, MaxValueCount: 100, @@ -531,8 +533,9 @@ func readCmdFlags() error { maxInCount := flag.Int("max-in-count", Config.MaxInCount, "MaxInCount, IN()最大数量") maxIdxBytesPerColumn := flag.Int("max-index-bytes-percolumn", Config.MaxIdxBytesPerColumn, "MaxIdxBytesPerColumn, 索引中单列最大字节数") maxIdxBytes := flag.Int("max-index-bytes", Config.MaxIdxBytes, "MaxIdxBytes, 索引总长度限制") - tableAllowCharsets := flag.String("table-allow-charsets", strings.ToLower(strings.Join(Config.TableAllowCharsets, ",")), "TableAllowCharsets") - tableAllowEngines := flag.String("table-allow-engines", strings.ToLower(strings.Join(Config.TableAllowEngines, ",")), "TableAllowEngines") + allowCharsets := flag.String("allow-charsets", strings.ToLower(strings.Join(Config.AllowCharsets, ",")), "AllowCharsets") + allowCollates := flag.String("allow-collates", strings.ToLower(strings.Join(Config.AllowCollates, ",")), "AllowCollates") + allowEngines := flag.String("allow-engines", strings.ToLower(strings.Join(Config.AllowEngines, ",")), "AllowEngines") maxIdxCount := flag.Int("max-index-count", Config.MaxIdxCount, "MaxIdxCount, 单表最大索引个数") maxColCount := flag.Int("max-column-count", Config.MaxColCount, "MaxColCount, 单表允许的最大列数") maxValueCount := flag.Int("max-value-count", Config.MaxValueCount, "MaxValueCount, INSERT/REPLACE 单次批量写入允许的行数") @@ -624,8 +627,15 @@ func readCmdFlags() error { Config.MaxIdxBytesPerColumn = *maxIdxBytesPerColumn Config.MaxIdxBytes = *maxIdxBytes - Config.TableAllowCharsets = strings.Split(strings.ToLower(*tableAllowCharsets), ",") - Config.TableAllowEngines = strings.Split(strings.ToLower(*tableAllowEngines), ",") + if *allowCharsets != "" { + Config.AllowCharsets = strings.Split(strings.ToLower(*allowCharsets), ",") + } + if *allowCollates != "" { + Config.AllowCollates = strings.Split(strings.ToLower(*allowCollates), ",") + } + if *allowEngines != "" { + Config.AllowEngines = strings.Split(strings.ToLower(*allowEngines), ",") + } Config.MaxIdxCount = *maxIdxCount Config.MaxColCount = *maxColCount Config.MaxValueCount = *maxValueCount diff --git a/common/testdata/TestPrintConfiguration.golden b/common/testdata/TestPrintConfiguration.golden index 314b355..af68710 100644 --- a/common/testdata/TestPrintConfiguration.golden +++ b/common/testdata/TestPrintConfiguration.golden @@ -55,10 +55,11 @@ allow-drop-index: false max-in-count: 10 max-index-bytes-percolumn: 767 max-index-bytes: 3072 -table-allow-charsets: +allow-charsets: - utf8 - utf8mb4 -table-allow-engines: +allow-collates: [] +allow-engines: - innodb max-index-count: 10 max-column-count: 40 -- GitLab