From 80baea183a6825b2d9349ede6cf1eeef47afca63 Mon Sep 17 00:00:00 2001 From: Leon Zhang Date: Wed, 23 Oct 2019 16:10:32 +0800 Subject: [PATCH] add new heuristic rule: RuleUNIONLimit SUB.007 --- Makefile | 2 +- advisor/heuristic.go | 18 ++++++++++++++++++ advisor/heuristic_test.go | 38 ++++++++++++++++++++++++++++++++++++++ advisor/rules.go | 8 ++++++++ doc/heuristic.md | 10 ++++++++++ 5 files changed, 75 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index fd6858b..7a654a6 100644 --- a/Makefile +++ b/Makefile @@ -43,7 +43,7 @@ MYSQL_VERSION := $(or ${MYSQL_VERSION}, ${MYSQL_VERSION}, latest) all: | fmt build .PHONY: go_version_check -GO_VERSION_MIN=1.10 +GO_VERSION_MIN=1.12 # Parse out the x.y or x.y.z version and output a single value x*10000+y*100+z (e.g., 1.9 is 10900) # that allows the three components to be checked in a single comparison. VER_TO_INT:=awk '{split(substr($$0, match ($$0, /[0-9\.]+/)), a, "."); print a[1]*10000+a[2]*100+a[3]}' diff --git a/advisor/heuristic.go b/advisor/heuristic.go index 82f430f..6413ed0 100644 --- a/advisor/heuristic.go +++ b/advisor/heuristic.go @@ -1638,6 +1638,24 @@ func (q *Query4Audit) RuleSubQueryFunctions() Rule { return rule } +// RuleUNIONLimit SUB.007 +func (q *Query4Audit) RuleUNIONLimit() Rule { + var rule = q.RuleOK() + for _, tiStmtNode := range q.TiStmt { + switch stmt := tiStmtNode.(type) { + case *tidb.UnionStmt: + if stmt.Limit != nil { + for _, sel := range stmt.SelectList.Selects { + if sel.Limit == nil { + rule = HeuristicRules["SUB.007"] + } + } + } + } + } + return rule +} + // RuleMultiValueAttribute LIT.003 func (q *Query4Audit) RuleMultiValueAttribute() Rule { var rule = q.RuleOK() diff --git a/advisor/heuristic_test.go b/advisor/heuristic_test.go index 51de9e7..c2be70d 100644 --- a/advisor/heuristic_test.go +++ b/advisor/heuristic_test.go @@ -2308,6 +2308,44 @@ func TestRuleSubQueryFunctions(t *testing.T) { common.Log.Debug("Exiting function: %s", common.GetFunctionName()) } +// SUB.007 +func TestRuleUNIONLimit(t *testing.T) { + common.Log.Debug("Entering function: %s", common.GetFunctionName()) + sqls := [][]string{ + { + `(SELECT * FROM tb1 ORDER BY name) UNION ALL (SELECT * FROM tb2 ORDER BY name) LIMIT 20;`, + `(SELECT * FROM tb1 ORDER BY name LIMIT 20) UNION ALL (SELECT * FROM tb2 ORDER BY name) LIMIT 20;`, + `(SELECT * FROM tb1 ORDER BY name) UNION ALL (SELECT * FROM tb2 ORDER BY name LIMIT 20) LIMIT 20;`, + }, + { + `(SELECT * FROM tb1 ORDER BY name LIMIT 20) UNION ALL (SELECT * FROM tb2 ORDER BY name LIMIT 20) LIMIT 20;`, + }, + } + for _, sql := range sqls[0] { + q, err := NewQuery4Audit(sql) + if err == nil { + rule := q.RuleUNIONLimit() + if rule.Item != "SUB.007" { + t.Error("Rule not match:", rule.Item, "Expect : SUB.007") + } + } else { + t.Error("sqlparser.Parse Error:", err) + } + } + for _, sql := range sqls[1] { + q, err := NewQuery4Audit(sql) + if err == nil { + rule := q.RuleUNIONLimit() + if rule.Item != "OK" { + t.Error("Rule not match:", rule.Item, "Expect : OK") + } + } else { + t.Error("sqlparser.Parse Error:", err) + } + } + common.Log.Debug("Exiting function: %s", common.GetFunctionName()) +} + // SEC.002 func TestRuleReadablePasswords(t *testing.T) { common.Log.Debug("Entering function: %s", common.GetFunctionName()) diff --git a/advisor/rules.go b/advisor/rules.go index f899b32..867f266 100644 --- a/advisor/rules.go +++ b/advisor/rules.go @@ -1121,6 +1121,14 @@ func init() { Case: "SELECT * FROM staff WHERE name IN (SELECT max(NAME) FROM customer)", Func: (*Query4Audit).RuleSubQueryFunctions, }, + "SUB.007": { + Item: "SUB.007", + Severity: "L2", + Summary: "外层带有 LIMIT 输出限制的 UNION 联合查询,其内层查询建议也添加 LIMIT 输出限制", + Content: `有时 MySQL 无法将限制条件从外层“下推”到内层,这会使得原本可以限制能够限制部分返回结果的条件无法应用到内层查询的优化上。比如:(SELECT * FROM tb1 ORDER BY name) UNION ALL (SELECT * FROM tb2 ORDER BY name) LIMIT 20; MySQL 会将两个子查询的结果放在一个临时表中,然后取出 20 条结果,可以通过在两个子查询中添加 LIMIT 20 来减少临时表中的数据。(SELECT * FROM tb1 ORDER BY name LIMIT 20) UNION ALL (SELECT * FROM tb2 ORDER BY name LIMIT 20) LIMIT 20;`, + Case: "(SELECT * FROM tb1 ORDER BY name LIMIT 20) UNION ALL (SELECT * FROM tb2 ORDER BY name LIMIT 20) LIMIT 20;", + Func: (*Query4Audit).RuleUNIONLimit, + }, "TBL.001": { Item: "TBL.001", Severity: "L4", diff --git a/doc/heuristic.md b/doc/heuristic.md index 820793a..8d24101 100644 --- a/doc/heuristic.md +++ b/doc/heuristic.md @@ -1212,6 +1212,16 @@ SELECT * FROM staff WHERE name IN (SELECT NAME FROM customer ORDER BY name LIMIT ```sql SELECT * FROM staff WHERE name IN (SELECT max(NAME) FROM customer) ``` +## 外层带有 LIMIT 输出限制的 UNION 联合查询,其内层查询建议也添加 LIMIT 输出限制 + +* **Item**:SUB.007 +* **Severity**:L2 +* **Content**:有时 MySQL 无法将限制条件从外层“下推”到内层,这会使得原本可以限制能够限制部分返回结果的条件无法应用到内层查询的优化上。比如:(SELECT \* FROM tb1 ORDER BY name) UNION ALL (SELECT \* FROM tb2 ORDER BY name) LIMIT 20; MySQL 会将两个子查询的结果放在一个临时表中,然后取出 20 条结果,可以通过在两个子查询中添加 LIMIT 20 来减少临时表中的数据。(SELECT \* FROM tb1 ORDER BY name LIMIT 20) UNION ALL (SELECT \* FROM tb2 ORDER BY name LIMIT 20) LIMIT 20; +* **Case**: + +```sql +(SELECT * FROM tb1 ORDER BY name LIMIT 20) UNION ALL (SELECT * FROM tb2 ORDER BY name LIMIT 20) LIMIT 20; +``` ## 不建议使用分区表 * **Item**:TBL.001 -- GitLab