From 6f6f804bcf5179ad16f949dbb7cc295f7d324ffe Mon Sep 17 00:00:00 2001 From: Leon Zhang Date: Mon, 26 Nov 2018 20:00:26 +0800 Subject: [PATCH] add ARG.012 RuleInsertValues insert values count large than -max-value-count will give ARG.012 --- advisor/heuristic.go | 15 +++++++ advisor/heuristic_test.go | 42 ++++++++++++++++++- advisor/rules.go | 9 +++- .../testdata/TestListHeuristicRules.golden | 10 +++++ .../TestMergeConflictHeuristicRules.golden | 1 + common/config.go | 4 ++ doc/heuristic.md | 10 +++++ 7 files changed, 89 insertions(+), 2 deletions(-) diff --git a/advisor/heuristic.go b/advisor/heuristic.go index a21088e..f3dc88f 100644 --- a/advisor/heuristic.go +++ b/advisor/heuristic.go @@ -2093,6 +2093,21 @@ func (q *Query4Audit) RuleNot() Rule { return rule } +// RuleInsertValues ARG.012 +func (q *Query4Audit) RuleInsertValues() Rule { + var rule = q.RuleOK() + switch s := q.Stmt.(type) { + case *sqlparser.Insert: + switch val := s.Rows.(type) { + case sqlparser.Values: + if len(val) > common.Config.MaxValueCount { + rule = HeuristicRules["ARG.012"] + } + } + } + return rule +} + // RuleUNIONUsage SUB.002 func (q *Query4Audit) RuleUNIONUsage() Rule { var rule = q.RuleOK() diff --git a/advisor/heuristic_test.go b/advisor/heuristic_test.go index 82c702b..b2d52af 100644 --- a/advisor/heuristic_test.go +++ b/advisor/heuristic_test.go @@ -1822,7 +1822,7 @@ func TestRuleHint(t *testing.T) { } // ARG.011 -func TestNot(t *testing.T) { +func TestRuleNot(t *testing.T) { common.Log.Debug("Entering function: %s", common.GetFunctionName()) sqls := [][]string{ { @@ -1859,6 +1859,46 @@ func TestNot(t *testing.T) { common.Log.Debug("Exiting function: %s", common.GetFunctionName()) } +// ARG.012 +func TestRuleInsertValues(t *testing.T) { + common.Log.Debug("Entering function: %s", common.GetFunctionName()) + sqls := [][]string{ + { + `INSERT INTO tb VALUES (1), (2)`, + `REPLACE INTO tb VALUES (1), (2)`, + }, + { + `INSERT INTO tb VALUES (1)`, + }, + } + oldMaxValueCount := common.Config.MaxValueCount + common.Config.MaxValueCount = 1 + for _, sql := range sqls[0] { + q, err := NewQuery4Audit(sql) + if err == nil { + rule := q.RuleInsertValues() + if rule.Item != "ARG.012" { + t.Error("Rule not match:", rule.Item, "Expect : ARG.012") + } + } else { + t.Error("sqlparser.Parse Error:", err) + } + } + for _, sql := range sqls[1] { + q, err := NewQuery4Audit(sql) + if err == nil { + rule := q.RuleInsertValues() + if rule.Item != "OK" { + t.Error("Rule not match:", rule.Item, "Expect : OK") + } + } else { + t.Error("sqlparser.Parse Error:", err) + } + } + common.Config.MaxValueCount = oldMaxValueCount + common.Log.Debug("Exiting function: %s", common.GetFunctionName()) +} + // SUB.002 func TestRuleUNIONUsage(t *testing.T) { common.Log.Debug("Entering function: %s", common.GetFunctionName()) diff --git a/advisor/rules.go b/advisor/rules.go index 3215065..37de8fe 100644 --- a/advisor/rules.go +++ b/advisor/rules.go @@ -264,6 +264,14 @@ func init() { Case: "select id from t where num not in(1,2,3);", Func: (*Query4Audit).RuleNot, }, + "ARG.012": { + Item: "ARG.012", + Severity: "L2", + Summary: "一次性 INSERT/REPLACE 的数据过多", + Content: "单条 INSERT/REPLACE 语句批量插入大量数据性能较差,甚至可能导致从库同步延迟。为了提升性能,减少批量写入数据对从库同步延时的影响,建议采用分批次插入的方法。", + Case: "INSERT INTO tb (a) VALUES (1), (2)", + Func: (*Query4Audit).RuleInsertValues, + }, "CLA.001": { Item: "CLA.001", Severity: "L4", @@ -280,7 +288,6 @@ func init() { Case: "select name from tbl where id < 1000 order by rand(number)", Func: (*Query4Audit).RuleOrderByRand, }, - "CLA.003": { Item: "CLA.003", Severity: "L2", diff --git a/advisor/testdata/TestListHeuristicRules.golden b/advisor/testdata/TestListHeuristicRules.golden index 7afdff8..f6b37d8 100644 --- a/advisor/testdata/TestListHeuristicRules.golden +++ b/advisor/testdata/TestListHeuristicRules.golden @@ -182,6 +182,16 @@ SELECT * FROM t1 USE INDEX (i1) ORDER BY a; ```sql select id from t where num not in(1,2,3); ``` +## 一次性 INSERT/REPLACE 的数据过多 + +* **Item**:ARG.012 +* **Severity**:L2 +* **Content**:单条 INSERT/REPLACE 语句批量插入大量数据性能较差,甚至可能导致从库同步延迟。为了提升性能,减少批量写入数据对从库同步延时的影响,建议采用分批次插入的方法。 +* **Case**: + +```sql +INSERT INTO tb (a) VALUES (1), (2) +``` ## 最外层 SELECT 未指定 WHERE 条件 * **Item**:CLA.001 diff --git a/advisor/testdata/TestMergeConflictHeuristicRules.golden b/advisor/testdata/TestMergeConflictHeuristicRules.golden index f403e35..219114a 100644 --- a/advisor/testdata/TestMergeConflictHeuristicRules.golden +++ b/advisor/testdata/TestMergeConflictHeuristicRules.golden @@ -15,6 +15,7 @@ advisor.Rule{Item:"ARG.008", Severity:"L1", Summary:"OR 查询索引列时请尽 advisor.Rule{Item:"ARG.009", Severity:"L1", Summary:"引号中的字符串开头或结尾包含空格", Content:"如果 VARCHAR 列的前后存在空格将可能引起逻辑问题,如在 MySQL 5.5中 'a' 和 'a ' 可能会在查询中被认为是相同的值。", Case:"SELECT 'abc '", Position:0, Func:func(*advisor.Query4Audit) advisor.Rule {...}} advisor.Rule{Item:"ARG.010", Severity:"L1", Summary:"不要使用 hint,如:sql_no_cache, force index, ignore key, straight join等", Content:"hint 是用来强制 SQL 按照某个执行计划来执行,但随着数据量变化我们无法保证自己当初的预判是正确的。", Case:"SELECT * FROM t1 USE INDEX (i1) ORDER BY a;", Position:0, Func:func(*advisor.Query4Audit) advisor.Rule {...}} advisor.Rule{Item:"ARG.011", Severity:"L3", Summary:"不要使用负向查询,如:NOT IN/NOT LIKE", Content:"请尽量不要使用负向查询,这将导致全表扫描,对查询性能影响较大。", Case:"select id from t where num not in(1,2,3);", Position:0, Func:func(*advisor.Query4Audit) advisor.Rule {...}} +advisor.Rule{Item:"ARG.012", Severity:"L2", Summary:"一次性 INSERT/REPLACE 的数据过多", Content:"单条 INSERT/REPLACE 语句批量插入大量数据性能较差,甚至可能导致从库同步延迟。为了提升性能,减少批量写入数据对从库同步延时的影响,建议采用分批次插入的方法。", Case:"INSERT INTO tb (a) VALUES (1), (2)", Position:0, Func:func(*advisor.Query4Audit) advisor.Rule {...}} advisor.Rule{Item:"CLA.001", Severity:"L4", Summary:"最外层 SELECT 未指定 WHERE 条件", Content:"SELECT 语句没有 WHERE 子句,可能检查比预期更多的行(全表扫描)。对于 SELECT COUNT(*) 类型的请求如果不要求精度,建议使用 SHOW TABLE STATUS 或 EXPLAIN 替代。", Case:"select id from tbl", Position:0, Func:func(*advisor.Query4Audit) advisor.Rule {...}} advisor.Rule{Item:"CLA.002", Severity:"L3", Summary:"不建议使用 ORDER BY RAND()", Content:"ORDER BY RAND() 是从结果集中检索随机行的一种非常低效的方法,因为它会对整个结果进行排序并丢弃其大部分数据。", Case:"select name from tbl where id < 1000 order by rand(number)", Position:0, Func:func(*advisor.Query4Audit) advisor.Rule {...}} advisor.Rule{Item:"CLA.003", Severity:"L2", Summary:"不建议使用带 OFFSET 的LIMIT 查询", Content:"使用 LIMIT 和 OFFSET 对结果集分页的复杂度是 O(n^2),并且会随着数据增大而导致性能问题。采用“书签”扫描的方法实现分页效率更高。", Case:"select c1,c2 from tbl where name=xx order by number limit 1 offset 20", Position:0, Func:func(*advisor.Query4Audit) advisor.Rule {...}} diff --git a/common/config.go b/common/config.go index c0b4c79..4f84116 100644 --- a/common/config.go +++ b/common/config.go @@ -101,6 +101,7 @@ type Configuration struct { TableAllowEngines []string `yaml:"table-allow-engines"` // Table 允许使用的 Engine MaxIdxCount int `yaml:"max-index-count"` // 单张表允许最多索引数 MaxColCount int `yaml:"max-column-count"` // 单张表允许最大列数 + MaxValueCount int `yaml:"max-value-count"` // INSERT/REPLACE 单次允许批量写入的行数 IdxPrefix string `yaml:"index-prefix"` // 普通索引建议使用的前缀 UkPrefix string `yaml:"unique-key-prefix"` // 唯一键建议使用的前缀 MaxSubqueryDepth int `yaml:"max-subquery-depth"` // 子查询最大尝试 @@ -180,6 +181,7 @@ var Config = &Configuration{ TableAllowEngines: []string{"innodb"}, MaxIdxCount: 10, MaxColCount: 40, + MaxValueCount: 100, MaxInCount: 10, IdxPrefix: "idx_", UkPrefix: "uk_", @@ -532,6 +534,7 @@ func readCmdFlags() error { tableAllowEngines := flag.String("table-allow-engines", strings.ToLower(strings.Join(Config.TableAllowEngines, ",")), "TableAllowEngines") maxIdxCount := flag.Int("max-index-count", Config.MaxIdxCount, "MaxIdxCount, 单表最大索引个数") maxColCount := flag.Int("max-column-count", Config.MaxColCount, "MaxColCount, 单表允许的最大列数") + maxValueCount := flag.Int("max-value-count", Config.MaxValueCount, "MaxValueCount, INSERT/REPLACE 单次批量写入允许的行数") idxPrefix := flag.String("index-prefix", Config.IdxPrefix, "IdxPrefix") ukPrefix := flag.String("unique-key-prefix", Config.UkPrefix, "UkPrefix") maxSubqueryDepth := flag.Int("max-subquery-depth", Config.MaxSubqueryDepth, "MaxSubqueryDepth") @@ -628,6 +631,7 @@ func readCmdFlags() error { Config.TableAllowEngines = strings.Split(strings.ToLower(*tableAllowEngines), ",") Config.MaxIdxCount = *maxIdxCount Config.MaxColCount = *maxColCount + Config.MaxValueCount = *maxValueCount Config.IdxPrefix = *idxPrefix Config.UkPrefix = *ukPrefix Config.MaxSubqueryDepth = *maxSubqueryDepth diff --git a/doc/heuristic.md b/doc/heuristic.md index 7afdff8..f6b37d8 100644 --- a/doc/heuristic.md +++ b/doc/heuristic.md @@ -182,6 +182,16 @@ SELECT * FROM t1 USE INDEX (i1) ORDER BY a; ```sql select id from t where num not in(1,2,3); ``` +## 一次性 INSERT/REPLACE 的数据过多 + +* **Item**:ARG.012 +* **Severity**:L2 +* **Content**:单条 INSERT/REPLACE 语句批量插入大量数据性能较差,甚至可能导致从库同步延迟。为了提升性能,减少批量写入数据对从库同步延时的影响,建议采用分批次插入的方法。 +* **Case**: + +```sql +INSERT INTO tb (a) VALUES (1), (2) +``` ## 最外层 SELECT 未指定 WHERE 条件 * **Item**:CLA.001 -- GitLab