From 6f6f804bcf5179ad16f949dbb7cc295f7d324ffe Mon Sep 17 00:00:00 2001
From: Leon Zhang <martianzhang@hotmail.com>
Date: Mon, 26 Nov 2018 20:00:26 +0800
Subject: [PATCH] add ARG.012 RuleInsertValues

  insert values count large than -max-value-count will give ARG.012
---
 advisor/heuristic.go                          | 15 +++++++
 advisor/heuristic_test.go                     | 42 ++++++++++++++++++-
 advisor/rules.go                              |  9 +++-
 .../testdata/TestListHeuristicRules.golden    | 10 +++++
 .../TestMergeConflictHeuristicRules.golden    |  1 +
 common/config.go                              |  4 ++
 doc/heuristic.md                              | 10 +++++
 7 files changed, 89 insertions(+), 2 deletions(-)

diff --git a/advisor/heuristic.go b/advisor/heuristic.go
index a21088e..f3dc88f 100644
--- a/advisor/heuristic.go
+++ b/advisor/heuristic.go
@@ -2093,6 +2093,21 @@ func (q *Query4Audit) RuleNot() Rule {
 	return rule
 }
 
+// RuleInsertValues ARG.012
+func (q *Query4Audit) RuleInsertValues() Rule {
+	var rule = q.RuleOK()
+	switch s := q.Stmt.(type) {
+	case *sqlparser.Insert:
+		switch val := s.Rows.(type) {
+		case sqlparser.Values:
+			if len(val) > common.Config.MaxValueCount {
+				rule = HeuristicRules["ARG.012"]
+			}
+		}
+	}
+	return rule
+}
+
 // RuleUNIONUsage SUB.002
 func (q *Query4Audit) RuleUNIONUsage() Rule {
 	var rule = q.RuleOK()
diff --git a/advisor/heuristic_test.go b/advisor/heuristic_test.go
index 82c702b..b2d52af 100644
--- a/advisor/heuristic_test.go
+++ b/advisor/heuristic_test.go
@@ -1822,7 +1822,7 @@ func TestRuleHint(t *testing.T) {
 }
 
 // ARG.011
-func TestNot(t *testing.T) {
+func TestRuleNot(t *testing.T) {
 	common.Log.Debug("Entering function: %s", common.GetFunctionName())
 	sqls := [][]string{
 		{
@@ -1859,6 +1859,46 @@ func TestNot(t *testing.T) {
 	common.Log.Debug("Exiting function: %s", common.GetFunctionName())
 }
 
+// ARG.012
+func TestRuleInsertValues(t *testing.T) {
+	common.Log.Debug("Entering function: %s", common.GetFunctionName())
+	sqls := [][]string{
+		{
+			`INSERT INTO tb VALUES (1), (2)`,
+			`REPLACE INTO tb VALUES (1), (2)`,
+		},
+		{
+			`INSERT INTO tb VALUES (1)`,
+		},
+	}
+	oldMaxValueCount := common.Config.MaxValueCount
+	common.Config.MaxValueCount = 1
+	for _, sql := range sqls[0] {
+		q, err := NewQuery4Audit(sql)
+		if err == nil {
+			rule := q.RuleInsertValues()
+			if rule.Item != "ARG.012" {
+				t.Error("Rule not match:", rule.Item, "Expect : ARG.012")
+			}
+		} else {
+			t.Error("sqlparser.Parse Error:", err)
+		}
+	}
+	for _, sql := range sqls[1] {
+		q, err := NewQuery4Audit(sql)
+		if err == nil {
+			rule := q.RuleInsertValues()
+			if rule.Item != "OK" {
+				t.Error("Rule not match:", rule.Item, "Expect : OK")
+			}
+		} else {
+			t.Error("sqlparser.Parse Error:", err)
+		}
+	}
+	common.Config.MaxValueCount = oldMaxValueCount
+	common.Log.Debug("Exiting function: %s", common.GetFunctionName())
+}
+
 // SUB.002
 func TestRuleUNIONUsage(t *testing.T) {
 	common.Log.Debug("Entering function: %s", common.GetFunctionName())
diff --git a/advisor/rules.go b/advisor/rules.go
index 3215065..37de8fe 100644
--- a/advisor/rules.go
+++ b/advisor/rules.go
@@ -264,6 +264,14 @@ func init() {
 			Case:     "select id from t where num not in(1,2,3);",
 			Func:     (*Query4Audit).RuleNot,
 		},
+		"ARG.012": {
+			Item:     "ARG.012",
+			Severity: "L2",
+			Summary:  "一次性 INSERT/REPLACE 的数据过多",
+			Content:  "单条 INSERT/REPLACE 语句批量插入大量数据性能较差，甚至可能导致从库同步延迟。为了提升性能，减少批量写入数据对从库同步延时的影响，建议采用分批次插入的方法。",
+			Case:     "INSERT INTO tb (a) VALUES (1), (2)",
+			Func:     (*Query4Audit).RuleInsertValues,
+		},
 		"CLA.001": {
 			Item:     "CLA.001",
 			Severity: "L4",
@@ -280,7 +288,6 @@ func init() {
 			Case:     "select name from tbl where id < 1000 order by rand(number)",
 			Func:     (*Query4Audit).RuleOrderByRand,
 		},
-
 		"CLA.003": {
 			Item:     "CLA.003",
 			Severity: "L2",
diff --git a/advisor/testdata/TestListHeuristicRules.golden b/advisor/testdata/TestListHeuristicRules.golden
index 7afdff8..f6b37d8 100644
--- a/advisor/testdata/TestListHeuristicRules.golden
+++ b/advisor/testdata/TestListHeuristicRules.golden
@@ -182,6 +182,16 @@ SELECT * FROM t1 USE INDEX (i1) ORDER BY a;
 ```sql
 select id from t where num not in(1,2,3);
 ```
+## 一次性 INSERT/REPLACE 的数据过多
+
+* **Item**:ARG.012
+* **Severity**:L2
+* **Content**:单条 INSERT/REPLACE 语句批量插入大量数据性能较差，甚至可能导致从库同步延迟。为了提升性能，减少批量写入数据对从库同步延时的影响，建议采用分批次插入的方法。
+* **Case**:
+
+```sql
+INSERT INTO tb (a) VALUES (1), (2)
+```
 ## 最外层 SELECT 未指定 WHERE 条件
 
 * **Item**:CLA.001
diff --git a/advisor/testdata/TestMergeConflictHeuristicRules.golden b/advisor/testdata/TestMergeConflictHeuristicRules.golden
index f403e35..219114a 100644
--- a/advisor/testdata/TestMergeConflictHeuristicRules.golden
+++ b/advisor/testdata/TestMergeConflictHeuristicRules.golden
@@ -15,6 +15,7 @@ advisor.Rule{Item:"ARG.008", Severity:"L1", Summary:"OR 查询索引列时请尽
 advisor.Rule{Item:"ARG.009", Severity:"L1", Summary:"引号中的字符串开头或结尾包含空格", Content:"如果 VARCHAR 列的前后存在空格将可能引起逻辑问题，如在 MySQL 5.5中 'a' 和 'a ' 可能会在查询中被认为是相同的值。", Case:"SELECT 'abc '", Position:0, Func:func(*advisor.Query4Audit) advisor.Rule {...}}
 advisor.Rule{Item:"ARG.010", Severity:"L1", Summary:"不要使用 hint，如：sql_no_cache, force index, ignore key, straight join等", Content:"hint 是用来强制 SQL 按照某个执行计划来执行，但随着数据量变化我们无法保证自己当初的预判是正确的。", Case:"SELECT * FROM t1 USE INDEX (i1) ORDER BY a;", Position:0, Func:func(*advisor.Query4Audit) advisor.Rule {...}}
 advisor.Rule{Item:"ARG.011", Severity:"L3", Summary:"不要使用负向查询，如：NOT IN/NOT LIKE", Content:"请尽量不要使用负向查询，这将导致全表扫描，对查询性能影响较大。", Case:"select id from t where num not in(1,2,3);", Position:0, Func:func(*advisor.Query4Audit) advisor.Rule {...}}
+advisor.Rule{Item:"ARG.012", Severity:"L2", Summary:"一次性 INSERT/REPLACE 的数据过多", Content:"单条 INSERT/REPLACE 语句批量插入大量数据性能较差，甚至可能导致从库同步延迟。为了提升性能，减少批量写入数据对从库同步延时的影响，建议采用分批次插入的方法。", Case:"INSERT INTO tb (a) VALUES (1), (2)", Position:0, Func:func(*advisor.Query4Audit) advisor.Rule {...}}
 advisor.Rule{Item:"CLA.001", Severity:"L4", Summary:"最外层 SELECT 未指定 WHERE 条件", Content:"SELECT 语句没有 WHERE 子句，可能检查比预期更多的行(全表扫描)。对于 SELECT COUNT(*) 类型的请求如果不要求精度，建议使用 SHOW TABLE STATUS 或 EXPLAIN 替代。", Case:"select id from tbl", Position:0, Func:func(*advisor.Query4Audit) advisor.Rule {...}}
 advisor.Rule{Item:"CLA.002", Severity:"L3", Summary:"不建议使用 ORDER BY RAND()", Content:"ORDER BY RAND() 是从结果集中检索随机行的一种非常低效的方法，因为它会对整个结果进行排序并丢弃其大部分数据。", Case:"select name from tbl where id < 1000 order by rand(number)", Position:0, Func:func(*advisor.Query4Audit) advisor.Rule {...}}
 advisor.Rule{Item:"CLA.003", Severity:"L2", Summary:"不建议使用带 OFFSET 的LIMIT 查询", Content:"使用 LIMIT 和 OFFSET 对结果集分页的复杂度是 O(n^2)，并且会随着数据增大而导致性能问题。采用“书签”扫描的方法实现分页效率更高。", Case:"select c1,c2 from tbl where name=xx order by number limit 1 offset 20", Position:0, Func:func(*advisor.Query4Audit) advisor.Rule {...}}
diff --git a/common/config.go b/common/config.go
index c0b4c79..4f84116 100644
--- a/common/config.go
+++ b/common/config.go
@@ -101,6 +101,7 @@ type Configuration struct {
 	TableAllowEngines    []string `yaml:"table-allow-engines"`       // Table 允许使用的 Engine
 	MaxIdxCount          int      `yaml:"max-index-count"`           // 单张表允许最多索引数
 	MaxColCount          int      `yaml:"max-column-count"`          // 单张表允许最大列数
+	MaxValueCount        int      `yaml:"max-value-count"`           // INSERT/REPLACE 单次允许批量写入的行数
 	IdxPrefix            string   `yaml:"index-prefix"`              // 普通索引建议使用的前缀
 	UkPrefix             string   `yaml:"unique-key-prefix"`         // 唯一键建议使用的前缀
 	MaxSubqueryDepth     int      `yaml:"max-subquery-depth"`        // 子查询最大尝试
@@ -180,6 +181,7 @@ var Config = &Configuration{
 	TableAllowEngines:    []string{"innodb"},
 	MaxIdxCount:          10,
 	MaxColCount:          40,
+	MaxValueCount:        100,
 	MaxInCount:           10,
 	IdxPrefix:            "idx_",
 	UkPrefix:             "uk_",
@@ -532,6 +534,7 @@ func readCmdFlags() error {
 	tableAllowEngines := flag.String("table-allow-engines", strings.ToLower(strings.Join(Config.TableAllowEngines, ",")), "TableAllowEngines")
 	maxIdxCount := flag.Int("max-index-count", Config.MaxIdxCount, "MaxIdxCount, 单表最大索引个数")
 	maxColCount := flag.Int("max-column-count", Config.MaxColCount, "MaxColCount, 单表允许的最大列数")
+	maxValueCount := flag.Int("max-value-count", Config.MaxValueCount, "MaxValueCount, INSERT/REPLACE 单次批量写入允许的行数")
 	idxPrefix := flag.String("index-prefix", Config.IdxPrefix, "IdxPrefix")
 	ukPrefix := flag.String("unique-key-prefix", Config.UkPrefix, "UkPrefix")
 	maxSubqueryDepth := flag.Int("max-subquery-depth", Config.MaxSubqueryDepth, "MaxSubqueryDepth")
@@ -628,6 +631,7 @@ func readCmdFlags() error {
 	Config.TableAllowEngines = strings.Split(strings.ToLower(*tableAllowEngines), ",")
 	Config.MaxIdxCount = *maxIdxCount
 	Config.MaxColCount = *maxColCount
+	Config.MaxValueCount = *maxValueCount
 	Config.IdxPrefix = *idxPrefix
 	Config.UkPrefix = *ukPrefix
 	Config.MaxSubqueryDepth = *maxSubqueryDepth
diff --git a/doc/heuristic.md b/doc/heuristic.md
index 7afdff8..f6b37d8 100644
--- a/doc/heuristic.md
+++ b/doc/heuristic.md
@@ -182,6 +182,16 @@ SELECT * FROM t1 USE INDEX (i1) ORDER BY a;
 ```sql
 select id from t where num not in(1,2,3);
 ```
+## 一次性 INSERT/REPLACE 的数据过多
+
+* **Item**:ARG.012
+* **Severity**:L2
+* **Content**:单条 INSERT/REPLACE 语句批量插入大量数据性能较差，甚至可能导致从库同步延迟。为了提升性能，减少批量写入数据对从库同步延时的影响，建议采用分批次插入的方法。
+* **Case**:
+
+```sql
+INSERT INTO tb (a) VALUES (1), (2)
+```
 ## 最外层 SELECT 未指定 WHERE 条件
 
 * **Item**:CLA.001
-- 
GitLab