From d61e49ce7354cb7057e3e04714496c08e38b8ca5 Mon Sep 17 00:00:00 2001
From: Leon Zhang
Date: Sun, 7 Jun 2020 09:11:19 +0800
Subject: [PATCH] add new heuristic rule KWR.005 WHEN InvisibleUnicode IN SQL give suggestion update test cases golden file

---
 advisor/heuristic.go                          |  24 +
 advisor/heuristic_test.go                     |  37 ++
 advisor/rules.go                              |   8 +
 .../testdata/TestListHeuristicRules.golden    |  10 +
 .../TestMergeConflictHeuristicRules.golden    |   1 +
 ast/testdata/TestPrintPrettyStmtNode.golden   |   3 +-
 ast/testdata/TestSchemaMetaInfo.golden        |  14 +-
 ast/testdata/TestStmtNode2JSON.golden         |   5 +-
 ast/testdata/TestTokenizer.golden             | 353 ++++++-----
 ast/token.go                                  |   4 +-
 ast/token_test.go                             |   1 +
 common/testdata/TestStringStorageReq.golden   | 600 +++++++++---------
 doc/heuristic.md                              |  10 +
 13 files changed, 595 insertions(+), 475 deletions(-)

diff --git a/advisor/heuristic.go b/advisor/heuristic.go
index a7c2e7e..343392c 100644
--- a/advisor/heuristic.go
+++ b/advisor/heuristic.go
@@ -1545,6 +1545,30 @@ func (q *Query4Audit) RuleMultiBytesWord() Rule {
 	return rule
 }
 
+// RuleInvisibleUnicode KWR.005 flags invisible unicode characters (non-breaking space, zero-width space) in the SQL text.
+func (q *Query4Audit) RuleInvisibleUnicode() Rule {
+	var rule = q.RuleOK()
+	for _, tk := range ast.Tokenizer(q.Query) {
+		// An invisible multi-byte character comes out of the tokenizer as single-byte
+		// tokens, so match on its individual UTF-8 bytes and then confirm that the
+		// complete character is present. Escapes keep these literals visible in source.
+		switch tk.Val {
+		case "\xc2", "\xa0": // non-breaking space U+00A0, UTF-8 bytes C2 A0
+			if strings.Contains(q.Query, "\u00a0") {
+				rule = HeuristicRules["KWR.005"]
+				return rule
+			}
+		case "\xe2", "\x80", "\x8b": // zero-width space U+200B, UTF-8 bytes E2 80 8B
+			if strings.Contains(q.Query, "\u200b") {
+				rule = HeuristicRules["KWR.005"]
+				return rule
+			}
+		default:
+		}
+	}
+	return rule
+}
+
 // RuleInsertSelect LCK.001
 func (q *Query4Audit) RuleInsertSelect() Rule {
 	var rule = q.RuleOK()
diff --git a/advisor/heuristic_test.go b/advisor/heuristic_test.go
index 8fb2584..a5a0a8e 100644
--- a/advisor/heuristic_test.go
+++
b/advisor/heuristic_test.go
@@ -1284,6 +1284,43 @@ func TestRuleMultiBytesWord(t *testing.T) {
 	common.Log.Debug("Exiting function: %s", common.GetFunctionName())
 }
 
+// TestRuleInvisibleUnicode covers KWR.005: invisible characters in the statement itself must be reported, inside a quoted value they must not.
+func TestRuleInvisibleUnicode(t *testing.T) {
+	common.Log.Debug("Entering function: %s", common.GetFunctionName())
+	// The invisible characters are written as escapes so that they stay visible and reviewable in source.
+	sqls := [][]string{
+		{
+			"select\u00a01",    // non-breaking space in the SQL text
+			"select\u200b 1;", // zero-width space in the SQL text
+		},
+		{
+			"select 1",           // ordinary SQL
+			"select \"1\u00a0\"", // non-breaking space inside a value
+			"select \"1\u200b\"", // zero-width space inside a value
+		},
+	}
+	for _, sql := range sqls[0] {
+		q, _ := NewQuery4Audit(sql)
+		// SQL containing these characters is expected to fail the syntax check, so the parse error is ignored on purpose
+		rule := q.RuleInvisibleUnicode()
+		if rule.Item != "KWR.005" {
+			t.Errorf("Rule not match: %s Expect : KWR.005, SQL: %q", rule.Item, sql)
+		}
+	}
+	for _, sql := range sqls[1] {
+		q, err := NewQuery4Audit(sql)
+		if err == nil {
+			rule := q.RuleInvisibleUnicode()
+			if rule.Item != "OK" {
+				t.Errorf("Rule not match: %s Expect : OK, SQL: %q", rule.Item, sql)
+			}
+		} else {
+			t.Error("sqlparser.Parse Error:", err)
+		}
+	}
+	common.Log.Debug("Exiting function: %s", common.GetFunctionName())
+}
+
 // LCK.001
 func TestRuleInsertSelect(t *testing.T) {
 	common.Log.Debug("Entering function: %s", common.GetFunctionName())
diff --git a/advisor/rules.go b/advisor/rules.go
index 4d46ed1..f7f9334 100644
--- a/advisor/rules.go
+++ b/advisor/rules.go
@@ -869,6 +869,14 @@ func init() {
 		Case:     "select col as 列 from tb",
 		Func:     (*Query4Audit).RuleMultiBytesWord,
 	},
+	"KWR.005": {
+		Item:     "KWR.005",
+		Severity: "L1",
+		Summary:  "SQL 中包含 unicode 特殊字符",
+		Content:  "部分 IDE 会自动在 SQL 插入肉眼不可见的 unicode 字符。如:non-break space, zero-width space 等。Linux 下可使用 `cat -A file.sql` 命令查看不可见字符。",
+		Case:     "update\u00a0tb set\u00a0status\u00a0=\u00a01 where\u00a0id\u00a0=\u00a01;",
+		Func:     (*Query4Audit).RuleInvisibleUnicode,
+	},
 	"LCK.001": {
 		Item:     "LCK.001",
 		Severity: "L3",
diff --git a/advisor/testdata/TestListHeuristicRules.golden b/advisor/testdata/TestListHeuristicRules.golden
index
8d24101..9731c97 100644 --- a/advisor/testdata/TestListHeuristicRules.golden +++ b/advisor/testdata/TestListHeuristicRules.golden @@ -902,6 +902,16 @@ CREATE TABLE tbl ( `books` int ) ```sql select col as 列 from tb ``` +## SQL 中包含 unicode 特殊字符 + +* **Item**:KWR.005 +* **Severity**:L1 +* **Content**:部分 IDE 会自动在 SQL 插入肉眼不可见的 unicode 字符。如:non-break space, zero-width space 等。Linux 下可使用 \`cat -A file.sql\` 命令查看不可见字符。 +* **Case**: + +```sql +update tb set status = 1 where id = 1; +``` ## INSERT INTO xx SELECT 加锁粒度较大请谨慎 * **Item**:LCK.001 diff --git a/advisor/testdata/TestMergeConflictHeuristicRules.golden b/advisor/testdata/TestMergeConflictHeuristicRules.golden index 7cf697f..bddf6c7 100644 --- a/advisor/testdata/TestMergeConflictHeuristicRules.golden +++ b/advisor/testdata/TestMergeConflictHeuristicRules.golden @@ -84,6 +84,7 @@ advisor.Rule{Item:"KWR.001", Severity:"L2", Summary:"SQL_CALC_FOUND_ROWS 效率 advisor.Rule{Item:"KWR.002", Severity:"L2", Summary:"不建议使用 MySQL 关键字做列名或表名", Content:"当使用关键字做为列名或表名时程序需要对列名和表名进行转义,如果疏忽被将导致请求无法执行。", Case:"CREATE TABLE tbl ( `select` int )", Position:0, Func:func(*advisor.Query4Audit) advisor.Rule {...}} advisor.Rule{Item:"KWR.003", Severity:"L1", Summary:"不建议使用复数做列名或表名", Content:"表名应该仅仅表示表里面的实体内容,不应该表示实体数量,对应于 DO 类名也是单数形式,符合表达习惯。", Case:"CREATE TABLE tbl ( `books` int )", Position:0, Func:func(*advisor.Query4Audit) advisor.Rule {...}} advisor.Rule{Item:"KWR.004", Severity:"L1", Summary:"不建议使用使用多字节编码字符(中文)命名", Content:"为库、表、列、别名命名时建议使用英文,数字,下划线等字符,不建议使用中文或其他多字节编码字符。", Case:"select col as 列 from tb", Position:0, Func:func(*advisor.Query4Audit) advisor.Rule {...}} +advisor.Rule{Item:"KWR.005", Severity:"L1", Summary:"SQL 中包含 unicode 特殊字符", Content:"部分 IDE 会自动在 SQL 插入肉眼不可见的 unicode 字符。如:non-break space, zero-width space 等。Linux 下可使用 `cat -A file.sql` 命令查看不可见字符。", Case:"update\u00a0tb set\u00a0status\u00a0=\u00a01 where\u00a0id\u00a0=\u00a01;", Position:0, Func:func(*advisor.Query4Audit) advisor.Rule {...}} advisor.Rule{Item:"LCK.001", 
Severity:"L3", Summary:"INSERT INTO xx SELECT 加锁粒度较大请谨慎", Content:"INSERT INTO xx SELECT 加锁粒度较大请谨慎", Case:"INSERT INTO tbl SELECT * FROM tbl2;", Position:0, Func:func(*advisor.Query4Audit) advisor.Rule {...}} advisor.Rule{Item:"LCK.002", Severity:"L3", Summary:"请慎用 INSERT ON DUPLICATE KEY UPDATE", Content:"当主键为自增键时使用 INSERT ON DUPLICATE KEY UPDATE 可能会导致主键出现大量不连续快速增长,导致主键快速溢出无法继续写入。极端情况下还有可能导致主从数据不一致。", Case:"INSERT INTO t1(a,b,c) VALUES (1,2,3) ON DUPLICATE KEY UPDATE c=c+1;", Position:0, Func:func(*advisor.Query4Audit) advisor.Rule {...}} advisor.Rule{Item:"LIT.001", Severity:"L2", Summary:"用字符类型存储IP地址", Content:"字符串字面上看起来像IP地址,但不是 INET_ATON() 的参数,表示数据被存储为字符而不是整数。将IP地址存储为整数更为有效。", Case:"insert into tbl (IP,name) values('10.20.306.122','test')", Position:0, Func:func(*advisor.Query4Audit) advisor.Rule {...}} diff --git a/ast/testdata/TestPrintPrettyStmtNode.golden b/ast/testdata/TestPrintPrettyStmtNode.golden index 7c0cf76..42366e6 100644 --- a/ast/testdata/TestPrintPrettyStmtNode.golden +++ b/ast/testdata/TestPrintPrettyStmtNode.golden @@ -43,7 +43,7 @@ }, Datum: types.Datum{ k: 0x1, - collation: 0x0, + collation: "", decimal: 0x0, length: 0x0, i: 1, @@ -67,5 +67,6 @@ IsAfterUnionDistinct: false, IsInBraces: false, QueryBlockOffset: 0, + SelectIntoOpt: (*ast.SelectIntoOption)(nil), }, } diff --git a/ast/testdata/TestSchemaMetaInfo.golden b/ast/testdata/TestSchemaMetaInfo.golden index 14ec590..66b7a0d 100644 --- a/ast/testdata/TestSchemaMetaInfo.golden +++ b/ast/testdata/TestSchemaMetaInfo.golden @@ -9,7 +9,7 @@ select * from ta join tb using (id) select * from ta, tb limit 1 [`sakila`.`ta` `sakila`.`tb`] drop table tb -[`sakila`.tb`] +[`sakila`.`tb`] drop table db.tb [`db`.`tb`] drop database db @@ -17,9 +17,9 @@ drop database db create database db [`db`.`dual`] create index idx_col on tbl (col) -[`sakila`.tbl`] +[`sakila`.`tbl`] DROP INDEX idx_col on tbl -[`sakila`.tbl`] +[`sakila`.`tbl`] SELECT * FROM film WHERE length = 86; [`sakila`.`film`] SELECT * FROM film 
WHERE length IS NULL; @@ -177,11 +177,11 @@ SELECT last_update FROM film order by date(last_update); SELECT description FROM film WHERE description IN('NEWS','asd') GROUP BY description; [`sakila`.`film`] alter table address add index idx_city_id(city_id); -[`sakila`.address`] +[`sakila`.`address`] alter table inventory add index `idx_store_film` (`store_id`,`film_id`); -[`sakila`.inventory`] +[`sakila`.`inventory`] alter table inventory add index `idx_store_film` (`store_id`,`film_id`),add index `idx_store_film` (`store_id`,`film_id`),add index `idx_store_film` (`store_id`,`film_id`); -[`sakila`.inventory`] +[`sakila`.`inventory`] SELECT DATE_FORMAT(t.last_update, '%Y-%m-%d'), COUNT(DISTINCT (t.city)) FROM city t WHERE t.last_update > '2018-10-22 00:00:00' AND t.city LIKE '%Chrome%' AND t.city = 'eip' GROUP BY DATE_FORMAT(t.last_update, '%Y-%m-%d') ORDER BY DATE_FORMAT(t.last_update, '%Y-%m-%d'); [`sakila`.`city`] create table hello.t (id int unsigned); @@ -189,7 +189,7 @@ create table hello.t (id int unsigned); select * from tb where data >= ''; [`sakila`.`tb`] alter table tb alter column id drop default; -[`sakila`.tb`] +[`sakila`.`tb`] select maxId, minId from (select max(film_id) maxId, min(film_id) minId from film where last_update > '2016-03-27 02:01:01') as d; [`sakila`.`film`] select maxId, minId from (select max(film_id) maxId, min(film_id) minId from film) as d; diff --git a/ast/testdata/TestStmtNode2JSON.golden b/ast/testdata/TestStmtNode2JSON.golden index 2ccbb6f..f766114 100644 --- a/ast/testdata/TestStmtNode2JSON.golden +++ b/ast/testdata/TestStmtNode2JSON.golden @@ -32,7 +32,7 @@ }, "flag": 0, "k": 1, - "collation": 0, + "collation": "", "decimal": 0, "length": 0, "i": 1, @@ -57,7 +57,8 @@ "TableHints": null, "IsAfterUnionDistinct": false, "IsInBraces": false, - "QueryBlockOffset": 0 + "QueryBlockOffset": 0, + "SelectIntoOpt": null } ] diff --git a/ast/testdata/TestTokenizer.golden b/ast/testdata/TestTokenizer.golden index 2daeb17..b67c786 100644 
--- a/ast/testdata/TestTokenizer.golden +++ b/ast/testdata/TestTokenizer.golden @@ -1,294 +1,321 @@ []ast.Token{ {Type:57348, Val:"select", i:0}, - {Type:57396, Val:"c1", i:0}, + {Type:57397, Val:"c1", i:0}, {Type:44, Val:",", i:0}, - {Type:57396, Val:"c2", i:0}, + {Type:57397, Val:"c2", i:0}, {Type:44, Val:",", i:0}, - {Type:57396, Val:"c3", i:0}, + {Type:57397, Val:"c3", i:0}, {Type:57353, Val:"from", i:0}, - {Type:57396, Val:"t1", i:0}, + {Type:57397, Val:"t1", i:0}, {Type:44, Val:",", i:0}, - {Type:57396, Val:"t2", i:0}, - {Type:57384, Val:"join", i:0}, - {Type:57396, Val:"t3", i:0}, - {Type:57394, Val:"on", i:0}, - {Type:57396, Val:"t1", i:0}, + {Type:57397, Val:"t2", i:0}, + {Type:57385, Val:"join", i:0}, + {Type:57397, Val:"t3", i:0}, + {Type:57395, Val:"on", i:0}, + {Type:57397, Val:"t1", i:0}, {Type:46, Val:".", i:0}, - {Type:57396, Val:"c1", i:0}, + {Type:57397, Val:"c1", i:0}, {Type:61, Val:"=", i:0}, - {Type:57396, Val:"t2", i:0}, + {Type:57397, Val:"t2", i:0}, {Type:46, Val:".", i:0}, - {Type:57396, Val:"c1", i:0}, - {Type:57412, Val:"and", i:0}, - {Type:57396, Val:"t1", i:0}, + {Type:57397, Val:"c1", i:0}, + {Type:57415, Val:"and", i:0}, + {Type:57397, Val:"t1", i:0}, {Type:46, Val:".", i:0}, - {Type:57396, Val:"c3", i:0}, + {Type:57397, Val:"c3", i:0}, {Type:61, Val:"=", i:0}, - {Type:57396, Val:"t3", i:0}, + {Type:57397, Val:"t3", i:0}, {Type:46, Val:".", i:0}, - {Type:57396, Val:"c1", i:0}, + {Type:57397, Val:"c1", i:0}, {Type:57354, Val:"where", i:0}, - {Type:57396, Val:"id", i:0}, + {Type:57397, Val:"id", i:0}, {Type:62, Val:">", i:0}, - {Type:57399, Val:"1000", i:0}, + {Type:57402, Val:"1000", i:0}, } []ast.Token{ {Type:57348, Val:"select", i:0}, - {Type:57396, Val:"sourcetable", i:0}, + {Type:57397, Val:"sourcetable", i:0}, {Type:44, Val:",", i:0}, - {Type:57453, Val:"if", i:0}, + {Type:57456, Val:"if", i:0}, {Type:40, Val:"(", i:0}, - {Type:57396, Val:"f", i:0}, + {Type:57397, Val:"f", i:0}, {Type:46, Val:".", i:0}, - {Type:57396, 
Val:"lastcontent", i:0}, + {Type:57397, Val:"lastcontent", i:0}, {Type:61, Val:"=", i:0}, - {Type:57402, Val:":v1", i:0}, + {Type:57405, Val:":v1", i:0}, {Type:44, Val:",", i:0}, - {Type:57396, Val:"f", i:0}, + {Type:57397, Val:"f", i:0}, {Type:46, Val:".", i:0}, - {Type:57396, Val:"lastupdate", i:0}, + {Type:57397, Val:"lastupdate", i:0}, {Type:44, Val:",", i:0}, - {Type:57396, Val:"f", i:0}, + {Type:57397, Val:"f", i:0}, {Type:46, Val:".", i:0}, - {Type:57396, Val:"lastcontent", i:0}, + {Type:57397, Val:"lastcontent", i:0}, {Type:41, Val:")", i:0}, {Type:57364, Val:"as", i:0}, - {Type:57396, Val:"lastactivity", i:0}, + {Type:57397, Val:"lastactivity", i:0}, {Type:44, Val:",", i:0}, - {Type:57396, Val:"f", i:0}, + {Type:57397, Val:"f", i:0}, {Type:46, Val:".", i:0}, - {Type:57396, Val:"totalcount", i:0}, + {Type:57397, Val:"totalcount", i:0}, {Type:57364, Val:"as", i:0}, - {Type:57396, Val:"activity", i:0}, + {Type:57397, Val:"activity", i:0}, {Type:44, Val:",", i:0}, - {Type:57396, Val:"type", i:0}, + {Type:57397, Val:"type", i:0}, {Type:46, Val:".", i:0}, - {Type:57396, Val:"class", i:0}, + {Type:57397, Val:"class", i:0}, {Type:57364, Val:"as", i:0}, - {Type:57396, Val:"type", i:0}, + {Type:57397, Val:"type", i:0}, {Type:44, Val:",", i:0}, {Type:40, Val:"(", i:0}, - {Type:57396, Val:"f", i:0}, + {Type:57397, Val:"f", i:0}, {Type:46, Val:".", i:0}, - {Type:57396, Val:"nodeoptions", i:0}, + {Type:57397, Val:"nodeoptions", i:0}, {Type:38, Val:"&", i:0}, - {Type:57402, Val:":v2", i:0}, + {Type:57405, Val:":v2", i:0}, {Type:41, Val:")", i:0}, {Type:57364, Val:"as", i:0}, - {Type:57396, Val:"nounsubscribe", i:0}, + {Type:57397, Val:"nounsubscribe", i:0}, {Type:57353, Val:"from", i:0}, - {Type:57396, Val:"node", i:0}, + {Type:57397, Val:"node", i:0}, {Type:57364, Val:"as", i:0}, - {Type:57396, Val:"f", i:0}, - {Type:57388, Val:"inner", i:0}, - {Type:57384, Val:"join", i:0}, - {Type:57396, Val:"contenttype", i:0}, + {Type:57397, Val:"f", i:0}, + {Type:57389, 
Val:"inner", i:0}, + {Type:57385, Val:"join", i:0}, + {Type:57397, Val:"contenttype", i:0}, {Type:57364, Val:"as", i:0}, - {Type:57396, Val:"type", i:0}, - {Type:57394, Val:"on", i:0}, - {Type:57396, Val:"type", i:0}, + {Type:57397, Val:"type", i:0}, + {Type:57395, Val:"on", i:0}, + {Type:57397, Val:"type", i:0}, {Type:46, Val:".", i:0}, - {Type:57396, Val:"contenttypeid", i:0}, + {Type:57397, Val:"contenttypeid", i:0}, {Type:61, Val:"=", i:0}, - {Type:57396, Val:"f", i:0}, + {Type:57397, Val:"f", i:0}, {Type:46, Val:".", i:0}, - {Type:57396, Val:"contenttypeid", i:0}, - {Type:57388, Val:"inner", i:0}, - {Type:57384, Val:"join", i:0}, - {Type:57396, Val:"subscribed", i:0}, + {Type:57397, Val:"contenttypeid", i:0}, + {Type:57389, Val:"inner", i:0}, + {Type:57385, Val:"join", i:0}, + {Type:57397, Val:"subscribed", i:0}, {Type:57364, Val:"as", i:0}, - {Type:57396, Val:"sd", i:0}, - {Type:57394, Val:"on", i:0}, - {Type:57396, Val:"sd", i:0}, + {Type:57397, Val:"sd", i:0}, + {Type:57395, Val:"on", i:0}, + {Type:57397, Val:"sd", i:0}, {Type:46, Val:".", i:0}, - {Type:57396, Val:"did", i:0}, + {Type:57397, Val:"did", i:0}, {Type:61, Val:"=", i:0}, - {Type:57396, Val:"f", i:0}, + {Type:57397, Val:"f", i:0}, {Type:46, Val:".", i:0}, - {Type:57396, Val:"nodeid", i:0}, - {Type:57412, Val:"and", i:0}, - {Type:57396, Val:"sd", i:0}, + {Type:57397, Val:"nodeid", i:0}, + {Type:57415, Val:"and", i:0}, + {Type:57397, Val:"sd", i:0}, {Type:46, Val:".", i:0}, - {Type:57396, Val:"userid", i:0}, + {Type:57397, Val:"userid", i:0}, {Type:61, Val:"=", i:0}, - {Type:57402, Val:":v3", i:0}, + {Type:57405, Val:":v3", i:0}, {Type:57347, Val:"union", i:0}, {Type:57362, Val:"all", i:0}, {Type:57348, Val:"select", i:0}, - {Type:57396, Val:"f", i:0}, + {Type:57397, Val:"f", i:0}, {Type:46, Val:".", i:0}, - {Type:57396, Val:"name", i:0}, + {Type:57397, Val:"name", i:0}, {Type:57364, Val:"as", i:0}, - {Type:57396, Val:"title", i:0}, + {Type:57397, Val:"title", i:0}, {Type:44, Val:",", i:0}, - 
{Type:57396, Val:"f", i:0}, + {Type:57397, Val:"f", i:0}, {Type:46, Val:".", i:0}, - {Type:57396, Val:"userid", i:0}, + {Type:57397, Val:"userid", i:0}, {Type:57364, Val:"as", i:0}, - {Type:57396, Val:"keyval", i:0}, + {Type:57397, Val:"keyval", i:0}, {Type:44, Val:",", i:0}, - {Type:57402, Val:":v4", i:0}, + {Type:57405, Val:":v4", i:0}, {Type:57364, Val:"as", i:0}, - {Type:57396, Val:"sourcetable", i:0}, + {Type:57397, Val:"sourcetable", i:0}, {Type:44, Val:",", i:0}, - {Type:57396, Val:"ifnull", i:0}, + {Type:57397, Val:"ifnull", i:0}, {Type:40, Val:"(", i:0}, - {Type:57396, Val:"f", i:0}, + {Type:57397, Val:"f", i:0}, {Type:46, Val:".", i:0}, - {Type:57396, Val:"lastpost", i:0}, + {Type:57397, Val:"lastpost", i:0}, {Type:44, Val:",", i:0}, - {Type:57396, Val:"f", i:0}, + {Type:57397, Val:"f", i:0}, {Type:46, Val:".", i:0}, - {Type:57396, Val:"joindate", i:0}, + {Type:57397, Val:"joindate", i:0}, {Type:41, Val:")", i:0}, {Type:57364, Val:"as", i:0}, - {Type:57396, Val:"lastactivity", i:0}, + {Type:57397, Val:"lastactivity", i:0}, {Type:44, Val:",", i:0}, - {Type:57396, Val:"f", i:0}, + {Type:57397, Val:"f", i:0}, {Type:46, Val:".", i:0}, - {Type:57396, Val:"posts", i:0}, + {Type:57397, Val:"posts", i:0}, {Type:57364, Val:"as", i:0}, - {Type:57396, Val:"activity", i:0}, + {Type:57397, Val:"activity", i:0}, {Type:44, Val:",", i:0}, - {Type:57402, Val:":v5", i:0}, + {Type:57405, Val:":v5", i:0}, {Type:57364, Val:"as", i:0}, - {Type:57396, Val:"type", i:0}, + {Type:57397, Val:"type", i:0}, {Type:44, Val:",", i:0}, - {Type:57402, Val:":v6", i:0}, + {Type:57405, Val:":v6", i:0}, {Type:57364, Val:"as", i:0}, - {Type:57396, Val:"nounsubscribe", i:0}, + {Type:57397, Val:"nounsubscribe", i:0}, {Type:57353, Val:"from", i:0}, - {Type:57396, Val:"user", i:0}, + {Type:57397, Val:"user", i:0}, {Type:57364, Val:"as", i:0}, - {Type:57396, Val:"f", i:0}, - {Type:57388, Val:"inner", i:0}, - {Type:57384, Val:"join", i:0}, - {Type:57396, Val:"userlist", i:0}, + {Type:57397, Val:"f", 
i:0}, + {Type:57389, Val:"inner", i:0}, + {Type:57385, Val:"join", i:0}, + {Type:57397, Val:"userlist", i:0}, {Type:57364, Val:"as", i:0}, - {Type:57396, Val:"ul", i:0}, - {Type:57394, Val:"on", i:0}, - {Type:57396, Val:"ul", i:0}, + {Type:57397, Val:"ul", i:0}, + {Type:57395, Val:"on", i:0}, + {Type:57397, Val:"ul", i:0}, {Type:46, Val:".", i:0}, - {Type:57396, Val:"relationid", i:0}, + {Type:57397, Val:"relationid", i:0}, {Type:61, Val:"=", i:0}, - {Type:57396, Val:"f", i:0}, + {Type:57397, Val:"f", i:0}, {Type:46, Val:".", i:0}, - {Type:57396, Val:"userid", i:0}, - {Type:57412, Val:"and", i:0}, - {Type:57396, Val:"ul", i:0}, + {Type:57397, Val:"userid", i:0}, + {Type:57415, Val:"and", i:0}, + {Type:57397, Val:"ul", i:0}, {Type:46, Val:".", i:0}, - {Type:57396, Val:"userid", i:0}, + {Type:57397, Val:"userid", i:0}, {Type:61, Val:"=", i:0}, - {Type:57402, Val:":v7", i:0}, + {Type:57405, Val:":v7", i:0}, {Type:57354, Val:"where", i:0}, - {Type:57396, Val:"ul", i:0}, + {Type:57397, Val:"ul", i:0}, {Type:46, Val:".", i:0}, - {Type:57396, Val:"type", i:0}, + {Type:57397, Val:"type", i:0}, {Type:61, Val:"=", i:0}, - {Type:57402, Val:":v8", i:0}, - {Type:57412, Val:"and", i:0}, - {Type:57396, Val:"ul", i:0}, + {Type:57405, Val:":v8", i:0}, + {Type:57415, Val:"and", i:0}, + {Type:57397, Val:"ul", i:0}, {Type:46, Val:".", i:0}, - {Type:57396, Val:"aq", i:0}, + {Type:57397, Val:"aq", i:0}, {Type:61, Val:"=", i:0}, - {Type:57402, Val:":v9", i:0}, + {Type:57405, Val:":v9", i:0}, {Type:57357, Val:"order", i:0}, {Type:57358, Val:"by", i:0}, - {Type:57396, Val:"title", i:0}, + {Type:57397, Val:"title", i:0}, {Type:57359, Val:"limit", i:0}, - {Type:57402, Val:":v10", i:0}, + {Type:57405, Val:":v10", i:0}, } []ast.Token{ {Type:57348, Val:"select", i:0}, - {Type:57396, Val:"c1", i:0}, + {Type:57397, Val:"c1", i:0}, {Type:57353, Val:"from", i:0}, - {Type:57396, Val:"t1", i:0}, + {Type:57397, Val:"t1", i:0}, {Type:57354, Val:"where", i:0}, - {Type:57396, Val:"id", i:0}, - 
{Type:57421, Val:">=", i:0}, - {Type:57399, Val:"1000", i:0}, + {Type:57397, Val:"id", i:0}, + {Type:57424, Val:">=", i:0}, + {Type:57402, Val:"1000", i:0}, } []ast.Token{ {Type:57348, Val:"select", i:0}, - {Type:57590, Val:"sql_calc_found_rows", i:0}, - {Type:57396, Val:"col", i:0}, + {Type:57593, Val:"SQL_CALC_FOUND_ROWS", i:0}, + {Type:57397, Val:"col", i:0}, {Type:57353, Val:"from", i:0}, - {Type:57396, Val:"tbl", i:0}, + {Type:57397, Val:"tbl", i:0}, {Type:57354, Val:"where", i:0}, - {Type:57396, Val:"id", i:0}, + {Type:57397, Val:"id", i:0}, {Type:62, Val:">", i:0}, - {Type:57399, Val:"1000", i:0}, + {Type:57402, Val:"1000", i:0}, } []ast.Token{ - {Type:57348, Val:"select", i:0}, + {Type:57348, Val:"SELECT", i:0}, {Type:42, Val:"*", i:0}, - {Type:57353, Val:"from", i:0}, - {Type:57396, Val:"tb", i:0}, - {Type:57354, Val:"where", i:0}, - {Type:57396, Val:"id", i:0}, + {Type:57353, Val:"FROM", i:0}, + {Type:57397, Val:"tb", i:0}, + {Type:57354, Val:"WHERE", i:0}, + {Type:57397, Val:"id", i:0}, {Type:61, Val:"=", i:0}, - {Type:57402, Val:":v1", i:0}, + {Type:57405, Val:":v1", i:0}, {Type:59, Val:";", i:0}, } []ast.Token{ - {Type:57348, Val:"select", i:0}, + {Type:57348, Val:"SELECT", i:0}, {Type:42, Val:"*", i:0}, - {Type:57353, Val:"from", i:0}, - {Type:57396, Val:"tb", i:0}, - {Type:57354, Val:"where", i:0}, - {Type:57396, Val:"id", i:0}, - {Type:57424, Val:"is", i:0}, - {Type:57407, Val:"null", i:0}, + {Type:57353, Val:"FROM", i:0}, + {Type:57397, Val:"tb", i:0}, + {Type:57354, Val:"WHERE", i:0}, + {Type:57397, Val:"id", i:0}, + {Type:57427, Val:"is", i:0}, + {Type:57410, Val:"null", i:0}, {Type:59, Val:";", i:0}, } []ast.Token{ - {Type:57348, Val:"select", i:0}, + {Type:57348, Val:"SELECT", i:0}, {Type:42, Val:"*", i:0}, - {Type:57353, Val:"from", i:0}, - {Type:57396, Val:"tb", i:0}, - {Type:57354, Val:"where", i:0}, - {Type:57396, Val:"id", i:0}, - {Type:57424, Val:"is", i:0}, - {Type:57413, Val:"not", i:0}, - {Type:57407, Val:"null", i:0}, + {Type:57353, 
Val:"FROM", i:0}, + {Type:57397, Val:"tb", i:0}, + {Type:57354, Val:"WHERE", i:0}, + {Type:57397, Val:"id", i:0}, + {Type:57427, Val:"is", i:0}, + {Type:57416, Val:"not", i:0}, + {Type:57410, Val:"null", i:0}, {Type:59, Val:";", i:0}, } []ast.Token{ - {Type:57348, Val:"select", i:0}, + {Type:57348, Val:"SELECT", i:0}, {Type:42, Val:"*", i:0}, - {Type:57353, Val:"from", i:0}, - {Type:57396, Val:"tb", i:0}, - {Type:57354, Val:"where", i:0}, - {Type:57396, Val:"id", i:0}, - {Type:57414, Val:"between", i:0}, - {Type:57399, Val:"1", i:0}, - {Type:57412, Val:"and", i:0}, - {Type:57399, Val:"3", i:0}, + {Type:57353, Val:"FROM", i:0}, + {Type:57397, Val:"tb", i:0}, + {Type:57354, Val:"WHERE", i:0}, + {Type:57397, Val:"id", i:0}, + {Type:57417, Val:"between", i:0}, + {Type:57402, Val:"1", i:0}, + {Type:57415, Val:"and", i:0}, + {Type:57402, Val:"3", i:0}, {Type:59, Val:";", i:0}, } []ast.Token{ - {Type:57441, Val:"alter", i:0}, - {Type:57448, Val:"table", i:0}, - {Type:57396, Val:"inventory", i:0}, - {Type:57445, Val:"add", i:0}, - {Type:57449, Val:"index", i:0}, - {Type:57396, Val:"idx_store_film", i:0}, - {Type:57396, Val:" (", i:0}, - {Type:57396, Val:"store_id", i:0}, - {Type:57396, Val:",", i:0}, - {Type:57396, Val:"film_id", i:0}, + {Type:57444, Val:"alter", i:0}, + {Type:57451, Val:"table", i:0}, + {Type:57397, Val:"inventory", i:0}, + {Type:57448, Val:"add", i:0}, + {Type:57452, Val:"index", i:0}, + {Type:57397, Val:"idx_store_film", i:0}, + {Type:57397, Val:" (", i:0}, + {Type:57397, Val:"store_id", i:0}, + {Type:57397, Val:",", i:0}, + {Type:57397, Val:"film_id", i:0}, {Type:57346, Val:");", i:0}, } +[]ast.Token{ + {Type:57351, Val:"UPDATE", i:0}, + {Type:57397, Val:"xxx", i:0}, + {Type:57372, Val:"SET", i:0}, + {Type:57397, Val:"c1", i:0}, + {Type:61, Val:"=", i:0}, + {Type:57401, Val:" LOGGER.error(\"\"); }", i:0}, + {Type:57354, Val:"WHERE", i:0}, + {Type:57397, Val:"id", i:0}, + {Type:61, Val:"=", i:0}, + {Type:57402, Val:"2", i:0}, + {Type:59, Val:";", i:0}, 
+} []ast.Token{ {Type:57351, Val:"update", i:0}, - {Type:57396, Val:"xxx", i:0}, + {Type:57346, Val:"\xc2", i:0}, + {Type:57346, Val:"\xa0", i:0}, + {Type:57397, Val:"tb", i:0}, {Type:57372, Val:"set", i:0}, - {Type:57396, Val:"c1", i:0}, + {Type:57346, Val:"\xc2", i:0}, + {Type:57346, Val:"\xa0", i:0}, + {Type:57488, Val:"status", i:0}, + {Type:57346, Val:"\xc2", i:0}, + {Type:57346, Val:"\xa0", i:0}, {Type:61, Val:"=", i:0}, - {Type:57398, Val:" LOGGER.error(\"\"); }", i:0}, + {Type:57346, Val:"\xc2", i:0}, + {Type:57346, Val:"\xa0", i:0}, + {Type:57402, Val:"1", i:0}, {Type:57354, Val:"where", i:0}, - {Type:57396, Val:"id", i:0}, + {Type:57346, Val:"\xc2", i:0}, + {Type:57346, Val:"\xa0", i:0}, + {Type:57397, Val:"id", i:0}, + {Type:57346, Val:"\xc2", i:0}, + {Type:57346, Val:"\xa0", i:0}, {Type:61, Val:"=", i:0}, - {Type:57399, Val:"2", i:0}, + {Type:57346, Val:"\xc2", i:0}, + {Type:57346, Val:"\xa0", i:0}, + {Type:57402, Val:"1", i:0}, {Type:59, Val:";", i:0}, } diff --git a/ast/token.go b/ast/token.go index d1e4356..ed8353f 100644 --- a/ast/token.go +++ b/ast/token.go @@ -580,7 +580,7 @@ type Token struct { i int } -// Tokenizer 用于初始化token +// Tokenizer 用于初始化 token,区别于 Tokenize 函数,这个函数使用 vitess 的切词方式 func Tokenizer(sql string) []Token { var tokens []Token tkn := sqlparser.NewStringTokenizer(sql) @@ -785,7 +785,7 @@ func getQuotedString(buf string) string { return buf } -// Tokenize 序列化token +// Tokenize 序列化 token,区别于 Tokenizer 函数,这个函数是 soar built-in 实现的切词 func Tokenize(sql string) []Token { var token Token var tokenLength int diff --git a/ast/token_test.go b/ast/token_test.go index c0c66ab..fe7b405 100644 --- a/ast/token_test.go +++ b/ast/token_test.go @@ -52,6 +52,7 @@ func TestTokenizer(t *testing.T) { "SELECT * FROM tb WHERE id between 1 and 3;", "alter table inventory add index idx_store_film` (`store_id`,`film_id`);", `UPDATE xxx SET c1=' LOGGER.error(""); }' WHERE id = 2 ;`, + `update tb set status = 1 where id = 1;`, // SQL 中包含 non-broken-space } err 
:= common.GoldenDiff(func() { for _, sql := range sqls { diff --git a/common/testdata/TestStringStorageReq.golden b/common/testdata/TestStringStorageReq.golden index 3f8175d..df6299b 100644 --- a/common/testdata/TestStringStorageReq.golden +++ b/common/testdata/TestStringStorageReq.golden @@ -1,13 +1,123 @@ -char(10) cp852 10 -char(256) cp852 255 -binary(10) cp852 10 -binary(256) cp852 255 -varchar(10) cp852 11 -varbinary(10) cp852 11 -enum('G','PG','PG-13','R','NC-17') cp852 1 -set('one', 'two') cp852 1 -not_exist cp852 0 -char(-1) cp852 0 +char(10) binary 10 +char(256) binary 255 +binary(10) binary 10 +binary(256) binary 255 +varchar(10) binary 11 +varbinary(10) binary 11 +enum('G','PG','PG-13','R','NC-17') binary 1 +set('one', 'two') binary 1 +not_exist binary 0 +char(-1) binary 0 +char(10) keybcs2 10 +char(256) keybcs2 255 +binary(10) keybcs2 10 +binary(256) keybcs2 255 +varchar(10) keybcs2 11 +varbinary(10) keybcs2 11 +enum('G','PG','PG-13','R','NC-17') keybcs2 1 +set('one', 'two') keybcs2 1 +not_exist keybcs2 0 +char(-1) keybcs2 0 +char(10) latin7 10 +char(256) latin7 255 +binary(10) latin7 10 +binary(256) latin7 255 +varchar(10) latin7 11 +varbinary(10) latin7 11 +enum('G','PG','PG-13','R','NC-17') latin7 1 +set('one', 'two') latin7 1 +not_exist latin7 0 +char(-1) latin7 0 +char(10) utf32 40 +char(256) utf32 1020 +binary(10) utf32 10 +binary(256) utf32 255 +varchar(10) utf32 41 +varbinary(10) utf32 41 +enum('G','PG','PG-13','R','NC-17') utf32 1 +set('one', 'two') utf32 1 +not_exist utf32 0 +char(-1) utf32 0 +char(10) cp1250 10 +char(256) cp1250 255 +binary(10) cp1250 10 +binary(256) cp1250 255 +varchar(10) cp1250 11 +varbinary(10) cp1250 11 +enum('G','PG','PG-13','R','NC-17') cp1250 1 +set('one', 'two') cp1250 1 +not_exist cp1250 0 +char(-1) cp1250 0 +char(10) ujis 30 +char(256) ujis 765 +binary(10) ujis 10 +binary(256) ujis 255 +varchar(10) ujis 31 +varbinary(10) ujis 31 +enum('G','PG','PG-13','R','NC-17') ujis 1 +set('one', 'two') ujis 1 +not_exist ujis 0 
+char(-1) ujis 0 +char(10) utf16 40 +char(256) utf16 1020 +binary(10) utf16 10 +binary(256) utf16 255 +varchar(10) utf16 41 +varbinary(10) utf16 41 +enum('G','PG','PG-13','R','NC-17') utf16 1 +set('one', 'two') utf16 1 +not_exist utf16 0 +char(-1) utf16 0 +char(10) sjis 20 +char(256) sjis 510 +binary(10) sjis 10 +binary(256) sjis 255 +varchar(10) sjis 21 +varbinary(10) sjis 21 +enum('G','PG','PG-13','R','NC-17') sjis 1 +set('one', 'two') sjis 1 +not_exist sjis 0 +char(-1) sjis 0 +char(10) tis620 10 +char(256) tis620 255 +binary(10) tis620 10 +binary(256) tis620 255 +varchar(10) tis620 11 +varbinary(10) tis620 11 +enum('G','PG','PG-13','R','NC-17') tis620 1 +set('one', 'two') tis620 1 +not_exist tis620 0 +char(-1) tis620 0 +char(10) cp1256 10 +char(256) cp1256 255 +binary(10) cp1256 10 +binary(256) cp1256 255 +varchar(10) cp1256 11 +varbinary(10) cp1256 11 +enum('G','PG','PG-13','R','NC-17') cp1256 1 +set('one', 'two') cp1256 1 +not_exist cp1256 0 +char(-1) cp1256 0 +char(10) cp932 20 +char(256) cp932 510 +binary(10) cp932 10 +binary(256) cp932 255 +varchar(10) cp932 21 +varbinary(10) cp932 21 +enum('G','PG','PG-13','R','NC-17') cp932 1 +set('one', 'two') cp932 1 +not_exist cp932 0 +char(-1) cp932 0 +char(10) euckr 20 +char(256) euckr 510 +binary(10) euckr 10 +binary(256) euckr 255 +varchar(10) euckr 21 +varbinary(10) euckr 21 +enum('G','PG','PG-13','R','NC-17') euckr 1 +set('one', 'two') euckr 1 +not_exist euckr 0 +char(-1) euckr 0 char(10) gb2312 20 char(256) gb2312 510 binary(10) gb2312 10 @@ -18,6 +128,26 @@ enum('G','PG','PG-13','R','NC-17') gb2312 1 set('one', 'two') gb2312 1 not_exist gb2312 0 char(-1) gb2312 0 +char(10) greek 10 +char(256) greek 255 +binary(10) greek 10 +binary(256) greek 255 +varchar(10) greek 11 +varbinary(10) greek 11 +enum('G','PG','PG-13','R','NC-17') greek 1 +set('one', 'two') greek 1 +not_exist greek 0 +char(-1) greek 0 +char(10) koi8r 10 +char(256) koi8r 255 +binary(10) koi8r 10 +binary(256) koi8r 255 +varchar(10) koi8r 11 
+varbinary(10) koi8r 11 +enum('G','PG','PG-13','R','NC-17') koi8r 1 +set('one', 'two') koi8r 1 +not_exist koi8r 0 +char(-1) koi8r 0 char(10) utf16le 40 char(256) utf16le 1020 binary(10) utf16le 10 @@ -28,26 +158,16 @@ enum('G','PG','PG-13','R','NC-17') utf16le 1 set('one', 'two') utf16le 1 not_exist utf16le 0 char(-1) utf16le 0 -char(10) cp850 10 -char(256) cp850 255 -binary(10) cp850 10 -binary(256) cp850 255 -varchar(10) cp850 11 -varbinary(10) cp850 11 -enum('G','PG','PG-13','R','NC-17') cp850 1 -set('one', 'two') cp850 1 -not_exist cp850 0 -char(-1) cp850 0 -char(10) cp1251 10 -char(256) cp1251 255 -binary(10) cp1251 10 -binary(256) cp1251 255 -varchar(10) cp1251 11 -varbinary(10) cp1251 11 -enum('G','PG','PG-13','R','NC-17') cp1251 1 -set('one', 'two') cp1251 1 -not_exist cp1251 0 -char(-1) cp1251 0 +char(10) utf8mb4 40 +char(256) utf8mb4 1020 +binary(10) utf8mb4 10 +binary(256) utf8mb4 255 +varchar(10) utf8mb4 41 +varbinary(10) utf8mb4 41 +enum('G','PG','PG-13','R','NC-17') utf8mb4 1 +set('one', 'two') utf8mb4 1 +not_exist utf8mb4 0 +char(-1) utf8mb4 0 char(10) cp1257 10 char(256) cp1257 255 binary(10) cp1257 10 @@ -68,36 +188,16 @@ enum('G','PG','PG-13','R','NC-17') cp866 1 set('one', 'two') cp866 1 not_exist cp866 0 char(-1) cp866 0 -char(10) greek 10 -char(256) greek 255 -binary(10) greek 10 -binary(256) greek 255 -varchar(10) greek 11 -varbinary(10) greek 11 -enum('G','PG','PG-13','R','NC-17') greek 1 -set('one', 'two') greek 1 -not_exist greek 0 -char(-1) greek 0 -char(10) koi8r 10 -char(256) koi8r 255 -binary(10) koi8r 10 -binary(256) koi8r 255 -varchar(10) koi8r 11 -varbinary(10) koi8r 11 -enum('G','PG','PG-13','R','NC-17') koi8r 1 -set('one', 'two') koi8r 1 -not_exist koi8r 0 -char(-1) koi8r 0 -char(10) latin2 10 -char(256) latin2 255 -binary(10) latin2 10 -binary(256) latin2 255 -varchar(10) latin2 11 -varbinary(10) latin2 11 -enum('G','PG','PG-13','R','NC-17') latin2 1 -set('one', 'two') latin2 1 -not_exist latin2 0 -char(-1) latin2 0 +char(10) 
geostd8 10 +char(256) geostd8 255 +binary(10) geostd8 10 +binary(256) geostd8 255 +varchar(10) geostd8 11 +varbinary(10) geostd8 11 +enum('G','PG','PG-13','R','NC-17') geostd8 1 +set('one', 'two') geostd8 1 +not_exist geostd8 0 +char(-1) geostd8 0 char(10) latin5 10 char(256) latin5 255 binary(10) latin5 10 @@ -108,116 +208,46 @@ enum('G','PG','PG-13','R','NC-17') latin5 1 set('one', 'two') latin5 1 not_exist latin5 0 char(-1) latin5 0 -char(10) ascii 10 -char(256) ascii 255 -binary(10) ascii 10 -binary(256) ascii 255 -varchar(10) ascii 11 -varbinary(10) ascii 11 -enum('G','PG','PG-13','R','NC-17') ascii 1 -set('one', 'two') ascii 1 -not_exist ascii 0 -char(-1) ascii 0 -char(10) ucs2 20 -char(256) ucs2 510 -binary(10) ucs2 10 -binary(256) ucs2 255 -varchar(10) ucs2 21 -varbinary(10) ucs2 21 -enum('G','PG','PG-13','R','NC-17') ucs2 1 -set('one', 'two') ucs2 1 -not_exist ucs2 0 -char(-1) ucs2 0 -char(10) latin7 10 -char(256) latin7 255 -binary(10) latin7 10 -binary(256) latin7 255 -varchar(10) latin7 11 -varbinary(10) latin7 11 -enum('G','PG','PG-13','R','NC-17') latin7 1 -set('one', 'two') latin7 1 -not_exist latin7 0 -char(-1) latin7 0 -char(10) utf16 40 -char(256) utf16 1020 -binary(10) utf16 10 -binary(256) utf16 255 -varchar(10) utf16 41 -varbinary(10) utf16 41 -enum('G','PG','PG-13','R','NC-17') utf16 1 -set('one', 'two') utf16 1 -not_exist utf16 0 -char(-1) utf16 0 -char(10) keybcs2 10 -char(256) keybcs2 255 -binary(10) keybcs2 10 -binary(256) keybcs2 255 -varchar(10) keybcs2 11 -varbinary(10) keybcs2 11 -enum('G','PG','PG-13','R','NC-17') keybcs2 1 -set('one', 'two') keybcs2 1 -not_exist keybcs2 0 -char(-1) keybcs2 0 -char(10) cp932 20 -char(256) cp932 510 -binary(10) cp932 10 -binary(256) cp932 255 -varchar(10) cp932 21 -varbinary(10) cp932 21 -enum('G','PG','PG-13','R','NC-17') cp932 1 -set('one', 'two') cp932 1 -not_exist cp932 0 -char(-1) cp932 0 -char(10) sjis 20 -char(256) sjis 510 -binary(10) sjis 10 -binary(256) sjis 255 -varchar(10) sjis 21 
-varbinary(10) sjis 21 -enum('G','PG','PG-13','R','NC-17') sjis 1 -set('one', 'two') sjis 1 -not_exist sjis 0 -char(-1) sjis 0 -char(10) armscii8 10 -char(256) armscii8 255 -binary(10) armscii8 10 -binary(256) armscii8 255 -varchar(10) armscii8 11 -varbinary(10) armscii8 11 -enum('G','PG','PG-13','R','NC-17') armscii8 1 -set('one', 'two') armscii8 1 -not_exist armscii8 0 -char(-1) armscii8 0 -char(10) macce 10 -char(256) macce 255 -binary(10) macce 10 -binary(256) macce 255 -varchar(10) macce 11 -varbinary(10) macce 11 -enum('G','PG','PG-13','R','NC-17') macce 1 -set('one', 'two') macce 1 -not_exist macce 0 -char(-1) macce 0 -char(10) macroman 10 -char(256) macroman 255 -binary(10) macroman 10 -binary(256) macroman 255 -varchar(10) macroman 11 -varbinary(10) macroman 11 -enum('G','PG','PG-13','R','NC-17') macroman 1 -set('one', 'two') macroman 1 -not_exist macroman 0 -char(-1) macroman 0 -char(10) ujis 30 -char(256) ujis 765 -binary(10) ujis 10 -binary(256) ujis 255 -varchar(10) ujis 31 -varbinary(10) ujis 31 -enum('G','PG','PG-13','R','NC-17') ujis 1 -set('one', 'two') ujis 1 -not_exist ujis 0 -char(-1) ujis 0 +char(10) cp852 10 +char(256) cp852 255 +binary(10) cp852 10 +binary(256) cp852 255 +varchar(10) cp852 11 +varbinary(10) cp852 11 +enum('G','PG','PG-13','R','NC-17') cp852 1 +set('one', 'two') cp852 1 +not_exist cp852 0 +char(-1) cp852 0 +char(10) hebrew 10 +char(256) hebrew 255 +binary(10) hebrew 10 +binary(256) hebrew 255 +varchar(10) hebrew 11 +varbinary(10) hebrew 11 +enum('G','PG','PG-13','R','NC-17') hebrew 1 +set('one', 'two') hebrew 1 +not_exist hebrew 0 +char(-1) hebrew 0 +char(10) koi8u 10 +char(256) koi8u 255 +binary(10) koi8u 10 +binary(256) koi8u 255 +varchar(10) koi8u 11 +varbinary(10) koi8u 11 +enum('G','PG','PG-13','R','NC-17') koi8u 1 +set('one', 'two') koi8u 1 +not_exist koi8u 0 +char(-1) koi8u 0 +char(10) latin1 10 +char(256) latin1 255 +binary(10) latin1 10 +binary(256) latin1 255 +varchar(10) latin1 11 +varbinary(10) latin1 11 
+enum('G','PG','PG-13','R','NC-17') latin1 1 +set('one', 'two') latin1 1 +not_exist latin1 0 +char(-1) latin1 0 char(10) utf8 30 char(256) utf8 765 binary(10) utf8 10 @@ -228,6 +258,16 @@ enum('G','PG','PG-13','R','NC-17') utf8 1 set('one', 'two') utf8 1 not_exist utf8 0 char(-1) utf8 0 +char(10) ucs2 20 +char(256) ucs2 510 +binary(10) ucs2 10 +binary(256) ucs2 255 +varchar(10) ucs2 21 +varbinary(10) ucs2 21 +enum('G','PG','PG-13','R','NC-17') ucs2 1 +set('one', 'two') ucs2 1 +not_exist ucs2 0 +char(-1) ucs2 0 char(10) hp8 10 char(256) hp8 255 binary(10) hp8 10 @@ -238,16 +278,56 @@ enum('G','PG','PG-13','R','NC-17') hp8 1 set('one', 'two') hp8 1 not_exist hp8 0 char(-1) hp8 0 -char(10) cp1250 10 -char(256) cp1250 255 -binary(10) cp1250 10 -binary(256) cp1250 255 -varchar(10) cp1250 11 -varbinary(10) cp1250 11 -enum('G','PG','PG-13','R','NC-17') cp1250 1 -set('one', 'two') cp1250 1 -not_exist cp1250 0 -char(-1) cp1250 0 +char(10) macroman 10 +char(256) macroman 255 +binary(10) macroman 10 +binary(256) macroman 255 +varchar(10) macroman 11 +varbinary(10) macroman 11 +enum('G','PG','PG-13','R','NC-17') macroman 1 +set('one', 'two') macroman 1 +not_exist macroman 0 +char(-1) macroman 0 +char(10) ascii 10 +char(256) ascii 255 +binary(10) ascii 10 +binary(256) ascii 255 +varchar(10) ascii 11 +varbinary(10) ascii 11 +enum('G','PG','PG-13','R','NC-17') ascii 1 +set('one', 'two') ascii 1 +not_exist ascii 0 +char(-1) ascii 0 +char(10) big5 20 +char(256) big5 510 +binary(10) big5 10 +binary(256) big5 255 +varchar(10) big5 21 +varbinary(10) big5 21 +enum('G','PG','PG-13','R','NC-17') big5 1 +set('one', 'two') big5 1 +not_exist big5 0 +char(-1) big5 0 +char(10) cp1251 10 +char(256) cp1251 255 +binary(10) cp1251 10 +binary(256) cp1251 255 +varchar(10) cp1251 11 +varbinary(10) cp1251 11 +enum('G','PG','PG-13','R','NC-17') cp1251 1 +set('one', 'two') cp1251 1 +not_exist cp1251 0 +char(-1) cp1251 0 +char(10) cp850 10 +char(256) cp850 255 +binary(10) cp850 10 +binary(256) cp850 255 
+varchar(10) cp850 11 +varbinary(10) cp850 11 +enum('G','PG','PG-13','R','NC-17') cp850 1 +set('one', 'two') cp850 1 +not_exist cp850 0 +char(-1) cp850 0 char(10) eucjpms 30 char(256) eucjpms 765 binary(10) eucjpms 10 @@ -258,56 +338,6 @@ enum('G','PG','PG-13','R','NC-17') eucjpms 1 set('one', 'two') eucjpms 1 not_exist eucjpms 0 char(-1) eucjpms 0 -char(10) koi8u 10 -char(256) koi8u 255 -binary(10) koi8u 10 -binary(256) koi8u 255 -varchar(10) koi8u 11 -varbinary(10) koi8u 11 -enum('G','PG','PG-13','R','NC-17') koi8u 1 -set('one', 'two') koi8u 1 -not_exist koi8u 0 -char(-1) koi8u 0 -char(10) latin1 10 -char(256) latin1 255 -binary(10) latin1 10 -binary(256) latin1 255 -varchar(10) latin1 11 -varbinary(10) latin1 11 -enum('G','PG','PG-13','R','NC-17') latin1 1 -set('one', 'two') latin1 1 -not_exist latin1 0 -char(-1) latin1 0 -char(10) swe7 10 -char(256) swe7 255 -binary(10) swe7 10 -binary(256) swe7 255 -varchar(10) swe7 11 -varbinary(10) swe7 11 -enum('G','PG','PG-13','R','NC-17') swe7 1 -set('one', 'two') swe7 1 -not_exist swe7 0 -char(-1) swe7 0 -char(10) binary 10 -char(256) binary 255 -binary(10) binary 10 -binary(256) binary 255 -varchar(10) binary 11 -varbinary(10) binary 11 -enum('G','PG','PG-13','R','NC-17') binary 1 -set('one', 'two') binary 1 -not_exist binary 0 -char(-1) binary 0 -char(10) euckr 20 -char(256) euckr 510 -binary(10) euckr 10 -binary(256) euckr 255 -varchar(10) euckr 21 -varbinary(10) euckr 21 -enum('G','PG','PG-13','R','NC-17') euckr 1 -set('one', 'two') euckr 1 -not_exist euckr 0 -char(-1) euckr 0 char(10) gbk 20 char(256) gbk 510 binary(10) gbk 10 @@ -318,56 +348,16 @@ enum('G','PG','PG-13','R','NC-17') gbk 1 set('one', 'two') gbk 1 not_exist gbk 0 char(-1) gbk 0 -char(10) geostd8 10 -char(256) geostd8 255 -binary(10) geostd8 10 -binary(256) geostd8 255 -varchar(10) geostd8 11 -varbinary(10) geostd8 11 -enum('G','PG','PG-13','R','NC-17') geostd8 1 -set('one', 'two') geostd8 1 -not_exist geostd8 0 -char(-1) geostd8 0 -char(10) hebrew 10 
-char(256) hebrew 255 -binary(10) hebrew 10 -binary(256) hebrew 255 -varchar(10) hebrew 11 -varbinary(10) hebrew 11 -enum('G','PG','PG-13','R','NC-17') hebrew 1 -set('one', 'two') hebrew 1 -not_exist hebrew 0 -char(-1) hebrew 0 -char(10) utf32 40 -char(256) utf32 1020 -binary(10) utf32 10 -binary(256) utf32 255 -varchar(10) utf32 41 -varbinary(10) utf32 41 -enum('G','PG','PG-13','R','NC-17') utf32 1 -set('one', 'two') utf32 1 -not_exist utf32 0 -char(-1) utf32 0 -char(10) utf8mb4 40 -char(256) utf8mb4 1020 -binary(10) utf8mb4 10 -binary(256) utf8mb4 255 -varchar(10) utf8mb4 41 -varbinary(10) utf8mb4 41 -enum('G','PG','PG-13','R','NC-17') utf8mb4 1 -set('one', 'two') utf8mb4 1 -not_exist utf8mb4 0 -char(-1) utf8mb4 0 -char(10) big5 20 -char(256) big5 510 -binary(10) big5 10 -binary(256) big5 255 -varchar(10) big5 21 -varbinary(10) big5 21 -enum('G','PG','PG-13','R','NC-17') big5 1 -set('one', 'two') big5 1 -not_exist big5 0 -char(-1) big5 0 +char(10) armscii8 10 +char(256) armscii8 255 +binary(10) armscii8 10 +binary(256) armscii8 255 +varchar(10) armscii8 11 +varbinary(10) armscii8 11 +enum('G','PG','PG-13','R','NC-17') armscii8 1 +set('one', 'two') armscii8 1 +not_exist armscii8 0 +char(-1) armscii8 0 char(10) dec8 10 char(256) dec8 255 binary(10) dec8 10 @@ -388,23 +378,33 @@ enum('G','PG','PG-13','R','NC-17') gb18030 1 set('one', 'two') gb18030 1 not_exist gb18030 0 char(-1) gb18030 0 -char(10) tis620 10 -char(256) tis620 255 -binary(10) tis620 10 -binary(256) tis620 255 -varchar(10) tis620 11 -varbinary(10) tis620 11 -enum('G','PG','PG-13','R','NC-17') tis620 1 -set('one', 'two') tis620 1 -not_exist tis620 0 -char(-1) tis620 0 -char(10) cp1256 10 -char(256) cp1256 255 -binary(10) cp1256 10 -binary(256) cp1256 255 -varchar(10) cp1256 11 -varbinary(10) cp1256 11 -enum('G','PG','PG-13','R','NC-17') cp1256 1 -set('one', 'two') cp1256 1 -not_exist cp1256 0 -char(-1) cp1256 0 +char(10) latin2 10 +char(256) latin2 255 +binary(10) latin2 10 +binary(256) latin2 255 
+varchar(10) latin2 11 +varbinary(10) latin2 11 +enum('G','PG','PG-13','R','NC-17') latin2 1 +set('one', 'two') latin2 1 +not_exist latin2 0 +char(-1) latin2 0 +char(10) macce 10 +char(256) macce 255 +binary(10) macce 10 +binary(256) macce 255 +varchar(10) macce 11 +varbinary(10) macce 11 +enum('G','PG','PG-13','R','NC-17') macce 1 +set('one', 'two') macce 1 +not_exist macce 0 +char(-1) macce 0 +char(10) swe7 10 +char(256) swe7 255 +binary(10) swe7 10 +binary(256) swe7 255 +varchar(10) swe7 11 +varbinary(10) swe7 11 +enum('G','PG','PG-13','R','NC-17') swe7 1 +set('one', 'two') swe7 1 +not_exist swe7 0 +char(-1) swe7 0 diff --git a/doc/heuristic.md b/doc/heuristic.md index 8d24101..9731c97 100644 --- a/doc/heuristic.md +++ b/doc/heuristic.md @@ -902,6 +902,16 @@ CREATE TABLE tbl ( `books` int ) ```sql select col as 列 from tb ``` +## SQL 中包含 unicode 特殊字符 + +* **Item**:KWR.005 +* **Severity**:L1 +* **Content**:部分 IDE 会自动在 SQL 插入肉眼不可见的 unicode 字符。如:non-break space, zero-width space 等。Linux 下可使用 \`cat -A file.sql\` 命令查看不可见字符。 +* **Case**: + +```sql +update tb set status = 1 where id = 1; +``` ## INSERT INTO xx SELECT 加锁粒度较大请谨慎 * **Item**:LCK.001 -- GitLab