From 14c19f4d89c8cc635ae6370e5415a2bd1588e652 Mon Sep 17 00:00:00 2001 From: Leon Zhang Date: Fri, 12 Apr 2019 23:05:10 +0800 Subject: [PATCH] fix tokenizer bug when a string literal mixes quote types, e.g.: UPDATE xxx SET c1=' LOGGER.error(""); }' WHERE id = 2 ; --- ast/testdata/TestGetQuotedString.golden | 2 + ast/testdata/TestSplitStatement.golden | 11 +++-- ast/testdata/TestTokenizer.golden | 15 +++++- ast/token.go | 11 +++-- ast/token_test.go | 65 +++++++++++++------------ 5 files changed, 65 insertions(+), 39 deletions(-) diff --git a/ast/testdata/TestGetQuotedString.golden b/ast/testdata/TestGetQuotedString.golden index 9cf0b91..c9538cf 100644 --- a/ast/testdata/TestGetQuotedString.golden +++ b/ast/testdata/TestGetQuotedString.golden @@ -22,3 +22,5 @@ orignal: 'hello 'world' quoted: 'hello ' orignal: "hello "world" quoted: "hello " +orignal: ' LOGGER.error(""); }' +quoted: ' LOGGER.error(""); }' diff --git a/ast/testdata/TestSplitStatement.golden b/ast/testdata/TestSplitStatement.golden index deb4590..d1d5f41 100644 --- a/ast/testdata/TestSplitStatement.golden +++ b/ast/testdata/TestSplitStatement.golden @@ -38,12 +38,13 @@ tb; -- comment 19 INSERT /*+ SET_VAR(foreign_key_checks=OFF) */ INTO t2 VALUES(2); 20 select /*!50000 1,*/ 1; -0 select * from test\Ghello -1 select 'hello\Gworld', col from test\Ghello +21 UPDATE xxx SET c1=' LOGGER.error(""); }' WHERE id = 2 ; +0 select * from test\G +1 select 'hello\Gworld', col from test\G 2 -- select * from test\Ghello 3 #select * from test\Ghello -4 select * /*comment*/from test\Ghello -5 select * /*comment;*/from test\Ghello +4 select * /*comment*/from test\G +5 select * /*comment;*/from test\G 6 select * /*comment \\G*/ - from test\\Ghello + from test\\G diff --git a/ast/testdata/TestTokenizer.golden b/ast/testdata/TestTokenizer.golden index 0001c9c..bf6a8df 100644 --- a/ast/testdata/TestTokenizer.golden +++ b/ast/testdata/TestTokenizer.golden @@ -210,7 +210,7 @@ } []ast.Token{ {Type:57348, Val:"select", i:0}, - {Type:57590, 
Val:"sql_calc_found_rows", i:0}, + {Type:57592, Val:"sql_calc_found_rows", i:0}, {Type:57396, Val:"col", i:0}, {Type:57353, Val:"from", i:0}, {Type:57396, Val:"tbl", i:0}, @@ -279,3 +279,16 @@ {Type:57396, Val:"film_id", i:0}, {Type:57346, Val:");", i:0}, } +[]ast.Token{ + {Type:57351, Val:"update", i:0}, + {Type:57396, Val:"xxx", i:0}, + {Type:57372, Val:"set", i:0}, + {Type:57396, Val:"c1", i:0}, + {Type:61, Val:"=", i:0}, + {Type:57398, Val:" LOGGER.error(\"\"); }", i:0}, + {Type:57354, Val:"where", i:0}, + {Type:57396, Val:"id", i:0}, + {Type:61, Val:"=", i:0}, + {Type:57399, Val:"2", i:0}, + {Type:59, Val:";", i:0}, +} diff --git a/ast/token.go b/ast/token.go index 3416332..2674344 100644 --- a/ast/token.go +++ b/ast/token.go @@ -928,13 +928,18 @@ func SplitStatement(buf []byte, delimiter []byte) (string, string, []byte) { } // quoted string - if b == '`' || b == '\'' || b == '"' { + switch b { + case '`', '\'', '"': if i > 1 && buf[i-1] != '\\' { if quoted && b == quoteRune { quoted = false + quoteRune = '0' } else { - quoted = true - quoteRune = b + // check if first time found quote + if quoteRune == 0 { + quoted = true + quoteRune = b + } } } } diff --git a/ast/token_test.go b/ast/token_test.go index ef7f18e..e59be83 100644 --- a/ast/token_test.go +++ b/ast/token_test.go @@ -51,6 +51,7 @@ func TestTokenizer(t *testing.T) { "SELECT * FROM tb WHERE id is not null;", "SELECT * FROM tb WHERE id between 1 and 3;", "alter table inventory add index idx_store_film` (`store_id`,`film_id`);", + `UPDATE xxx SET c1=' LOGGER.error(""); }' WHERE id = 2 ;`, } err := common.GoldenDiff(func() { for _, sql := range sqls { @@ -78,6 +79,7 @@ func TestGetQuotedString(t *testing.T) { "``", `'hello 'world'`, `"hello "world"`, + `' LOGGER.error(""); }'`, } err := common.GoldenDiff(func() { for _, s := range str { @@ -121,72 +123,75 @@ func TestFormat(t *testing.T) { func TestSplitStatement(t *testing.T) { common.Log.Debug("Entering function: %s", common.GetFunctionName()) bufs := 
[][]byte{ - []byte("select * from test;hello"), - []byte("select 'asd;fas', col from test;hello"), - []byte("-- select * from test;hello"), - []byte("#select * from test;hello"), - []byte("select * /*comment*/from test;hello"), - []byte("select * /*comment;*/from test;hello"), + []byte("select * from test;hello"), // 0 + []byte("select 'asd;fas', col from test;hello"), // 1 + []byte("-- select * from test;hello"), // 2 + []byte("#select * from test;hello"), // 3 + []byte("select * /*comment*/from test;hello"), // 4 + []byte("select * /*comment;*/from test;hello"), // 5 []byte(`select * /*comment ;*/ - from test;hello`), - []byte(`select * from test`), + from test;hello`), // 6 + []byte(`select * from test`), // 7 // https://github.com/XiaoMi/soar/issues/66 - []byte(`/*comment*/`), - []byte(`/*comment*/;`), - []byte(`--`), - []byte(`-- comment`), - []byte(`# comment`), + []byte(`/*comment*/`), // 8 + []byte(`/*comment*/;`), // 9 + []byte(`--`), // 10 + []byte(`-- comment`), // 11 + []byte(`# comment`), // 12 // https://github.com/XiaoMi/soar/issues/116 []byte(`select * -- comment from tb -where col = 1`), +where col = 1`), // 13 []byte(`select * -- from tb -where col = 1`), +where col = 1`), // 14 []byte(`select * # from tb -where col = 1`), +where col = 1`), // 15 []byte(`select * -- from tb -where col = 1`), +where col = 1`), // 16 []byte(`select * from -- comment tb; -select col from tb where col = 1;`), +select col from tb where col = 1;`), // 17 // https://github.com/XiaoMi/soar/issues/120 []byte(` -- comment select col from tb; select col from tb; -`), - []byte(`INSERT /*+ SET_VAR(foreign_key_checks=OFF) */ INTO t2 VALUES(2);`), - []byte(`select /*!50000 1,*/ 1;`), +`), // 18 + []byte(`INSERT /*+ SET_VAR(foreign_key_checks=OFF) */ INTO t2 VALUES(2);`), // 19 + []byte(`select /*!50000 1,*/ 1;`), // 20 + []byte(`UPDATE xxx SET c1=' LOGGER.error(""); }' WHERE id = 2 ;`), // 21 } + // \G 分隔符 buf2s := [][]byte{ - []byte("select * from test\\Ghello"), - 
[]byte("select 'hello\\Gworld', col from test\\Ghello"), - []byte("-- select * from test\\Ghello"), - []byte("#select * from test\\Ghello"), - []byte("select * /*comment*/from test\\Ghello"), - []byte("select * /*comment;*/from test\\Ghello"), + []byte("select * from test\\Ghello"), // 0 + []byte("select 'hello\\Gworld', col from test\\Ghello"), // 1 + []byte("-- select * from test\\Ghello"), // 2 + []byte("#select * from test\\Ghello"), // 3 + []byte("select * /*comment*/from test\\Ghello"), // 4 + []byte("select * /*comment;*/from test\\Ghello"), // 5 []byte(`select * /*comment \\G*/ - from test\\Ghello`), + from test\\Ghello`), // 6 } err := common.GoldenDiff(func() { for i, buf := range bufs { - sql, _, _ := SplitStatement(buf, []byte(common.Config.Delimiter)) + sql, _, _ := SplitStatement(buf, []byte(";")) fmt.Println(i, sql) } + for i, buf := range buf2s { - sql, _, _ := SplitStatement(buf, []byte(common.Config.Delimiter)) + sql, _, _ := SplitStatement(buf, []byte("\\G")) fmt.Println(i, sql) } }, t.Name(), update) -- GitLab