提交 9d7ec0f3 编写于 作者: martianzhang's avatar martianzhang

fix #110

  remove bom before auditing
上级 a07240e6
......@@ -119,6 +119,10 @@ func main() {
}
}
// remove bom from file header
var bom []byte
sql, bom = common.RemoveBOM([]byte(sql))
switch common.Config.ReportType {
case "html":
// HTML 格式输入 CSS 加载
......@@ -129,13 +133,17 @@ func main() {
fmt.Println(common.Markdown2HTML(sql))
return
case "explain-digest":
// 当用户输入为 EXPLAIN 信息,只对 Explain 信息进行分析
// 当用户输入为 EXPLAIN 信息,只对 Explain 信息进行分析
// 注意: 这里只能处理一条 SQL 的 EXPLAIN 信息,用户一次反馈多条 SQL 的 EXPLAIN 信息无法处理
advisor.DigestExplainText(sql)
return
case "chardet":
// Get charset of input
fmt.Println(common.Chardet([]byte(sql)))
charset := common.CheckCharsetByBOM(bom)
if charset == "" {
charset = common.Chardet([]byte(sql))
}
fmt.Println(charset)
return
case "remove-comment":
fmt.Println(string(database.RemoveSQLComments([]byte(sql))))
......
......@@ -22,11 +22,69 @@ import (
// Chardet reports the most likely character set of buf.
//
// A byte order mark recognised by CheckCharsetByBOM takes precedence.
// Otherwise the chardet text detector is consulted; among candidates that
// share the best confidence, GB-18030 and UTF-8 are preferred. "unknown" is
// returned when the detector reports an error or yields no candidates.
func Chardet(buf []byte) string {
	// A BOM is explicit, so honour it before any statistical detection.
	if charset := CheckCharsetByBOM(buf); charset != "" {
		return charset
	}

	// Fall back to the chardet package.
	charset := "unknown"
	detector := chardet.NewTextDetector()

	// detector.DetectBest is unstable: when several candidates have equal
	// confidence, the one it reports is random. Rank the full list instead.
	result, err := detector.DetectAll(buf)
	if err != nil {
		return charset
	}

	// SOAR's main users speak Chinese, so GB-18030 and UTF-8 win ties.
	// NOTE(review): this assumes DetectAll returns candidates ordered by
	// descending confidence — confirm against the chardet package.
	var confidence int
	for _, r := range result {
		// Stop as soon as confidence drops below the best seen so far.
		// The comparison is strict so that equal-confidence candidates are
		// still inspected for a preferred charset; a ">=" here would return
		// on the first tie and make the preference below unreachable.
		if confidence > r.Confidence && r.Confidence != 0 {
			return charset
		}
		confidence = r.Confidence
		if r.Charset == "GB-18030" || r.Charset == "UTF-8" {
			return r.Charset
		}
		charset = r.Charset
	}
	return charset
}
// CheckCharsetByBOM returns the charset announced by the byte order mark at
// the start of buf, or "" when buf does not begin with a recognised BOM.
// Only the UTF-8 and GB-18030 marks are recognised.
//
// ref: https://en.wikipedia.org/wiki/Byte_order_mark
func CheckCharsetByBOM(buf []byte) string {
	// TODO: There are many kind of BOM

	// UTF-8: EF BB BF
	if len(buf) >= 3 && buf[0] == 0xef && buf[1] == 0xbb && buf[2] == 0xbf {
		return "UTF-8"
	}

	// GB-18030: 84 31 95 33
	if len(buf) >= 4 && buf[0] == 0x84 && buf[1] == 0x31 && buf[2] == 0x95 && buf[3] == 0x33 {
		return "GB-18030"
	}

	// No recognised BOM; the caller decides how to detect the charset.
	return ""
}
// RemoveBOM strips a leading byte order mark from buf.
//
// It returns the remaining content as a string together with the BOM bytes
// that were removed. The UTF-8 (EF BB BF) and UTF-16 LE (FF FE) marks are
// recognised; when neither is present the content is returned unchanged with
// an empty BOM slice. A returned BOM is a sub-slice of buf, not a copy.
func RemoveBOM(buf []byte) (string, []byte) {
	// ef bb bf, UTF-8 BOM. Use ">=" so that input consisting of nothing but
	// the BOM is stripped as well.
	if len(buf) >= 3 && buf[0] == 0xef && buf[1] == 0xbb && buf[2] == 0xbf {
		return string(buf[3:]), buf[:3]
	}

	// ff fe, UTF-16 (LE) BOM. Same boundary consideration as above.
	if len(buf) >= 2 && buf[0] == 0xff && buf[1] == 0xfe {
		return string(buf[2:]), buf[:2]
	}

	return string(buf), []byte{}
}
......@@ -17,6 +17,7 @@
package common
import (
"fmt"
"io/ioutil"
"testing"
)
......@@ -38,3 +39,26 @@ func TestChardet(t *testing.T) {
}
}
}
// TestRemoveBOM runs RemoveBOM on the UTF-8.bom.sql fixture and hands the
// printed result to GoldenDiff for comparison.
func TestRemoveBOM(t *testing.T) {
	fileName := DevPath + "/common/testdata/UTF-8.bom.sql"
	buf, err := ioutil.ReadFile(fileName)
	if err != nil {
		// Abort here: continuing with a nil buffer would feed meaningless
		// output into GoldenDiff.
		t.Fatalf("ioutil.ReadFile %s, Error: %s", fileName, err.Error())
	}
	GoldenDiff(func() {
		fmt.Println(RemoveBOM(buf))
	}, t.Name(), update)
}
// TestCheckCharsetByBOM verifies that the UTF-8.bom.sql fixture is reported
// as UTF-8 by CheckCharsetByBOM.
func TestCheckCharsetByBOM(t *testing.T) {
	fileName := DevPath + "/common/testdata/UTF-8.bom.sql"
	buf, err := ioutil.ReadFile(fileName)
	if err != nil {
		// Abort here: without the fixture the check below would only add a
		// second, misleading failure.
		t.Fatalf("ioutil.ReadFile %s, Error: %s", fileName, err.Error())
	}
	// Call once so the reported value is the one that was compared.
	if got := CheckCharsetByBOM(buf); got != "UTF-8" {
		t.Errorf("checkCharsetByBOM Want: UTF-8, Get: %s", got)
	}
}
select col from tb c = 1;
[239 187 191]
select col from tb c = 1;
......@@ -532,7 +532,7 @@ func (db *Connector) explainAbleSQL(sql string) (string, error) {
// 执行explain请求,返回mysql.Result执行结果
func (db *Connector) executeExplain(sql string, explainType int, formatType int) (*QueryResult, error) {
var err error
sql, _ = db.explainAbleSQL(sql)
sql, err = db.explainAbleSQL(sql)
if sql == "" {
return nil, err
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册