未验证 提交 3d78a452 编写于 作者: C cai.zhang 提交者: GitHub

Support escape string (#24848)

Signed-off-by: cai.zhang <cai.zhang@zilliz.com>
上级 552c6acb
grammar Plan;
@lexer::members {
var str = ""
}
expr:
IntegerConstant # Integer
| FloatingConstant # Floating
......@@ -87,16 +91,45 @@ FloatingConstant:
Identifier: Nondigit (Nondigit | Digit)* | '$meta';
StringLiteral: EncodingPrefix? ('"' DoubleSCharSequence? '"' | '\'' SingleSCharSequence? '\'');
//StringLiteral: EncodingPrefix? ('"' DoubleSCharSequence? '"' | '\'' SingleSCharSequence? '\'');
// StringLiteral matches a double-quoted or a single-quoted string and rewrites
// the token text while matching. The lexer member `str` accumulates a
// normalized copy of the literal; the closing-quote action installs it with
// SetText and resets `str` for the next token.
//
// Normalization performed by the actions below:
//   - the surrounding quotes are kept as written (" ... " or ' ... ');
//   - backslash + single quote is emitted as a bare single quote;
//   - backslash + any other character is emitted unchanged (backslash kept);
//   - inside a single-quoted literal, a bare double quote is emitted as \";
//   - every other character is copied as is, read back via LA(-1), i.e. the
//     character that was just consumed.
// Presumably this is so that the visitor can swap single quotes for double
// quotes and decode the text with Go's strconv.Unquote -- confirm against the
// visitor code.
//
// NOTE(review): EncodingPrefix is matched but never appended to `str`, so the
// token text set by SetText does not contain the prefix.
// NOTE(review): because `|` binds looser than sequencing, `EncodingPrefix?`
// belongs only to the double-quoted alternative; the single-quoted alternative
// does not accept a prefix. Confirm whether that is intended.
StringLiteral: EncodingPrefix?
'"' {str += "\""}
( '\\'
(
'\'' {str += "'" }
| ~['] {str += ("\\" + string(l.GetInputStream().LA(-1)))}
)
| ~["\\] {str += string(l.GetInputStream().LA(-1))}
)*
'"' {
str += "\""
l.SetText(str)
str = ""
}
|
// Single-quoted form: same escape handling, plus escaping of bare double quotes.
'\'' {str += "'"}
( '\\'
(
'\'' {str += "'" }
| ~['] {str += ("\\" + string(l.GetInputStream().LA(-1)))}
)
| '"' {str += "\\\"" }
| ~['"\\] {str += string(l.GetInputStream().LA(-1))}
)*
'\'' {
str += "'"
l.SetText(str)
str = ""
};
JSONIdentifier: Identifier('[' (StringLiteral | DecimalConstant) ']')+;
fragment EncodingPrefix: 'u8' | 'u' | 'U' | 'L';
fragment DoubleSCharSequence: DoubleSChar+;
fragment SingleSCharSequence: SingleSChar+;
fragment DoubleSChar: ~["\\\r\n] | EscapeSequence | '\\\n' | '\\\r\n';
fragment SingleSChar: ~['\\\r\n] | EscapeSequence | '\\\n' | '\\\r\n';
//fragment DoubleSCharSequence: DoubleSChar+;
//fragment SingleSCharSequence: SingleSChar+;
//
//fragment DoubleSChar: ~["\\\r\n] | EscapeSequence | '\\\n' | '\\\r\n';
//fragment SingleSChar: ~['\\\r\n] | EscapeSequence | '\\\n' | '\\\r\n';
fragment Nondigit: [a-zA-Z_];
fragment Digit: [0-9];
fragment BinaryConstant: '0' [bB] [0-1]+;
......
......@@ -127,16 +127,19 @@ func (v *ParserVisitor) VisitFloating(ctx *parser.FloatingContext) interface{} {
// VisitString translates expr to GenericValue.
func (v *ParserVisitor) VisitString(ctx *parser.StringContext) interface{} {
literal := ctx.StringLiteral().GetText()
if (strings.HasPrefix(literal, "\"") && strings.HasSuffix(literal, "\"")) ||
(strings.HasPrefix(literal, "'") && strings.HasSuffix(literal, "'")) {
literal = literal[1 : len(literal)-1]
if strings.HasPrefix(literal, "'") && strings.HasSuffix(literal, "'") {
literal = "\"" + literal[1:len(literal)-1] + "\""
}
pattern, err := strconv.Unquote(literal)
if err != nil {
return err
}
return &ExprWithType{
dataType: schemapb.DataType_VarChar,
expr: &planpb.Expr{
Expr: &planpb.Expr_ValueExpr{
ValueExpr: &planpb.ValueExpr{
Value: NewString(literal),
Value: NewString(pattern),
},
},
},
......@@ -420,10 +423,13 @@ func (v *ParserVisitor) VisitLike(ctx *parser.LikeContext) interface{} {
return fmt.Errorf("like operation on complicated expr is unsupported")
}
pattern := ctx.StringLiteral().GetText()
if (strings.HasPrefix(pattern, "\"") && strings.HasSuffix(pattern, "\"")) ||
(strings.HasPrefix(pattern, "'") && strings.HasSuffix(pattern, "'")) {
pattern = pattern[1 : len(pattern)-1]
literal := ctx.StringLiteral().GetText()
if strings.HasPrefix(literal, "'") && strings.HasSuffix(literal, "'") {
literal = "\"" + literal[1:len(literal)-1] + "\""
}
pattern, err := strconv.Unquote(literal)
if err != nil {
return err
}
op, operand, err := translatePatternMatch(pattern)
......
......@@ -4,10 +4,9 @@ import (
"sync"
"testing"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/proto/planpb"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/util/typeutil"
"github.com/stretchr/testify/assert"
)
......@@ -1544,3 +1543,106 @@ func Test_InvalidJSONContains(t *testing.T) {
})
assert.Error(t, err)
}
// Test_EscapeString verifies that CreateSearchPlan accepts filter expressions
// whose string literals contain escape sequences. It covers double-quoted and
// single-quoted literals, in both comparison and `like` expressions.
func Test_EscapeString(t *testing.T) {
	schema := newTestSchema()

	// Each expression must be turned into a plan without error.
	exprs := []string{
		`A == "\"" || B == '\"'`,
		`A == "\n" || B == '\n'`,
		`A == "\367" || B == '\367'`,
		`A == "\3678" || B == '\3678'`,
		`A == "ab'c\'d" || B == 'abc"de\"'`,
		`str2 like 'abc\"def-%'`,
		`str2 like 'abc"def-%'`,
		`str4 like "abc\367-%"`,
	}

	for _, expr := range exprs {
		_, err := CreateSearchPlan(schema, expr, "FloatVectorField", &planpb.QueryInfo{
			Topk:         0,
			MetricType:   "",
			SearchParams: "",
			RoundDecimal: 0,
		})
		// Name the expression: inside a loop the line number alone no longer
		// identifies which case failed.
		assert.NoError(t, err, "expr: %s", expr)
	}
}
// Test_InvalidEscapeString verifies that CreateSearchPlan rejects filter
// expressions whose string literals are malformed.
func Test_InvalidEscapeString(t *testing.T) {
	schema := newTestSchema()

	// Each expression must fail to produce a plan.
	exprs := []string{
		// Raw line breaks inside the quoted literals.
		`A == "ab
c" || B == 'ab
c'`,
		// Octal
		`A == "\423" || B == '\378'`,
	}

	for _, expr := range exprs {
		_, err := CreateSearchPlan(schema, expr, "FloatVectorField", &planpb.QueryInfo{
			Topk:         0,
			MetricType:   "",
			SearchParams: "",
			RoundDecimal: 0,
		})
		// Name the expression: inside a loop the line number alone no longer
		// identifies which case failed.
		assert.Error(t, err, "expr: %s", expr)
	}
}
......@@ -621,7 +621,7 @@ func (s *JSONExprSuite) checkSearch(collectionName, fieldName string, dim int) {
s.doSearch(collectionName, []string{fieldName}, expr, dim, checkFunc)
log.Info("like expression run successfully")
expr = `str1 like 'abc\"def-%'`
expr = `str1 like 'abc\\"def-%'`
checkFunc = func(result *milvuspb.SearchResults) {
s.Equal(1, len(result.Results.FieldsData))
s.Equal(fieldName, result.Results.FieldsData[0].GetFieldName())
......@@ -640,7 +640,7 @@ func (s *JSONExprSuite) checkSearch(collectionName, fieldName string, dim int) {
s.doSearch(collectionName, []string{fieldName}, expr, dim, checkFunc)
log.Info("like expression run successfully")
expr = `str2 like 'abc\"def-%'`
expr = `str2 like 'abc\\"def-%'`
checkFunc = func(result *milvuspb.SearchResults) {
for _, f := range result.Results.GetFieldsData() {
s.Nil(f)
......@@ -858,6 +858,8 @@ func newJSONData(fieldName string, rowNum int) *schemapb.FieldData {
},
"str1": `abc\"def-` + string(rune(i)),
"str2": fmt.Sprintf("abc\"def-%d", i),
"str3": fmt.Sprintf("abc\ndef-%d", i),
"str4": fmt.Sprintf("abc\367-%d", i),
}
if i%2 == 0 {
data = map[string]interface{}{
......@@ -912,6 +914,126 @@ func (s *JSONExprSuite) doSearchWithInvalidExpr(collectionName string, outputFie
s.NotEqual(commonpb.ErrorCode_Success, searchResult.GetStatus().GetErrorCode())
}
// TestJsonWithEscapeString creates a collection with the dynamic field
// enabled, inserts rows built by newJSONData as dynamic JSON data, and then
// runs `like` searches whose string literals contain escape sequences.
func (s *JSONExprSuite) TestJsonWithEscapeString() {
c := s.Cluster
ctx, cancel := context.WithCancel(c.GetContext())
defer cancel()
prefix := "TestHelloMilvus"
dbName := ""
// Random suffix so that each run works on its own collection name.
collectionName := prefix + funcutil.GenRandomStr()
dim := 128
rowNum := 100
// The schema declares only an auto-ID int64 primary key and a float vector;
// the JSON payload is supplied later through the dynamic field.
constructCollectionSchema := func() *schemapb.CollectionSchema {
pk := &schemapb.FieldSchema{
FieldID: 0,
Name: integration.Int64Field,
IsPrimaryKey: true,
Description: "",
DataType: schemapb.DataType_Int64,
TypeParams: nil,
IndexParams: nil,
AutoID: true,
}
fVec := &schemapb.FieldSchema{
FieldID: 0,
Name: integration.FloatVecField,
IsPrimaryKey: false,
Description: "",
DataType: schemapb.DataType_FloatVector,
TypeParams: []*commonpb.KeyValuePair{
{
Key: common.DimKey,
Value: strconv.Itoa(dim),
},
},
IndexParams: nil,
AutoID: false,
}
return &schemapb.CollectionSchema{
Name: collectionName,
Description: "",
AutoID: true,
EnableDynamicField: true,
Fields: []*schemapb.FieldSchema{
pk,
fVec,
},
}
}
schema := constructCollectionSchema()
marshaledSchema, err := proto.Marshal(schema)
s.NoError(err)
createCollectionStatus, err := c.Proxy.CreateCollection(ctx, &milvuspb.CreateCollectionRequest{
DbName: dbName,
CollectionName: collectionName,
Schema: marshaledSchema,
ShardsNum: 2,
})
s.NoError(err)
// Log the server-side reason before the assertion below reports the failure.
if createCollectionStatus.GetErrorCode() != commonpb.ErrorCode_Success {
log.Warn("createCollectionStatus fail reason", zap.String("reason", createCollectionStatus.GetReason()))
}
s.Equal(createCollectionStatus.GetErrorCode(), commonpb.ErrorCode_Success)
log.Info("CreateCollection result", zap.Any("createCollectionStatus", createCollectionStatus))
showCollectionsResp, err := c.Proxy.ShowCollections(ctx, &milvuspb.ShowCollectionsRequest{})
s.NoError(err)
s.Equal(showCollectionsResp.GetStatus().GetErrorCode(), commonpb.ErrorCode_Success)
log.Info("ShowCollections result", zap.Any("showCollectionsResp", showCollectionsResp))
describeCollectionResp, err := c.Proxy.DescribeCollection(ctx, &milvuspb.DescribeCollectionRequest{CollectionName: collectionName})
s.NoError(err)
s.True(describeCollectionResp.Schema.EnableDynamicField)
// Only the two declared fields are expected in the described schema.
s.Equal(2, len(describeCollectionResp.GetSchema().GetFields()))
fVecColumn := integration.NewFloatVectorFieldData(integration.FloatVecField, rowNum, dim)
// The JSON column is named after the meta field and flagged as dynamic data.
dynamicData := newJSONData(common.MetaFieldName, rowNum)
dynamicData.IsDynamic = true
s.insertFlushIndexLoad(ctx, dbName, collectionName, rowNum, dim, []*schemapb.FieldData{fVecColumn, dynamicData})
expr := ""
// search
// Each check below expects one JSON output column (the meta field) holding
// 10 entries. NOTE(review): the 10 presumably comes from the limit used by
// doSearch -- confirm there.
// `\\\"` is an escaped backslash followed by an escaped double quote.
expr = `str1 like "abc\\\"%"`
checkFunc := func(result *milvuspb.SearchResults) {
s.Equal(1, len(result.Results.FieldsData))
s.Equal(common.MetaFieldName, result.Results.FieldsData[0].GetFieldName())
s.Equal(schemapb.DataType_JSON, result.Results.FieldsData[0].GetType())
s.Equal(10, len(result.Results.FieldsData[0].GetScalars().GetJsonData().GetData()))
}
s.doSearch(collectionName, []string{common.MetaFieldName}, expr, dim, checkFunc)
// Escaped double quote inside a double-quoted literal.
expr = `str2 like "abc\"def-%"`
checkFunc = func(result *milvuspb.SearchResults) {
s.Equal(1, len(result.Results.FieldsData))
s.Equal(common.MetaFieldName, result.Results.FieldsData[0].GetFieldName())
s.Equal(schemapb.DataType_JSON, result.Results.FieldsData[0].GetType())
s.Equal(10, len(result.Results.FieldsData[0].GetScalars().GetJsonData().GetData()))
}
s.doSearch(collectionName, []string{common.MetaFieldName}, expr, dim, checkFunc)
// `\n` escape inside the literal.
expr = `str3 like "abc\ndef-%"`
checkFunc = func(result *milvuspb.SearchResults) {
s.Equal(1, len(result.Results.FieldsData))
s.Equal(common.MetaFieldName, result.Results.FieldsData[0].GetFieldName())
s.Equal(schemapb.DataType_JSON, result.Results.FieldsData[0].GetType())
s.Equal(10, len(result.Results.FieldsData[0].GetScalars().GetJsonData().GetData()))
}
s.doSearch(collectionName, []string{common.MetaFieldName}, expr, dim, checkFunc)
// The octal-escape case is disabled; the recorded failure reason follows.
// search fail reason: "string field contains invalid UTF-8"
//expr = `str4 like "abc\367-%"`
//checkFunc = func(result *milvuspb.SearchResults) {
// s.Equal(1, len(result.Results.FieldsData))
// s.Equal(common.MetaFieldName, result.Results.FieldsData[0].GetFieldName())
// s.Equal(schemapb.DataType_JSON, result.Results.FieldsData[0].GetType())
// s.Equal(10, len(result.Results.FieldsData[0].GetScalars().GetJsonData().GetData()))
//}
//s.doSearch(collectionName, []string{common.MetaFieldName}, expr, dim, checkFunc)
}
func TestJsonExpr(t *testing.T) {
suite.Run(t, new(JSONExprSuite))
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册