Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
lwm1986
roslyn
提交
51f68c35
R
roslyn
项目概览
lwm1986
/
roslyn
与 Fork 源项目一致
从无法访问的项目Fork
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
R
roslyn
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
51f68c35
编写于
12月 09, 2015
作者:
C
Cyrus Najmabadi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Pool the matrix we use when computing edit distances.
上级
e9fe77c4
变更
1
显示空白变更内容
内联
并排
Showing
1 changed file
with
142 addition
and
85 deletion
+142
-85
src/Workspaces/Core/Portable/Utilities/EditDistance.cs
src/Workspaces/Core/Portable/Utilities/EditDistance.cs
+142
-85
未找到文件。
src/Workspaces/Core/Portable/Utilities/EditDistance.cs
浏览文件 @
51f68c35
...
...
@@ -37,19 +37,21 @@ public CacheResult(string candidate, int threshold, bool isCloseMatch, double ma
}
}
private
readonly
string
originalText
;
private
readonly
string
_source
;
private
char
[]
_sourceLowerCaseCharacters
;
// private readonly int threshold;
// Cache the result of the last call to IsCloseMatch. We'll often be called with the same
// value multiple times in a row, so we can avoid expensive computation by returning the
// same value immediately.
private
CacheResult
lastIsCloseMatchResult
;
private
readonly
int
defaultThreshold
;
private
CacheResult
_lastIsCloseMatchResult
;
private
readonly
int
_defaultThreshold
;
public
EditDistance
(
string
text
/*, int? threshold = null*/
)
{
this
.
originalText
=
text
.
ToLower
()
;
// originalTextArray
= ConvertToLowercaseArray(text);
this
.
_source
=
text
;
this
.
_sourceLowerCaseCharacters
=
ConvertToLowercaseArray
(
text
);
// We only allow fairly close matches (in order to prevent too many
// spurious hits). A reasonable heuristic for this is the Log_2(length) (rounded
...
...
@@ -60,11 +62,24 @@ public EditDistance(string text/*, int? threshold = null*/)
// length 8-15: 3 edits allowed.
//
// and so forth.
this
.
defaultThreshold
=
Max
(
1
,
(
int
)
Log
(
text
.
Length
,
2
));
this
.
_defaultThreshold
=
Max
(
1
,
(
int
)
Log
(
text
.
Length
,
2
));
}
/// <summary>
/// Copies <paramref name="text"/> into a buffer obtained from <c>Pool&lt;char&gt;.GetArray</c>,
/// lower-casing every character on the way.
/// </summary>
/// <param name="text">The text to lower-case. Must not be null.</param>
/// <returns>
/// A buffer whose first <c>text.Length</c> entries hold the lower-cased characters. The buffer
/// can be longer than the text (the pool hands out fixed-size arrays for small requests), so
/// callers must track the logical length themselves and give the buffer back through
/// <c>Pool&lt;char&gt;.ReleaseArray</c> when they are done with it.
/// </returns>
private static char[] ConvertToLowercaseArray(string text)
{
    var length = text.Length;
    var buffer = Pool<char>.GetArray(length);

    for (var index = 0; index < length; index++)
    {
        // Use the invariant culture: the lowered characters feed a distance computation whose
        // result must not change with the current thread culture (for example the Turkish
        // dotted/dotless 'i' mapping performed by the culture-sensitive char.ToLower).
        buffer[index] = char.ToLowerInvariant(text[index]);
    }

    return buffer;
}
/// <summary>
/// Returns the lower-cased source buffer to <c>Pool&lt;char&gt;</c> and clears the field that
/// referenced it.
/// </summary>
/// <remarks>
/// Calling this more than once is harmless: after the first call the field is null and later
/// calls return without touching the pool.
/// </remarks>
public void Dispose()
{
    var buffer = _sourceLowerCaseCharacters;
    if (buffer == null)
    {
        // Already disposed. Pool<char>.ReleaseArray reads array.Length, so it must never be
        // handed a null reference.
        return;
    }

    // Drop our reference first so the buffer cannot be released twice through this instance.
    _sourceLowerCaseCharacters = null;
    Pool<char>.ReleaseArray(buffer);
}
public
static
bool
IsCloseMatch
(
string
originalText
,
string
candidateText
,
int
?
threshold
=
null
)
...
...
@@ -101,45 +116,75 @@ public static int GetEditDistance(string s, string t)
/// <summary>
/// Computes the edit distance between the source text this instance was created with and
/// <paramref name="target"/>, comparing lower-cased characters.
/// </summary>
/// <param name="target">The text to compare against the source. Must not be null.</param>
/// <returns>The value produced by the array-based <c>GetEditDistance</c> overload.</returns>
public int GetEditDistance(string target)
{
    // The lowered copy of the target lives in a pooled buffer; it is handed back in the
    // finally block no matter how the computation ends.
    var loweredTarget = ConvertToLowercaseArray(target);
    try
    {
        // Lengths are passed explicitly because the buffers may be longer than the text
        // they hold.
        var sourceLength = _source.Length;
        var targetLength = target.Length;
        return GetEditDistance(_sourceLowerCaseCharacters, loweredTarget, sourceLength, targetLength);
    }
    finally
    {
        Pool<char>.ReleaseArray(loweredTarget);
    }
}
private
static
int
GetEditDistanceWorker
(
string
source
,
string
target
)
// Largest width/height that GetMatrix serves from s_matrixPool; larger requests are
// allocated on demand.
private const int MaxMatrixPoolDimension = 64;

// Every pooled matrix is MaxMatrixPoolDimension x MaxMatrixPoolDimension. The factory is
// written in terms of the constant (rather than a repeated literal) so the pooled size can
// never drift away from the limit GetMatrix checks requests against.
private static readonly ObjectPool<int[,]> s_matrixPool =
    new ObjectPool<int[,]>(() => new int[MaxMatrixPoolDimension, MaxMatrixPoolDimension]);
/// <summary>
/// Supplies a matrix with room for at least <paramref name="width"/> by
/// <paramref name="height"/> cells.
/// </summary>
/// <param name="width">Required size of the first dimension.</param>
/// <param name="height">Required size of the second dimension.</param>
/// <returns>
/// A matrix taken from <c>s_matrixPool</c> when both dimensions fit within
/// <c>MaxMatrixPoolDimension</c>; otherwise a newly allocated matrix of exactly the requested
/// size. A pooled matrix is returned as-is (it is not cleared here).
/// </returns>
private static int[,] GetMatrix(int width, int height)
{
    var fitsInPooledMatrix =
        width <= MaxMatrixPoolDimension &&
        height <= MaxMatrixPoolDimension;

    return fitsInPooledMatrix
        ? s_matrixPool.Allocate()
        : new int[width, height];
}
if
(
target
.
Length
==
0
)
/// <summary>
/// Gives a matrix obtained from <c>GetMatrix</c> back to <c>s_matrixPool</c> when it is a
/// pool-sized matrix; oversized matrices are simply left for the garbage collector.
/// </summary>
/// <param name="matrix">The matrix to release. Must not be null.</param>
private static void ReleaseMatrix(int[,] matrix)
{
    // GetMatrix allocates a fresh matrix as soon as EITHER requested dimension exceeds the
    // limit, so a matrix can only have come from the pool when BOTH of its dimensions are
    // within the limit. Testing with "||" would let e.g. a 70x5 matrix into the pool, and a
    // later caller asking for a 10x60 matrix would then index past its second dimension.
    var rows = matrix.GetLength(0);
    var columns = matrix.GetLength(1);

    if (rows <= MaxMatrixPoolDimension && columns <= MaxMatrixPoolDimension)
    {
        s_matrixPool.Free(matrix);
    }
}
target
=
target
.
ToLower
();
var
matrix
=
new
int
[
source
.
Length
+
2
,
target
.
Length
+
2
];
var
maxValue
=
source
.
Length
+
target
.
Length
+
1
;
private
static
int
GetEditDistance
(
char
[]
source
,
char
[]
target
,
int
sourceLength
,
int
targetLength
)
{
if
(
sourceLength
==
0
)
{
return
targetLength
;
}
if
(
targetLength
==
0
)
{
return
sourceLength
;
}
var
matrix
=
GetMatrix
(
sourceLength
+
2
,
targetLength
+
2
);
try
{
var
maxValue
=
sourceLength
+
targetLength
+
1
;
var
DA
=
new
Dictionary
<
char
,
int
>();
var
max
=
source
.
Length
+
target
.
Length
+
1
;
for
(
int
i
=
0
;
i
<=
source
.
Length
;
i
++)
var
max
=
sourceLength
+
target
Length
+
1
;
for
(
int
i
=
0
;
i
<=
source
Length
;
i
++)
{
matrix
[
i
+
1
,
1
]
=
i
;
matrix
[
i
+
1
,
0
]
=
max
;
}
for
(
int
j
=
1
;
j
<=
target
.
Length
;
j
++)
for
(
int
j
=
1
;
j
<=
target
Length
;
j
++)
{
matrix
[
1
,
j
+
1
]
=
j
;
matrix
[
0
,
j
+
1
]
=
max
;
}
for
(
int
i
=
1
;
i
<=
source
.
Length
;
i
++)
for
(
int
i
=
1
;
i
<=
source
Length
;
i
++)
{
var
DB
=
0
;
var
sourceChar
=
source
[
i
-
1
];
for
(
int
j
=
1
;
j
<=
target
.
Length
;
j
++)
for
(
int
j
=
1
;
j
<=
target
Length
;
j
++)
{
var
targetChar
=
target
[
j
-
1
];
...
...
@@ -162,7 +207,12 @@ private static int GetEditDistanceWorker(string source, string target)
DA
[
sourceChar
]
=
i
;
}
return
matrix
[
source
.
Length
+
1
,
target
.
Length
+
1
];
return
matrix
[
sourceLength
+
1
,
targetLength
+
1
];
}
finally
{
ReleaseMatrix
(
matrix
);
}
}
private
static
int
GetValue
(
Dictionary
<
char
,
int
>
da
,
char
c
)
...
...
@@ -178,7 +228,7 @@ public bool IsCloseMatch(string candidateText, out double matchCost)
public
bool
IsCloseMatch
(
string
candidateText
,
int
?
threshold
,
out
double
matchCost
)
{
if
(
this
.
originalText
.
Length
<
3
)
if
(
this
.
_source
.
Length
<
3
)
{
// If we're comparing strings that are too short, we'll find
// far too many spurious hits. Don't even bother in this case.
...
...
@@ -186,18 +236,25 @@ public bool IsCloseMatch(string candidateText, int? threshold, out double matchC
return
false
;
}
candidateText
=
candidateText
.
ToLower
();
threshold
=
threshold
??
this
.
defaultThreshold
;
if
(
lastIsCloseMatchResult
.
CandidateText
==
candidateText
&&
lastIsCloseMatchResult
.
Threshold
==
threshold
)
threshold
=
threshold
??
this
.
_defaultThreshold
;
if
(
_lastIsCloseMatchResult
.
CandidateText
==
candidateText
&&
_lastIsCloseMatchResult
.
Threshold
==
threshold
)
{
matchCost
=
lastIsCloseMatchResult
.
MatchCost
;
return
lastIsCloseMatchResult
.
IsCloseMatch
;
matchCost
=
_
lastIsCloseMatchResult
.
MatchCost
;
return
_
lastIsCloseMatchResult
.
IsCloseMatch
;
}
var
candidateCharArray
=
ConvertToLowercaseArray
(
candidateText
);
try
{
var
result
=
IsCloseMatchWorker
(
candidateText
,
threshold
.
Value
,
out
matchCost
);
lastIsCloseMatchResult
=
new
CacheResult
(
candidateText
,
threshold
.
Value
,
result
,
matchCost
);
_
lastIsCloseMatchResult
=
new
CacheResult
(
candidateText
,
threshold
.
Value
,
result
,
matchCost
);
return
result
;
}
finally
{
Pool
<
char
>.
ReleaseArray
(
candidateCharArray
);
}
}
private
bool
IsCloseMatchWorker
(
string
candidateText
,
int
threshold
,
out
double
matchCost
)
{
...
...
@@ -206,7 +263,7 @@ private bool IsCloseMatchWorker(string candidateText, int threshold, out double
// If the two strings differ by more characters than the cost threshold, then there's
// no point in even computing the edit distance as it would necessarily take at least
// that many additions/deletions.
if
(
Math
.
Abs
(
originalText
.
Length
-
candidateText
.
Length
)
<=
threshold
)
if
(
Math
.
Abs
(
_source
.
Length
-
candidateText
.
Length
)
<=
threshold
)
{
matchCost
=
GetEditDistance
(
candidateText
);
}
...
...
@@ -217,7 +274,7 @@ private bool IsCloseMatchWorker(string candidateText, int threshold, out double
// in the string we're currently looking at. That's enough to consider it
// although we place it just at the threshold (i.e. it's worse than all
// other matches).
if
(
candidateText
.
IndexOf
(
originalText
,
StringComparison
.
OrdinalIgnoreCase
)
>=
0
)
if
(
candidateText
.
IndexOf
(
_source
,
StringComparison
.
OrdinalIgnoreCase
)
>=
0
)
{
matchCost
=
threshold
;
}
...
...
@@ -228,7 +285,7 @@ private bool IsCloseMatchWorker(string candidateText, int threshold, out double
return
false
;
}
matchCost
+=
Penalty
(
candidateText
,
this
.
originalText
);
matchCost
+=
Penalty
(
candidateText
,
this
.
_source
);
return
true
;
}
...
...
@@ -287,6 +344,36 @@ private static void SetValue(int[,] matrix, int i, int j, int val)
// possible to index into the actual storage.
matrix
[
i
+
1
,
j
+
1
]
=
val
;
}
/// <summary>
/// Hands out reusable scratch arrays so that short-lived comparisons do not create garbage
/// on every call.
/// </summary>
internal static class Pool<T>
{
    // Length of every array kept in the pool. Requests up to this size are served from the
    // pool; larger requests are allocated on demand and never pooled.
    private const int MaxPooledArraySize = 256;

    // Keep around a few arrays of size 256 that we can use for operations without
    // causing lots of garbage to be created. If we do compare items larger than
    // that, then we will just allocate and release those arrays on demand.
    private static readonly ObjectPool<T[]> s_pool =
        new ObjectPool<T[]>(() => new T[MaxPooledArraySize]);

    /// <summary>
    /// Returns an array with room for at least <paramref name="size"/> elements.
    /// </summary>
    /// <param name="size">Number of elements the caller needs.</param>
    /// <returns>
    /// A cleared pooled array of length <c>MaxPooledArraySize</c> when the request fits,
    /// otherwise a new array of exactly <paramref name="size"/> elements. The returned array
    /// can therefore be longer than requested.
    /// </returns>
    public static T[] GetArray(int size)
    {
        if (size > MaxPooledArraySize)
        {
            return new T[size];
        }

        var pooled = s_pool.Allocate();

        // Wipe whatever the previous user left behind before handing the array out.
        Array.Clear(pooled, 0, pooled.Length);
        return pooled;
    }

    /// <summary>
    /// Gives back an array previously obtained from <see cref="GetArray"/>.
    /// </summary>
    /// <param name="array">The array to release; a null reference is ignored.</param>
    public static void ReleaseArray(T[] array)
    {
        // Only arrays of exactly the pooled length go back into the pool. GetArray relies on
        // every pooled array being MaxPooledArraySize long, so a shorter array must never be
        // stored; longer ones were allocated on demand and are left to the garbage collector.
        if (array != null && array.Length == MaxPooledArraySize)
        {
            s_pool.Free(array);
        }
    }
}
}
#if false
...
...
@@ -869,36 +956,6 @@ private static double Penalty(string candidateText, string originalText)
return
0
;
}
internal
static
class
Pool
<
T
>
{
private
const
int
MaxPooledArraySize
=
256
;
// Keep around a few arrays of size 256 that we can use for operations without
// causing lots of garbage to be created. If we do compare items larger than
// that, then we will just allocate and release those arrays on demand.
private
static
ObjectPool
<
T
[
]>
s_pool
=
new
ObjectPool
<
T
[
]>
(()
=>
new
T
[
MaxPooledArraySize
]);
public
static
T
[]
GetArray
(
int
size
)
{
if
(
size
<=
MaxPooledArraySize
)
{
var
array
=
s_pool
.
Allocate
();
Array
.
Clear
(
array
,
0
,
array
.
Length
);
return
array
;
}
return
new
T
[
size
];
}
public
static
void
ReleaseArray
(
T
[]
array
)
{
if
(
array
.
Length
<=
MaxPooledArraySize
)
{
s_pool
.
Free
(
array
);
}
}
}
}
#endif
}
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录