提交 7599e1f8 编写于 作者: C Cyrus Najmabadi

Provide public EditDistance entrypoints for working with character arrays.

上级 4885b741
...@@ -39,24 +39,23 @@ public CacheResult(string candidate, int threshold, bool isCloseMatch, double ma ...@@ -39,24 +39,23 @@ public CacheResult(string candidate, int threshold, bool isCloseMatch, double ma
private string _source; private string _source;
private char[] _sourceLowerCaseCharacters; private char[] _sourceLowerCaseCharacters;
// private readonly int threshold; private readonly int _defaultThreshold;
// Cache the result of the last call to IsCloseMatch. We'll often be called with the same // Cache the result of the last call to IsCloseMatch. We'll often be called with the same
// value multiple times in a row, so we can avoid expensive computation by returning the // value multiple times in a row, so we can avoid expensive computation by returning the
// same value immediately. // same value immediately.
private CacheResult _lastIsCloseMatchResult; private CacheResult _lastIsCloseMatchResult;
private readonly int _defaultThreshold;
public EditDistance(string text/*, int? threshold = null*/) public EditDistance(string text)
{ {
if (text == null) if (text == null)
{ {
throw new ArgumentNullException(nameof(text)); throw new ArgumentNullException(nameof(text));
} }
this._source = text; _source = text;
this._sourceLowerCaseCharacters = ConvertToLowercaseArray(text); _sourceLowerCaseCharacters = ConvertToLowercaseArray(text);
// We only allow fairly close matches (in order to prevent too many // We only allow fairly close matches (in order to prevent too many
// spurious hits). A reasonable heuristic for this is the Log_2(length) (rounded // spurious hits). A reasonable heuristic for this is the Log_2(length) (rounded
...@@ -67,7 +66,7 @@ public EditDistance(string text/*, int? threshold = null*/) ...@@ -67,7 +66,7 @@ public EditDistance(string text/*, int? threshold = null*/)
// length 8-15: 3 edits allowed. // length 8-15: 3 edits allowed.
// //
// and so forth. // and so forth.
this._defaultThreshold = Max(1, (int)Log(text.Length, 2)); _defaultThreshold = Max(1, (int)Log(_source.Length, 2));
} }
private static char[] ConvertToLowercaseArray(string text) private static char[] ConvertToLowercaseArray(string text)
...@@ -120,9 +119,14 @@ public static int GetEditDistance(string s, string t) ...@@ -120,9 +119,14 @@ public static int GetEditDistance(string s, string t)
} }
} }
/// <summary>
/// Public entry point for computing the edit distance between two character arrays.
/// The whole of each array is compared; the arrays are passed through exactly as given
/// (no case conversion is performed here).
/// </summary>
/// <param name="s">The first character array. Must not be null.</param>
/// <param name="t">The second character array. Must not be null.</param>
/// <returns>
/// The result of the four-argument <c>GetEditDistance</c> overload invoked with the
/// full lengths of <paramref name="s"/> and <paramref name="t"/>.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="s"/> or <paramref name="t"/> is null.
/// </exception>
public static int GetEditDistance(char[] s, char[] t)
{
    // Validate up front so callers get a named-argument error instead of a
    // NullReferenceException from the .Length accesses below.
    if (s == null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    if (t == null)
    {
        throw new ArgumentNullException(nameof(t));
    }

    return GetEditDistance(s, t, s.Length, t.Length);
}
public int GetEditDistance(string target) public int GetEditDistance(string target)
{ {
if (this._source == null) if (this._sourceLowerCaseCharacters == null)
{ {
throw new ObjectDisposedException(nameof(EditDistance)); throw new ObjectDisposedException(nameof(EditDistance));
} }
...@@ -130,9 +134,7 @@ public int GetEditDistance(string target) ...@@ -130,9 +134,7 @@ public int GetEditDistance(string target)
var targetLowerCaseCharacters = ConvertToLowercaseArray(target); var targetLowerCaseCharacters = ConvertToLowercaseArray(target);
try try
{ {
return _source.Length <= target.Length return GetEditDistance(_sourceLowerCaseCharacters, targetLowerCaseCharacters, _source.Length, target.Length);
? GetEditDistance(_sourceLowerCaseCharacters, targetLowerCaseCharacters, _source.Length, target.Length)
: GetEditDistance(targetLowerCaseCharacters, _sourceLowerCaseCharacters, target.Length, _source.Length);
} }
finally finally
{ {
...@@ -179,6 +181,13 @@ private static void ReleaseMatrix(int[,] matrix) ...@@ -179,6 +181,13 @@ private static void ReleaseMatrix(int[,] matrix)
} }
private static int GetEditDistance(char[] source, char[] target, int sourceLength, int targetLength) private static int GetEditDistance(char[] source, char[] target, int sourceLength, int targetLength)
{
return sourceLength <= targetLength
? GetEditDistanceWorker(source, target, sourceLength, targetLength)
: GetEditDistanceWorker(target, source, targetLength, sourceLength);
}
private static int GetEditDistanceWorker(char[] source, char[] target, int sourceLength, int targetLength)
{ {
// Note: sourceLength and targetLength values will mutate and represent the lengths // Note: sourceLength and targetLength values will mutate and represent the lengths
// of the portions of the arrays we want to compare. // of the portions of the arrays we want to compare.
...@@ -286,7 +295,7 @@ public bool IsCloseMatch(string candidateText, out double matchCost) ...@@ -286,7 +295,7 @@ public bool IsCloseMatch(string candidateText, out double matchCost)
public bool IsCloseMatch(string candidateText, int? threshold, out double matchCost) public bool IsCloseMatch(string candidateText, int? threshold, out double matchCost)
{ {
if (this._source.Length < 3) if (_source.Length < 3)
{ {
// If we're comparing strings that are too short, we'll find // If we're comparing strings that are too short, we'll find
// far too many spurious hits. Don't even bother in this case. // far too many spurious hits. Don't even bother in this case.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册