提交 b6838a74 编写于 作者: S Stephen Toub

Improve performance of Regex ctor and IsMatch

The Regex class maintains a cache of byte codes, which the Regex ctor indexes into using a key.  It uses this seemingly innocuous line to create that key:

String key = ((int)options).ToString(NumberFormatInfo.InvariantInfo) + ":" + cultureKey + ":" + pattern;

This, however, has the unfortunate effect of allocating a string for the options, a string array for the five strings to be passed to the String.Concat call generated by the compiler, another string array allocation inside of Concat, and then the resulting string for the whole operation.  The cost of those allocations is causing a non-trivial slowdown for repeated Regex.IsMatch calls for simple regular expressions, such as for a phone number (e.g. "^\\d{3}-\\d{3}-\\d{4}$").

This commit adds a new struct key type that just stores the constituent options, cultureKey, and pattern, rather than creating a string to store them.  That key is then what's stored in each entry in the cache.

For repeated Regex.IsMatch calls for basic regular expressions like the phone number one previously mentioned, on my machine this improves throughput by ~35%, in large part due to ~80% reduction in number of allocations, and (for this particular test case) an ~70% reduction in number of bytes allocated (it depends primarily on the length of the pattern and the length of the culture name).


Commit migrated from https://github.com/dotnet/corefx/commit/9820567e1bb1972502bfe6345963eb0ab99ef3f9
上级 d4f86b03
......@@ -124,7 +124,7 @@ private Regex(String pattern, RegexOptions options, TimeSpan matchTimeout, bool
else
cultureKey = CultureInfo.CurrentCulture.ToString();
String key = ((int)options).ToString(NumberFormatInfo.InvariantInfo) + ":" + cultureKey + ":" + pattern;
var key = new CachedCodeEntryKey(options, cultureKey, pattern);
cached = LookupCachedAndUpdate(key);
_pattern = pattern;
......@@ -916,7 +916,7 @@ internal Match Run(bool quick, int prevlen, String input, int beginning, int len
/*
* Find code cache based on options+pattern
*/
private static CachedCodeEntry LookupCachedAndUpdate(String key)
private static CachedCodeEntry LookupCachedAndUpdate(CachedCodeEntryKey key)
{
lock (s_livecode)
{
......@@ -938,7 +938,7 @@ private static CachedCodeEntry LookupCachedAndUpdate(String key)
/*
* Add current code to the cache
*/
private CachedCodeEntry CacheCode(String key)
private CachedCodeEntry CacheCode(CachedCodeEntryKey key)
{
CachedCodeEntry newcached = null;
......@@ -1003,13 +1003,54 @@ internal bool Debug
*/
public delegate String MatchEvaluator(Match match);
/*
* Used as a key for CachedCodeEntry
*/
/*
 * Composite key identifying a cached regex code entry. It keeps the
 * options, the culture key and the pattern as three separate fields
 * rather than joining them into one string. Two keys are equal when
 * all three components are equal.
 */
internal struct CachedCodeEntryKey : IEquatable<CachedCodeEntryKey>
{
    private readonly RegexOptions _options;
    private readonly string _cultureKey;
    private readonly string _pattern;

    /*
     * Captures the three components of the key exactly as supplied.
     */
    internal CachedCodeEntryKey(RegexOptions options, string cultureKey, string pattern)
    {
        _options = options;
        _cultureKey = cultureKey;
        _pattern = pattern;
    }

    /*
     * Untyped equality: only another CachedCodeEntryKey can be equal.
     */
    public override bool Equals(object obj)
    {
        if (!(obj is CachedCodeEntryKey))
        {
            return false;
        }

        return Equals((CachedCodeEntryKey)obj);
    }

    /*
     * Typed equality: compares the options first, then the culture key,
     * then the pattern, stopping at the first mismatch.
     */
    public bool Equals(CachedCodeEntryKey other)
    {
        if (_options != other._options)
        {
            return false;
        }

        if (!string.Equals(_cultureKey, other._cultureKey))
        {
            return false;
        }

        return string.Equals(_pattern, other._pattern);
    }

    public static bool operator ==(CachedCodeEntryKey left, CachedCodeEntryKey right)
    {
        return left.Equals(right);
    }

    public static bool operator !=(CachedCodeEntryKey left, CachedCodeEntryKey right)
    {
        return !left.Equals(right);
    }

    /*
     * XOR of the numeric options value with the hash codes of the
     * culture key and of the pattern.
     */
    public override int GetHashCode()
    {
        int hash = (int)_options;
        hash ^= _cultureKey.GetHashCode();
        hash ^= _pattern.GetHashCode();
        return hash;
    }
}
/*
* Used to cache byte codes
*/
internal sealed class CachedCodeEntry
{
internal string _key;
internal CachedCodeEntryKey _key;
internal RegexCode _code;
internal Dictionary<Int32, Int32> _caps;
internal Dictionary<String, Int32> _capnames;
......@@ -1018,7 +1059,7 @@ internal sealed class CachedCodeEntry
internal ExclusiveReference _runnerref;
internal SharedReference _replref;
internal CachedCodeEntry(string key, Dictionary<String, Int32> capnames, String[] capslist, RegexCode code, Dictionary<Int32, Int32> caps, int capsize, ExclusiveReference runner, SharedReference repl)
internal CachedCodeEntry(CachedCodeEntryKey key, Dictionary<String, Int32> capnames, String[] capslist, RegexCode code, Dictionary<Int32, Int32> caps, int capsize, ExclusiveReference runner, SharedReference repl)
{
_key = key;
_capnames = capnames;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册