Remove some overhead from XmlNode.SelectSingleNode (#54299)

* Remove unnecessary cost from XmlNode.SelectSingleNode: It's currently implemented by delegating to SelectNodes and returning the first node. While the list is lazily populated, this still entails creating an `XPathNodeList`, creating a `List<XmlNode>`, storing the enumerated node into the list, and then returning the element from the list, which is then thrown away. With just a few lines, we can cut through all of that.
* Make XPathScanner and XPathParser into structs: No need for these to be allocated classes. They can just live on the stack where they're created.
* Reduce ScanName overhead: This shows up on the hot path of parsing the xpath expression. We can use a span to avoid bounds checking.
* Avoid bounds check in NextChar: NextChar is used by lots of routines to advance to the next character. We can streamline it to avoid the bounds check when indexing into the string.
* Optimize SkipSpace for there not being any: The typical case is that there isn't any whitespace, so inline that fast check. This was showing up as a few percent in a simple scenario.

Remove some overhead from XmlNode.SelectSingleNode (#54299)
* Remove unnecessary cost from XmlNode.SelectSingleNode: It's currently implemented by delegating to SelectNodes and returning the first node. While the list is lazily populated, this still entails creating an `XPathNodeList`, creating a `List<XmlNode>`, storing the enumerated node into the list, and then returning the element from the list, which is then thrown away. With just a few lines, we can cut through all of that.
* Make XPathScanner and XPathParser into structs: No need for these to be allocated classes. They can just live on the stack where they're created.
* Reduce ScanName overhead: This shows up on the hot path of parsing the xpath expression. We can use a span to avoid bounds checking.
* Avoid bounds check in NextChar: NextChar is used by lots of routines to advance to the next character. We can streamline it to avoid the bounds check when indexing into the string.
* Optimize SkipSpace for there not being any: The typical case is that there isn't any whitespace, so inline that fast check. This was showing up as a few percent in a simple scenario.
bbc3366a · Stephen Toub · GitHub · 386d0b19 · bbc3366a · bbc3366a
3 changed files
--- a/src/libraries/System.Private.Xml/src/System/Xml/Dom/XmlNode.cs
+++ b/src/libraries/System.Private.Xml/src/System/Xml/Dom/XmlNode.cs
@@ -48,9 +48,17 @@ internal XmlNode(XmlDocument doc)
        // Selects the first node that matches the xpath expression
        public XmlNode? SelectSingleNode(string xpath)
        {
-            XmlNodeList? list = SelectNodes(xpath);
-            // SelectNodes returns null for certain node types
-            return list != null ? list[0] : null;
+            if (CreateNavigator() is XPathNavigator navigator)
+            {
+                XPathNodeIterator nodeIterator = navigator.Select(xpath);
+                if (nodeIterator.MoveNext())
+                {
+                    Debug.Assert(nodeIterator.Current != null);
+                    return ((IHasXmlNode)nodeIterator.Current).GetNode();
+                }
+            }
+
+            return null;
        }

        // Selects the first node that matches the xpath expression and given namespace context.

--- a/src/libraries/System.Private.Xml/src/System/Xml/XPath/Internal/XPathParser.cs
+++ b/src/libraries/System.Private.Xml/src/System/Xml/XPath/Internal/XPathParser.cs
@@ -9,35 +9,34 @@

 namespace MS.Internal.Xml.XPath
 {
-    internal sealed class XPathParser
+    internal struct XPathParser
    {
-        private readonly XPathScanner _scanner;
+        private XPathScanner _scanner;

-        private XPathParser(XPathScanner scanner)
+        private XPathParser(string xpathExpr)
        {
-            _scanner = scanner;
+            _scanner = new XPathScanner(xpathExpr);
+            _parseDepth = 0;
        }

        public static AstNode ParseXPathExpression(string xpathExpression)
        {
-            XPathScanner scanner = new XPathScanner(xpathExpression);
-            XPathParser parser = new XPathParser(scanner);
+            XPathParser parser = new XPathParser(xpathExpression);
            AstNode result = parser.ParseExpression(null);
-            if (scanner.Kind != XPathScanner.LexKind.Eof)
+            if (parser._scanner.Kind != XPathScanner.LexKind.Eof)
            {
-                throw XPathException.Create(SR.Xp_InvalidToken, scanner.SourceText);
+                throw XPathException.Create(SR.Xp_InvalidToken, parser._scanner.SourceText);
            }
            return result;
        }

        public static AstNode ParseXPathPattern(string xpathPattern)
        {
-            XPathScanner scanner = new XPathScanner(xpathPattern);
-            XPathParser parser = new XPathParser(scanner);
+            XPathParser parser = new XPathParser(xpathPattern);
            AstNode result = parser.ParsePattern();
-            if (scanner.Kind != XPathScanner.LexKind.Eof)
+            if (parser._scanner.Kind != XPathScanner.LexKind.Eof)
            {
-                throw XPathException.Create(SR.Xp_InvalidToken, scanner.SourceText);
+                throw XPathException.Create(SR.Xp_InvalidToken, parser._scanner.SourceText);
            }
            return result;
        }
@@ -224,17 +223,12 @@ private AstNode ParseUnionExpr(AstNode? qyInput)
            } while (true);
        }

-        private static bool IsNodeType(XPathScanner scaner)
-        {
-            return (
-                scaner.Prefix.Length == 0 && (
-                    scaner.Name == "node" ||
-                    scaner.Name == "text" ||
-                    scaner.Name == "processing-instruction" ||
-                    scaner.Name == "comment"
-                )
-            );
-        }
+        private bool IsNodeType =>
+            _scanner.Prefix.Length == 0 &&
+            (_scanner.Name == "node" ||
+             _scanner.Name == "text" ||
+             _scanner.Name == "processing-instruction" ||
+             _scanner.Name == "comment");

        //>> PathOp   ::= '/' | '//'
        //>> PathExpr ::= LocationPath |
@@ -242,7 +236,7 @@ private static bool IsNodeType(XPathScanner scaner)
        private AstNode ParsePathExpr(AstNode? qyInput)
        {
            AstNode opnd;
-            if (IsPrimaryExpr(_scanner))
+            if (IsPrimaryExpr)
            { // in this moment we should distinct LocationPas vs FilterExpr (which starts from is PrimaryExpr)
                opnd = ParseFilterExpr(qyInput);
                if (_scanner.Kind == XPathScanner.LexKind.Slash)
@@ -407,7 +401,7 @@ private AstNode ParseNodeTest(AstNode? qyInput, Axis.AxisType axisType, XPathNod
            switch (_scanner.Kind)
            {
                case XPathScanner.LexKind.Name:
-                    if (_scanner.CanBeFunction && IsNodeType(_scanner))
+                    if (_scanner.CanBeFunction && IsNodeType)
                    {
                        nodePrefix = string.Empty;
                        nodeName = string.Empty;
@@ -457,21 +451,17 @@ private AstNode ParseNodeTest(AstNode? qyInput, Axis.AxisType axisType, XPathNod
            return new Axis(axisType, qyInput, nodePrefix, nodeName, nodeType);
        }

-        private static bool IsPrimaryExpr(XPathScanner scanner)
-        {
-            return (
-                scanner.Kind == XPathScanner.LexKind.String ||
-                scanner.Kind == XPathScanner.LexKind.Number ||
-                scanner.Kind == XPathScanner.LexKind.Dollar ||
-                scanner.Kind == XPathScanner.LexKind.LParens ||
-                scanner.Kind == XPathScanner.LexKind.Name && scanner.CanBeFunction && !IsNodeType(scanner)
-            );
-        }
+        private bool IsPrimaryExpr =>
+            _scanner.Kind == XPathScanner.LexKind.String ||
+            _scanner.Kind == XPathScanner.LexKind.Number ||
+            _scanner.Kind == XPathScanner.LexKind.Dollar ||
+            _scanner.Kind == XPathScanner.LexKind.LParens ||
+            _scanner.Kind == XPathScanner.LexKind.Name && _scanner.CanBeFunction && !IsNodeType;

        //>> PrimaryExpr ::= Literal | Number | VariableReference | '(' Expr ')' | FunctionCall
        private AstNode ParsePrimaryExpr(AstNode? qyInput)
        {
-            Debug.Assert(IsPrimaryExpr(_scanner));
+            Debug.Assert(IsPrimaryExpr);
            AstNode? opnd = null;
            switch (_scanner.Kind)
            {
@@ -499,7 +489,7 @@ private AstNode ParsePrimaryExpr(AstNode? qyInput)
                    PassToken(XPathScanner.LexKind.RParens);
                    break;
                case XPathScanner.LexKind.Name:
-                    if (_scanner.CanBeFunction && !IsNodeType(_scanner))
+                    if (_scanner.CanBeFunction && !IsNodeType)
                    {
                        opnd = ParseMethod(null);
                    }

--- a/src/libraries/System.Private.Xml/src/System/Xml/XPath/Internal/XPathScanner.cs
+++ b/src/libraries/System.Private.Xml/src/System/Xml/XPath/Internal/XPathScanner.cs
@@ -4,12 +4,13 @@
 using System;
 using System.Diagnostics;
 using System.Globalization;
+using System.Runtime.CompilerServices;
 using System.Xml;
 using System.Xml.XPath;

 namespace MS.Internal.Xml.XPath
 {
-    internal sealed class XPathScanner
+    internal struct XPathScanner
    {
        private readonly string _xpathExpr;
        private int _xpathExprIndex;
@@ -18,16 +19,17 @@ internal sealed class XPathScanner
        private string? _name;
        private string? _prefix;
        private string? _stringValue;
-        private double _numberValue = double.NaN;
+        private double _numberValue;
        private bool _canBeFunction;

-        public XPathScanner(string xpathExpr)
+        public XPathScanner(string xpathExpr) : this()
        {
            if (xpathExpr == null)
            {
                throw XPathException.Create(SR.Xp_ExprExpected, string.Empty);
            }
            _xpathExpr = xpathExpr;
+            _numberValue = double.NaN;
            NextChar();
            NextLex();
        }
@@ -39,16 +41,18 @@ public XPathScanner(string xpathExpr)
        private bool NextChar()
        {
            Debug.Assert(0 <= _xpathExprIndex && _xpathExprIndex <= _xpathExpr.Length);
-            if (_xpathExprIndex < _xpathExpr.Length)
+
+            string expr = _xpathExpr;
+            int index = _xpathExprIndex;
+            if ((uint)index < (uint)expr.Length)
            {
-                _currentChar = _xpathExpr[_xpathExprIndex++];
+                _currentChar = expr[index];
+                _xpathExprIndex = index + 1;
                return true;
            }
-            else
-            {
-                _currentChar = '\0';
-                return false;
-            }
+
+            _currentChar = '\0';
+            return false;
        }

        public LexKind Kind { get { return _kind; } }
@@ -104,9 +108,19 @@ public bool CanBeFunction
            }
        }

+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private void SkipSpace()
        {
-            while (XmlCharType.IsWhiteSpace(CurrentChar) && NextChar()) ;
+            if (XmlCharType.IsWhiteSpace(CurrentChar))
+            {
+                SkipKnownSpace();
+            }
+        }
+
+        private void SkipKnownSpace()
+        {
+            Debug.Assert(XmlCharType.IsWhiteSpace(CurrentChar));
+            while (NextChar() && XmlCharType.IsWhiteSpace(CurrentChar));
        }

        public bool NextLex()
@@ -312,23 +326,26 @@ private string ScanString()

        private string ScanName()
        {
-            Debug.Assert(XmlCharType.IsStartNCNameSingleChar(CurrentChar));
-            int start = _xpathExprIndex - 1;
-            int len = 0;
+            ReadOnlySpan<char> span = _xpathExpr.AsSpan(_xpathExprIndex - 1);

-            while (true)
+            Debug.Assert(!span.IsEmpty);
+            Debug.Assert(span[0] == CurrentChar);
+            Debug.Assert(XmlCharType.IsStartNCNameSingleChar(span[0]));
+            Debug.Assert(XmlCharType.IsNCNameSingleChar(span[0]));
+
+            int i;
+            for (i = 1; i < span.Length && XmlCharType.IsNCNameSingleChar(span[i]); i++);
+
+            if ((uint)i < (uint)span.Length)
            {
-                if (XmlCharType.IsNCNameSingleChar(CurrentChar))
-                {
-                    NextChar();
-                    len++;
-                }
-                else
-                {
-                    break;
-                }
+                _currentChar = span[i];
+                _xpathExprIndex += i;
+                return span.Slice(0, i).ToString();
            }
-            return _xpathExpr.Substring(start, len);
+
+            _currentChar = '\0';
+            _xpathExprIndex += i - 1;
+            return span.ToString();
        }

        public enum LexKind