Merge pull request #19374 from CyrusNajmabadi/serializationSimplification

Simplify serialization.

Merge pull request #19374 from CyrusNajmabadi/serializationSimplification
Simplify serialization.
a404917e · CyrusNajmabadi · GitHub · 5e053aaf · 969b5cb1 · a404917e
4 changed files
--- a/src/Compilers/Core/Portable/Serialization/IObjectWritable.cs
+++ b/src/Compilers/Core/Portable/Serialization/IObjectWritable.cs
@@ -10,4 +10,4 @@ internal interface IObjectWritable
    {
        void WriteTo(ObjectWriter writer);
    }
-}
+}
\ No newline at end of file
--- a/src/Compilers/Core/Portable/Serialization/ObjectBinder.cs
+++ b/src/Compilers/Core/Portable/Serialization/ObjectBinder.cs
@@ -2,8 +2,6 @@

 using System;
 using System.Collections.Generic;
-using System.Collections.Immutable;
-using System.Linq;

 namespace Roslyn.Utilities
 {

--- a/src/Compilers/Core/Portable/Serialization/ObjectReader.cs
+++ b/src/Compilers/Core/Portable/Serialization/ObjectReader.cs
@@ -2,10 +2,8 @@

 using System;
 using System.Collections.Generic;
-using System.Collections.Immutable;
 using System.Diagnostics;
 using System.IO;
-using System.Runtime.CompilerServices;
 using System.Text;
 using System.Threading;
 using System.Threading.Tasks;
@@ -32,16 +30,18 @@ internal sealed partial class ObjectReader : IDisposable
        /// this version, just change VersionByte2.
        /// </summary>
        internal const byte VersionByte1 = 0b10101010;
-        internal const byte VersionByte2 = 0b00000111;
+        internal const byte VersionByte2 = 0b00001000;

        private readonly BinaryReader _reader;
        private readonly CancellationToken _cancellationToken;

        /// <summary>
        /// Map of reference id's to deserialized objects.
+        ///
+        /// These are not readonly because they're structs and we mutate them.
        /// </summary>
-        private readonly ReaderReferenceMap<object> _objectReferenceMap;
-        private readonly ReaderReferenceMap<string> _stringReferenceMap;
+        private ReaderReferenceMap<object> _objectReferenceMap;
+        private ReaderReferenceMap<string> _stringReferenceMap;

        /// <summary>
        /// Copy of the global binder data that maps from Types to the appropriate reading-function
@@ -67,8 +67,8 @@ internal sealed partial class ObjectReader : IDisposable
            Debug.Assert(BitConverter.IsLittleEndian);

            _reader = new BinaryReader(stream, Encoding.UTF8);
-            _objectReferenceMap = new ReaderReferenceMap<object>();
-            _stringReferenceMap = new ReaderReferenceMap<string>();
+            _objectReferenceMap = ReaderReferenceMap<object>.Create();
+            _stringReferenceMap = ReaderReferenceMap<string>.Create();

            // Capture a copy of the current static binder state.  That way we don't have to 
            // access any locks while we're doing our processing.
@@ -227,17 +227,18 @@ private object ReadValueWorker()
        /// <summary>
        /// An reference-id to object map, that can share base data efficiently.
        /// </summary>
-        private class ReaderReferenceMap<T> where T : class
+        private struct ReaderReferenceMap<T> where T : class
        {
            private readonly List<T> _values;

            internal static readonly ObjectPool<List<T>> s_objectListPool
                = new ObjectPool<List<T>>(() => new List<T>(20));

-            public ReaderReferenceMap()
-            {
-                _values = s_objectListPool.Allocate();
-            }
+            private ReaderReferenceMap(List<T> values)
+                => _values = values;
+
+            public static ReaderReferenceMap<T> Create()
+                => new ReaderReferenceMap<T>(s_objectListPool.Allocate());

            public void Dispose()
            {
@@ -245,21 +246,22 @@ public void Dispose()
                s_objectListPool.Free(_values);
            }

-            public int GetNextReferenceId()
+
+            public int GetNextObjectId()
            {
+                var id = _values.Count;
                _values.Add(null);
-                return _values.Count - 1;
+                return id;
            }

-            public void SetValue(int referenceId, T value)
-            {
-                _values[referenceId] = value;
-            }
+            public void AddValue(T value)
+                => _values.Add(value);
+
+            public void AddValue(int index, T value)
+                => _values[index] = value;

            public T GetValue(int referenceId)
-            {
-                return _values[referenceId];
-            }
+                => _values[referenceId];
        }

        internal uint ReadCompressedUInt()
@@ -321,7 +323,6 @@ private string ReadStringValue(EncodingKind kind)

        private unsafe string ReadStringLiteral(EncodingKind kind)
        {
-            int id = _stringReferenceMap.GetNextReferenceId();
            string value;
            if (kind == EncodingKind.StringUtf8)
            {
@@ -338,7 +339,7 @@ private unsafe string ReadStringLiteral(EncodingKind kind)
                }
            }

-            _stringReferenceMap.SetValue(id, value);
+            _stringReferenceMap.AddValue(value);
            return value;
        }

@@ -576,21 +577,18 @@ public Type ReadType()
        private Type ReadTypeAfterTag()
            => _binderSnapshot.GetTypeFromId(this.ReadInt32());

-        private Func<ObjectReader, object> ReadTypeReader()
-        {
-            _reader.ReadByte();
-            return _binderSnapshot.GetTypeReaderFromId(this.ReadInt32());
-        }
-
        private object ReadObject()
        {
-            var id = _objectReferenceMap.GetNextReferenceId();
+            var objectId = _objectReferenceMap.GetNextObjectId();
+
+            // reading an object may recurse.  So we need to grab our ID up front as we'll
+            // end up making our sub-objects before we make this object.

-            var typeReader = this.ReadTypeReader();
+            var typeReader = _binderSnapshot.GetTypeReaderFromId(this.ReadInt32());

            // recursive: read and construct instance immediately from member elements encoding next in the stream
            var instance = typeReader(this);
-            _objectReferenceMap.SetValue(id, instance);
+            _objectReferenceMap.AddValue(objectId, instance);
            return instance;
        }

@@ -609,4 +607,4 @@ private static Exception NoSerializationReaderException(string typeName)
            return new InvalidOperationException(string.Format(Resources.Cannot_serialize_type_0, typeName));
        }
    }
-}
+}
\ No newline at end of file
--- a/src/Compilers/Core/Portable/Serialization/ObjectWriter.cs
+++ b/src/Compilers/Core/Portable/Serialization/ObjectWriter.cs
@@ -32,9 +32,26 @@ internal sealed partial class ObjectWriter : IDisposable
        /// Map of serialized object's reference ids.  The object-reference-map uses reference equality
        /// for performance.  While the string-reference-map uses value-equality for greater cache hits 
        /// and reuse.
+        /// 
+        /// These are not readonly because they're structs and we mutate them.
+        /// 
+        /// When we write out objects/strings we give each successive, unique, item a monotonically 
+        /// increasing integral ID starting at 0.  I.e. the first object gets ID-0, the next gets 
+        /// ID-1 and so on and so forth.  We do *not* include these IDs with the object when it is
+        /// written out.  We only include the ID if we hit the object *again* while writing.
+        /// 
+        /// During reading, the reader knows to give each object it reads the same monotonically 
+        /// increasing integral value.  i.e. the first object it reads is put into an array at position
+        /// 0, the next at position 1, and so on.  Then, when the reader reads in an object-reference
+        /// it can just retrieve it directly from that array.
+        /// 
+        /// In other words, writing and reading take advantage of the fact that they know they will
+        /// write and read objects in the exact same order.  So they only need the IDs for references
+        /// and not the objects themselves because the ID is inferred from the order the object is
+        /// written or read in.
        /// </summary>
-        private readonly WriterReferenceMap _objectReferenceMap;
-        private readonly WriterReferenceMap _stringReferenceMap;
+        private WriterReferenceMap _objectReferenceMap;
+        private WriterReferenceMap _stringReferenceMap;

        /// <summary>
        /// Copy of the global binder data that maps from Types to the appropriate reading-function
@@ -279,7 +296,7 @@ private void WriteEncodedUInt32(uint v)
        /// <summary>
        /// An object reference to reference-id map, that can share base data efficiently.
        /// </summary>
-        private class WriterReferenceMap
+        private struct WriterReferenceMap
        {
            private readonly Dictionary<object, int> _valueToIdMap;
            private readonly bool _valueEquality;
@@ -294,16 +311,16 @@ private class WriterReferenceMap
            public WriterReferenceMap(bool valueEquality)
            {
                _valueEquality = valueEquality;
-                _valueToIdMap = GetDictionaryPool().Allocate();
+                _valueToIdMap = GetDictionaryPool(valueEquality).Allocate();
                _nextId = 0;
            }

-            private ObjectPool<Dictionary<object, int>> GetDictionaryPool()
-                => _valueEquality ? s_valueDictionaryPool : s_referenceDictionaryPool;
+            private static ObjectPool<Dictionary<object, int>> GetDictionaryPool(bool valueEquality)
+                => valueEquality ? s_valueDictionaryPool : s_referenceDictionaryPool;

            public void Dispose()
            {
-                var pool = GetDictionaryPool();
+                var pool = GetDictionaryPool(_valueEquality);

                // If the map grew too big, don't return it to the pool.
                // When testing with the Roslyn solution, this dropped only 2.5% of requests.
@@ -319,15 +336,12 @@ public void Dispose()
            }

            public bool TryGetReferenceId(object value, out int referenceId)
-            {
-                return _valueToIdMap.TryGetValue(value, out referenceId);
-            }
+                => _valueToIdMap.TryGetValue(value, out referenceId);

-            public int Add(object value)
+            public void Add(object value)
            {
                var id = _nextId++;
                _valueToIdMap.Add(value, id);
-                return id;
            }
        }

@@ -728,7 +742,8 @@ private void WriteObject(object instance, IObjectWritable instanceAsWritableOpt)
                    // don't blow the stack.  'LongRunning' ensures that we get a dedicated thread
                    // to do this work.  That way we don't end up blocking the threadpool.
                    var task = Task.Factory.StartNew(
-                        () => WriteObjectWorker(instance, writable),
+                        obj => WriteObjectWorker((IObjectWritable)obj),
+                        writable,
                        _cancellationToken,
                        TaskCreationOptions.LongRunning,
                        TaskScheduler.Default);
@@ -736,7 +751,7 @@ private void WriteObject(object instance, IObjectWritable instanceAsWritableOpt)
                }
                else
                {
-                    WriteObjectWorker(instance, writable);
+                    WriteObjectWorker(writable);
                }

                _recursionDepth--;
@@ -744,19 +759,17 @@ private void WriteObject(object instance, IObjectWritable instanceAsWritableOpt)
            }
        }

-        private void WriteObjectWorker(object instance, IObjectWritable writable)
+        private void WriteObjectWorker(IObjectWritable writable)
        {
            // emit object header up front
-            this.WriteObjectHeader(instance, 0);
-            writable.WriteTo(this);
-        }
-
-        private void WriteObjectHeader(object instance, uint memberCount)
-        {
-            _objectReferenceMap.Add(instance);
+            _objectReferenceMap.Add(writable);

            _writer.Write((byte)EncodingKind.Object);
-            this.WriteKnownType(instance.GetType());
+
+            // Directly write out the type-id for this object.  i.e. no need to write out the 'Type'
+            // tag since we just wrote out the 'Object' tag
+            this.WriteInt32(_binderSnapshot.GetTypeId(writable.GetType()));
+            writable.WriteTo(this);
        }

        private static Exception NoSerializationTypeException(string typeName)