// Copyright (c) Microsoft.  All Rights Reserved.  Licensed under the Apache License, Version 2.0.  See License.txt in the project root for license information.

using System;
using System.Collections.Generic;
using System.IO;
using System.Threading;
using Microsoft.CodeAnalysis.Host;
using Microsoft.CodeAnalysis.Options;
using Microsoft.CodeAnalysis.SQLite.Interop;
using Microsoft.CodeAnalysis.Storage;

namespace Microsoft.CodeAnalysis.SQLite
{
    /// <summary>
    /// Implementation of an <see cref="IPersistentStorage"/> backed by SQLite.
    /// </summary>
    internal partial class SQLitePersistentStorage : AbstractPersistentStorage
    {
        // Bumping this version changes every table name below, effectively starting
        // a fresh schema while leaving any old-version tables untouched.
        private const string Version = "1";

        /// <summary>
        /// Inside the DB we have a table dedicated to storing strings that also provides a unique 
        /// integral ID per string.  This allows us to store data keyed in a much more efficient
        /// manner as we can use those IDs instead of duplicating strings all over the place.  For
        /// example, there may be many pieces of data associated with a file.  We don't want to 
        /// key off the file path in all these places as that would cause a large amount of bloat.
        /// 
        /// Because the string table can map from arbitrary strings to unique IDs, it can also be
        /// used to create IDs for compound objects.  For example, given the IDs for the FilePath
        /// and Name of a Project, we can get an ID that represents the project itself by just
        /// creating a compound key of those two IDs.  This ID can then be used in other compound
        /// situations.  For example, a Document's ID is created by compounding its Project's 
        /// ID, along with the IDs for the Document's FilePath and Name.
        /// 
        /// The format of the table is:
        /// 
        ///  StringInfo
        ///  --------------------------------------------------------------
        ///  | Id (integer, primary key, auto increment) | Data (varchar) |
        ///  --------------------------------------------------------------
        /// </summary>
        private const string StringInfoTableName = "StringInfo" + Version;

        /// <summary>
        /// Inside the DB we have a table for data corresponding to the <see cref="Solution"/>.  The 
        /// data is just a blob that is keyed by a string Id.  Data with this ID can be retrieved
        /// or overwritten.
        /// 
        /// The format of the table is:
        /// 
        ///  SolutionData
        ///  -----------------------------------------------
        ///  | DataId (primary key, varchar) | Data (blob) |
        ///  -----------------------------------------------
        /// </summary>
        private const string SolutionDataTableName = "SolutionData" + Version;

        /// <summary>
        /// Inside the DB we have a table for data that we want associated with a <see cref="Project"/>.
        /// The data is keyed off of an integral value produced by combining the ID of the Project and
        /// the ID of the name of the data (see <see cref="SQLitePersistentStorage.ReadStreamAsync(Project, string, CancellationToken)"/>.
        /// 
        /// This gives a very efficient integral key, and means that we only have to store a 
        /// single mapping from stream name to ID in the string table.
        /// 
        /// The format of the table is:
        /// 
        ///  ProjectData
        ///  -----------------------------------------------
        ///  | DataId (primary key, integer) | Data (blob) |
        ///  -----------------------------------------------
        /// </summary>
        private const string ProjectDataTableName = "ProjectData" + Version;

        /// <summary>
        /// Inside the DB we have a table for data that we want associated with a <see cref="Document"/>.
        /// The data is keyed off of an integral value produced by combining the ID of the Document and
        /// the ID of the name of the data (see <see cref="SQLitePersistentStorage.ReadStreamAsync(Document, string, CancellationToken)"/>.
        /// 
        /// This gives a very efficient integral key, and means that we only have to store a 
        /// single mapping from stream name to ID in the string table.
        /// 
        /// The format of the table is:
        /// 
        ///  DocumentData
        ///  ----------------------------------------------
        ///  | DataId (primary key, integer) | Data (blob) |
        ///  ----------------------------------------------
        /// </summary>
        private const string DocumentDataTableName = "DocumentData" + Version;

        private const string DataIdColumnName = "DataId";
        private const string DataColumnName = "Data";

        // Signaled in CloseWorker; tells pooled-connection release and any outstanding
        // async work that the storage is shutting down.
        private readonly CancellationTokenSource _shutdownTokenSource = new CancellationTokenSource();

        // Held for the lifetime of this storage to guarantee exclusive ownership of the
        // underlying database file; released in Dispose.
        private readonly IDisposable _dbOwnershipLock;
        private readonly IPersistentStorageFaultInjector _faultInjectorOpt;

        // Accessors that allow us to retrieve/store data into specific DB tables.  The
        // core Accessor type has logic that we want to share across all reading/writing,
        // while the derived types contain only enough logic to specify how to read/write
        // from their respective tables.

        private readonly SolutionAccessor _solutionAccessor;
        private readonly ProjectAccessor _projectAccessor;
        private readonly DocumentAccessor _documentAccessor;

        // We pool connections to the DB so that we don't have to take the hit of 
        // reconnecting.  The connections also cache the prepared statements used
        // to get/set data from the db.  A connection is safe to use by one thread
        // at a time, but is not safe for simultaneous use by multiple threads.
        private readonly object _connectionGate = new object();
        private readonly Stack<SqlConnection> _connectionsPool = new Stack<SqlConnection>();

        public SQLitePersistentStorage(
            string workingFolderPath,
            string solutionFilePath,
            string databaseFile,
            IDisposable dbOwnershipLock,
            IPersistentStorageFaultInjector faultInjectorOpt)
            : base(workingFolderPath, solutionFilePath, databaseFile)
        {
            _dbOwnershipLock = dbOwnershipLock;
            _faultInjectorOpt = faultInjectorOpt;

            _solutionAccessor = new SolutionAccessor(this);
            _projectAccessor = new ProjectAccessor(this);
            _documentAccessor = new DocumentAccessor(this);
        }

        /// <summary>
        /// Returns a pooled connection if one is available, otherwise creates a new one.
        /// Callers should go through <see cref="GetPooledConnection"/> so the connection
        /// is reliably returned to the pool.
        /// </summary>
        private SqlConnection GetConnection()
        {
            lock (_connectionGate)
            {
                // If we have an available connection, just return that.
                if (_connectionsPool.Count > 0)
                {
                    return _connectionsPool.Pop();
                }
            }

            // Otherwise create a new connection.  Done outside the lock as connection
            // creation hits the file system and doesn't touch shared pool state.
            return SqlConnection.Create(_faultInjectorOpt, this.DatabaseFile);
        }

        /// <summary>
        /// Returns a connection to the pool for later reuse, or closes it outright if
        /// this storage has already started shutting down.
        /// </summary>
        private void ReleaseConnection(SqlConnection connection)
        {
            lock (_connectionGate)
            {
                // If we've been asked to shutdown, then don't actually add the connection back to 
                // the pool.  Instead, just close it as we no longer need it.
                if (_shutdownTokenSource.IsCancellationRequested)
                {
                    connection.Close_OnlyForUseBySqlPersistentStorage();
                    return;
                }

                _connectionsPool.Push(connection);
            }
        }

        public override void Dispose()
        {
            // Flush all pending writes so that all data our features wanted written
            // are definitely persisted to the DB.
            try
            {
                CloseWorker();
            }
            finally
            {
                // let the lock go
                _dbOwnershipLock.Dispose();
            }
        }

        private void CloseWorker()
        {
            // Flush all pending writes so that all data our features wanted written
            // are definitely persisted to the DB.
            try
            {
                FlushAllPendingWritesAsync(CancellationToken.None).Wait();
            }
            catch (Exception e)
            {
                // Flushing may fail.  We still have to close all our connections.
                StorageDatabaseLogger.LogException(e);
            }

            lock (_connectionGate)
            {
                // Notify any outstanding async work that it should stop.
                _shutdownTokenSource.Cancel();

                // Go through all our pooled connections and close them.
                while (_connectionsPool.Count > 0)
                {
                    var connection = _connectionsPool.Pop();
                    connection.Close_OnlyForUseBySqlPersistentStorage();
                }
            }
        }

        /// <summary>
        /// Gets a <see cref="SqlConnection"/> from the connection pool, or creates one if none are available.
        /// </summary>
        /// <remarks>
        /// Database connections have a large amount of overhead, and should be returned to the pool when they are no
        /// longer in use. In particular, make sure to avoid letting a connection lease cross an <see langword="await"/>
        /// boundary, as it will prevent code in the asynchronous operation from using the existing connection.
        /// </remarks>
        private PooledConnection GetPooledConnection()
            => new PooledConnection(this, GetConnection());

        public void Initialize(Solution solution)
        {
            // Create a connection to the DB and ensure it has tables for the types we care about. 
            using (var pooledConnection = GetPooledConnection())
            {
                var connection = pooledConnection.Connection;

                // Enable write-ahead logging to increase write performance by reducing amount of disk writes,
                // by combining writes at checkpoint, along with using sequential-only writes to populate the log.
                // Also, WAL allows for relaxed ("normal") "synchronous" mode, see below.
                connection.ExecuteCommand("pragma journal_mode=wal", throwOnError: false);

                // Set "synchronous" mode to "normal" instead of default "full" to reduce the amount of buffer flushing syscalls,
                // significantly reducing both the blocked time and the amount of context switches.
                // When coupled with WAL, this (according to https://sqlite.org/pragma.html#pragma_synchronous and 
                // https://www.sqlite.org/wal.html#performance_considerations) is unlikely to significantly affect durability,
                // while significantly increasing performance, because buffer flushing is done for each checkpoint, instead of each
                // transaction. While some writes can be lost, they are never reordered, and higher layers will recover from that.
                connection.ExecuteCommand("pragma synchronous=normal", throwOnError: false);

                // First, create all our tables
                connection.ExecuteCommand(
$@"create table if not exists ""{StringInfoTableName}"" (
    ""{DataIdColumnName}"" integer primary key autoincrement not null,
    ""{DataColumnName}"" varchar)");

                // Ensure that the string-info table's 'Value' column is defined to be 'unique'.
                // We don't allow duplicate strings in this table.
                connection.ExecuteCommand(
$@"create unique index if not exists ""{StringInfoTableName}_{DataColumnName}"" on ""{StringInfoTableName}""(""{DataColumnName}"")");

                connection.ExecuteCommand(
$@"create table if not exists ""{SolutionDataTableName}"" (
    ""{DataIdColumnName}"" varchar primary key not null,
    ""{DataColumnName}"" blob)");

                connection.ExecuteCommand(
$@"create table if not exists ""{ProjectDataTableName}"" (
    ""{DataIdColumnName}"" integer primary key not null,
    ""{DataColumnName}"" blob)");

                connection.ExecuteCommand(
$@"create table if not exists ""{DocumentDataTableName}"" (
    ""{DataIdColumnName}"" integer primary key not null,
    ""{DataColumnName}"" blob)");

                // Also get the known set of string-to-id mappings we already have in the DB.
                // Do this in one batch if possible.
                var fetched = TryFetchStringTable(connection);

                // If we weren't able to retrieve the entire string table in one batch,
                // have the bulk-population step below fetch the string IDs it needs.
                var fetchStringTable = !fetched;

                // Try to bulk populate all the IDs we'll need for strings/projects/documents.
                // Bulk population is much faster than trying to do everything individually.
                BulkPopulateIds(connection, solution, fetchStringTable);
            }
        }
    }
}