Unverified · Commit 6aa6b3e2 authored by Alexandru Dima and committed by GitHub

Merge pull request #77740 from microsoft/alex/tokenization

Towards moving tokenization off the main thread
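The gist of the change: a web worker runs the TextMate tokenization, serializes the resulting per-line tokens into one binary payload per model version, and the main thread deserializes that payload, replays any edits made since that version, and applies the tokens to the text model. Below is a minimal sketch (not part of the commit) of the round trip through the MultilineTokensBuilder / MultilineTokens API introduced here; the token values are made up for illustration.

import { MultilineTokensBuilder, MultilineTokens } from 'vs/editor/common/model/tokensStore';

// Worker side: each line's tokens are (endOffset, metadata) pairs packed into a Uint32Array.
const builder = new MultilineTokensBuilder();
builder.add(1, new Uint32Array([5, 0])); // line 1: one token ending at offset 5 (0 = placeholder metadata word)
builder.add(2, new Uint32Array([3, 0])); // line 2: one token ending at offset 3
const payload: Uint8Array = builder.serialize(); // single transferable buffer sent back to the main thread

// Main-thread side: rebuild the token blocks and hand them to the model.
const blocks: MultilineTokens[] = MultilineTokensBuilder.deserialize(payload);
// textModel.setTokens(blocks); // ITextModel.setTokens, added by this change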
......@@ -119,7 +119,8 @@ const copyrightFilter = [
'!resources/completions/**',
'!extensions/markdown-language-features/media/highlight.css',
'!extensions/html-language-features/server/src/modes/typescript/*',
'!extensions/*/server/bin/*'
'!extensions/*/server/bin/*',
'!src/vs/editor/test/node/classification/typescript-test.ts',
];
const eslintFilter = [
......
......@@ -130,4 +130,4 @@
}
]
}
}
\ No newline at end of file
}
......@@ -105,7 +105,7 @@ export class VSBuffer {
}
}
function readUInt32BE(source: Uint8Array, offset: number): number {
export function readUInt32BE(source: Uint8Array, offset: number): number {
return (
source[offset] * 2 ** 24
+ source[offset + 1] * 2 ** 16
......@@ -114,7 +114,7 @@ function readUInt32BE(source: Uint8Array, offset: number): number {
);
}
function writeUInt32BE(destination: Uint8Array, value: number, offset: number): void {
export function writeUInt32BE(destination: Uint8Array, value: number, offset: number): void {
destination[offset + 3] = value;
value = value >>> 8;
destination[offset + 2] = value;
......
......@@ -14,6 +14,7 @@ import { IModelContentChange, IModelContentChangedEvent, IModelDecorationsChange
import { SearchData } from 'vs/editor/common/model/textModelSearch';
import { LanguageId, LanguageIdentifier, FormattingOptions } from 'vs/editor/common/modes';
import { ThemeColor } from 'vs/platform/theme/common/themeService';
import { MultilineTokens } from 'vs/editor/common/model/tokensStore';
/**
* Vertical Lane in the overview ruler of the editor.
......@@ -779,6 +780,11 @@ export interface ITextModel {
*/
findPreviousMatch(searchString: string, searchStart: IPosition, isRegex: boolean, matchCase: boolean, wordSeparators: string | null, captureMatches: boolean): FindMatch | null;
/**
* @internal
*/
setTokens(tokens: MultilineTokens[]): void;
/**
* Flush all tokenization state.
* @internal
......
......@@ -22,7 +22,7 @@ import { IntervalNode, IntervalTree, getNodeIsInOverviewRuler, recomputeMaxEnd }
import { PieceTreeTextBufferBuilder } from 'vs/editor/common/model/pieceTreeTextBuffer/pieceTreeTextBufferBuilder';
import { IModelContentChangedEvent, IModelDecorationsChangedEvent, IModelLanguageChangedEvent, IModelLanguageConfigurationChangedEvent, IModelOptionsChangedEvent, IModelTokensChangedEvent, InternalModelContentChangeEvent, ModelRawChange, ModelRawContentChangedEvent, ModelRawEOLChanged, ModelRawFlush, ModelRawLineChanged, ModelRawLinesDeleted, ModelRawLinesInserted } from 'vs/editor/common/model/textModelEvents';
import { SearchData, SearchParams, TextModelSearch } from 'vs/editor/common/model/textModelSearch';
import { TextModelTokenization, countEOL } from 'vs/editor/common/model/textModelTokens';
import { TextModelTokenization } from 'vs/editor/common/model/textModelTokens';
import { getWordAtText } from 'vs/editor/common/model/wordHelper';
import { LanguageId, LanguageIdentifier, FormattingOptions } from 'vs/editor/common/modes';
import { LanguageConfigurationRegistry } from 'vs/editor/common/modes/languageConfigurationRegistry';
......@@ -32,7 +32,7 @@ import { BracketsUtils, RichEditBracket, RichEditBrackets } from 'vs/editor/comm
import { ITheme, ThemeColor } from 'vs/platform/theme/common/themeService';
import { withUndefinedAsNull } from 'vs/base/common/types';
import { VSBufferReadableStream, VSBuffer } from 'vs/base/common/buffer';
import { TokensStore, MultilineTokens } from 'vs/editor/common/model/tokensStore';
import { TokensStore, MultilineTokens, countEOL } from 'vs/editor/common/model/tokensStore';
import { Color } from 'vs/base/common/color';
function createTextBufferBuilder() {
......@@ -1279,7 +1279,7 @@ export class TextModel extends Disposable implements model.ITextModel {
for (let i = 0, len = contentChanges.length; i < len; i++) {
const change = contentChanges[i];
const [eolCount, firstLineLength] = countEOL(change.text);
this._tokens.applyEdits(change.range, eolCount, firstLineLength);
this._tokens.acceptEdit(change.range, eolCount, firstLineLength);
this._onDidChangeDecorations.fire();
this._decorationsTree.acceptReplace(change.rangeOffset, change.rangeLength, change.text.length, change.forceMoveMarkers);
......@@ -1704,7 +1704,7 @@ export class TextModel extends Disposable implements model.ITextModel {
//#region Tokenization
public setLineTokens(lineNumber: number, tokens: Uint32Array): void {
public setLineTokens(lineNumber: number, tokens: Uint32Array | ArrayBuffer | null): void {
if (lineNumber < 1 || lineNumber > this.getLineCount()) {
throw new Error('Illegal value for lineNumber');
}
......
......@@ -15,38 +15,7 @@ import { nullTokenize2 } from 'vs/editor/common/modes/nullMode';
import { TextModel } from 'vs/editor/common/model/textModel';
import { Disposable } from 'vs/base/common/lifecycle';
import { StopWatch } from 'vs/base/common/stopwatch';
import { CharCode } from 'vs/base/common/charCode';
import { MultilineTokensBuilder } from 'vs/editor/common/model/tokensStore';
export function countEOL(text: string): [number, number] {
let eolCount = 0;
let firstLineLength = 0;
for (let i = 0, len = text.length; i < len; i++) {
const chr = text.charCodeAt(i);
if (chr === CharCode.CarriageReturn) {
if (eolCount === 0) {
firstLineLength = i;
}
eolCount++;
if (i + 1 < len && text.charCodeAt(i + 1) === CharCode.LineFeed) {
// \r\n... case
i++; // skip \n
} else {
// \r... case
}
} else if (chr === CharCode.LineFeed) {
if (eolCount === 0) {
firstLineLength = i;
}
eolCount++;
}
}
if (eolCount === 0) {
firstLineLength = text.length;
}
return [eolCount, firstLineLength];
}
import { MultilineTokensBuilder, countEOL } from 'vs/editor/common/model/tokensStore';
const enum Constants {
CHEAP_TOKENIZATION_LENGTH_LIMIT = 2048
......@@ -117,6 +86,9 @@ export class TokenizationStateStore {
if (deleteCount === 0) {
return;
}
if (start + deleteCount > this._len) {
deleteCount = this._len - start;
}
this._beginState.splice(start, deleteCount);
this._valid.splice(start, deleteCount);
this._len -= deleteCount;
......
......@@ -8,6 +8,38 @@ import { LineTokens } from 'vs/editor/common/core/lineTokens';
import { Position } from 'vs/editor/common/core/position';
import { IRange } from 'vs/editor/common/core/range';
import { ColorId, FontStyle, LanguageId, MetadataConsts, StandardTokenType, TokenMetadata } from 'vs/editor/common/modes';
import { writeUInt32BE, readUInt32BE } from 'vs/base/common/buffer';
import { CharCode } from 'vs/base/common/charCode';
export function countEOL(text: string): [number, number] {
let eolCount = 0;
let firstLineLength = 0;
for (let i = 0, len = text.length; i < len; i++) {
const chr = text.charCodeAt(i);
if (chr === CharCode.CarriageReturn) {
if (eolCount === 0) {
firstLineLength = i;
}
eolCount++;
if (i + 1 < len && text.charCodeAt(i + 1) === CharCode.LineFeed) {
// \r\n... case
i++; // skip \n
} else {
// \r... case
}
} else if (chr === CharCode.LineFeed) {
if (eolCount === 0) {
firstLineLength = i;
}
eolCount++;
}
}
if (eolCount === 0) {
firstLineLength = text.length;
}
return [eolCount, firstLineLength];
}
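// Illustrative example (not part of the change): countEOL('ab\r\ncd\nef') returns [2, 2],
// i.e. two line breaks and a first line ('ab') of length 2.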
function getDefaultMetadata(topLevelLanguageId: LanguageId): number {
return (
......@@ -39,23 +71,226 @@ export class MultilineTokensBuilder {
return;
}
}
this.tokens.push(new MultilineTokens(lineNumber, lineTokens));
this.tokens.push(new MultilineTokens(lineNumber, [lineTokens]));
}
public static deserialize(buff: Uint8Array): MultilineTokens[] {
let offset = 0;
const count = readUInt32BE(buff, offset); offset += 4;
let result: MultilineTokens[] = [];
for (let i = 0; i < count; i++) {
offset = MultilineTokens.deserialize(buff, offset, result);
}
return result;
}
public serialize(): Uint8Array {
const size = this._serializeSize();
const result = new Uint8Array(size);
this._serialize(result);
return result;
}
private _serializeSize(): number {
let result = 0;
result += 4; // 4 bytes for the count
for (let i = 0; i < this.tokens.length; i++) {
result += this.tokens[i].serializeSize();
}
return result;
}
private _serialize(destination: Uint8Array): void {
let offset = 0;
writeUInt32BE(destination, this.tokens.length, offset); offset += 4;
for (let i = 0; i < this.tokens.length; i++) {
offset = this.tokens[i].serialize(destination, offset);
}
}
}
export class MultilineTokens {
public readonly startLineNumber: number;
public readonly tokens: Uint32Array[];
public startLineNumber: number;
public tokens: (Uint32Array | ArrayBuffer | null)[];
constructor(lineNumber: number, tokens: Uint32Array) {
this.startLineNumber = lineNumber;
this.tokens = [tokens];
}
constructor(startLineNumber: number, tokens: Uint32Array[]) {
this.startLineNumber = startLineNumber;
this.tokens = tokens;
}
public static deserialize(buff: Uint8Array, offset: number, result: MultilineTokens[]): number {
const view32 = new Uint32Array(buff.buffer);
const startLineNumber = readUInt32BE(buff, offset); offset += 4;
const count = readUInt32BE(buff, offset); offset += 4;
let tokens: Uint32Array[] = [];
for (let i = 0; i < count; i++) {
const byteCount = readUInt32BE(buff, offset); offset += 4;
tokens.push(view32.subarray(offset / 4, offset / 4 + byteCount / 4));
offset += byteCount;
}
result.push(new MultilineTokens(startLineNumber, tokens));
return offset;
}
public serializeSize(): number {
let result = 0;
result += 4; // 4 bytes for the start line number
result += 4; // 4 bytes for the line count
for (let i = 0; i < this.tokens.length; i++) {
const lineTokens = this.tokens[i];
if (!(lineTokens instanceof Uint32Array)) {
throw new Error(`Not supported!`);
}
result += 4; // 4 bytes for the byte count
result += lineTokens.byteLength;
}
return result;
}
public serialize(destination: Uint8Array, offset: number): number {
writeUInt32BE(destination, this.startLineNumber, offset); offset += 4;
writeUInt32BE(destination, this.tokens.length, offset); offset += 4;
for (let i = 0; i < this.tokens.length; i++) {
const lineTokens = this.tokens[i];
if (!(lineTokens instanceof Uint32Array)) {
throw new Error(`Not supported!`);
}
writeUInt32BE(destination, lineTokens.byteLength, offset); offset += 4;
destination.set(new Uint8Array(lineTokens.buffer), offset); offset += lineTokens.byteLength;
}
return offset;
}
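// Resulting layout, as a summary of the code above: a big-endian u32 start line number,
// a big-endian u32 line count, then for each line a big-endian u32 byte count followed by
// the raw bytes of that line's Uint32Array tokens. MultilineTokensBuilder prefixes the
// whole payload with a big-endian u32 block count.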
public applyEdit(range: IRange, text: string): void {
const [eolCount, firstLineLength] = countEOL(text);
this._acceptDeleteRange(range);
this._acceptInsertText(new Position(range.startLineNumber, range.startColumn), eolCount, firstLineLength);
}
private _acceptDeleteRange(range: IRange): void {
if (range.startLineNumber === range.endLineNumber && range.startColumn === range.endColumn) {
// Nothing to delete
return;
}
const firstLineIndex = range.startLineNumber - this.startLineNumber;
const lastLineIndex = range.endLineNumber - this.startLineNumber;
if (lastLineIndex < 0) {
// this deletion occurs entirely before this block, so we only need to adjust line numbers
const deletedLinesCount = lastLineIndex - firstLineIndex;
this.startLineNumber -= deletedLinesCount;
return;
}
if (firstLineIndex >= this.tokens.length) {
// this deletion occurs entirely after this block, so there is nothing to do
return;
}
if (firstLineIndex < 0 && lastLineIndex >= this.tokens.length) {
// this deletion completely encompasses this block
this.startLineNumber = 0;
this.tokens = [];
return;
}
if (firstLineIndex === lastLineIndex) {
// a delete on a single line
this.tokens[firstLineIndex] = TokensStore._delete(this.tokens[firstLineIndex], range.startColumn - 1, range.endColumn - 1);
return;
}
if (firstLineIndex >= 0) {
// The first line survives
this.tokens[firstLineIndex] = TokensStore._deleteEnding(this.tokens[firstLineIndex], range.startColumn - 1);
if (lastLineIndex < this.tokens.length) {
// The last line survives
const lastLineTokens = TokensStore._deleteBeginning(this.tokens[lastLineIndex], range.endColumn - 1);
// Take remaining text on last line and append it to remaining text on first line
this.tokens[firstLineIndex] = TokensStore._append(this.tokens[firstLineIndex], lastLineTokens);
// Delete middle lines
this.tokens.splice(firstLineIndex + 1, lastLineIndex - firstLineIndex);
} else {
// The last line does not survive
// Take remaining text on last line and append it to remaining text on first line
this.tokens[firstLineIndex] = TokensStore._append(this.tokens[firstLineIndex], null);
// Delete lines
this.tokens = this.tokens.slice(0, firstLineIndex + 1);
}
} else {
// The first line does not survive
const deletedBefore = -firstLineIndex;
this.startLineNumber -= deletedBefore;
// Remove beginning from last line
this.tokens[lastLineIndex] = TokensStore._deleteBeginning(this.tokens[lastLineIndex], range.endColumn - 1);
// Delete lines
this.tokens = this.tokens.slice(lastLineIndex);
}
}
private _acceptInsertText(position: Position, eolCount: number, firstLineLength: number): void {
if (eolCount === 0 && firstLineLength === 0) {
// Nothing to insert
return;
}
const lineIndex = position.lineNumber - this.startLineNumber;
if (lineIndex < 0) {
// this insertion occurs before this block, so we only need to adjust line numbers
this.startLineNumber += eolCount;
return;
}
if (lineIndex >= this.tokens.length) {
// this insertion occurs after this block, so there is nothing to do
return;
}
if (eolCount === 0) {
// Inserting text on one line
this.tokens[lineIndex] = TokensStore._insert(this.tokens[lineIndex], position.column - 1, firstLineLength);
return;
}
this.tokens[lineIndex] = TokensStore._deleteEnding(this.tokens[lineIndex], position.column - 1);
this.tokens[lineIndex] = TokensStore._insert(this.tokens[lineIndex], position.column - 1, firstLineLength);
this._insertLines(position.lineNumber, eolCount);
}
private _insertLines(insertIndex: number, insertCount: number): void {
if (insertCount === 0) {
return;
}
let lineTokens: (Uint32Array | ArrayBuffer | null)[] = [];
for (let i = 0; i < insertCount; i++) {
lineTokens[i] = null;
}
this.tokens = arrays.arrayInsert(this.tokens, insertIndex, lineTokens);
}
}
function toUint32Array(arr: Uint32Array | ArrayBuffer): Uint32Array {
if (arr instanceof Uint32Array) {
return arr;
} else {
return new Uint32Array(arr);
}
}
export class TokensStore {
private _lineTokens: (ArrayBuffer | null)[];
private _lineTokens: (Uint32Array | ArrayBuffer | null)[];
private _len: number;
constructor() {
......@@ -69,13 +304,13 @@ export class TokensStore {
}
public getTokens(topLevelLanguageId: LanguageId, lineIndex: number, lineText: string): LineTokens {
let rawLineTokens: ArrayBuffer | null = null;
let rawLineTokens: Uint32Array | ArrayBuffer | null = null;
if (lineIndex < this._len) {
rawLineTokens = this._lineTokens[lineIndex];
}
if (rawLineTokens !== null && rawLineTokens !== EMPTY_LINE_TOKENS) {
return new LineTokens(new Uint32Array(rawLineTokens), lineText);
return new LineTokens(toUint32Array(rawLineTokens), lineText);
}
let lineTokens = new Uint32Array(2);
......@@ -84,7 +319,10 @@ export class TokensStore {
return new LineTokens(lineTokens, lineText);
}
private static _massageTokens(topLevelLanguageId: LanguageId, lineTextLength: number, tokens: Uint32Array): ArrayBuffer {
private static _massageTokens(topLevelLanguageId: LanguageId, lineTextLength: number, _tokens: Uint32Array | ArrayBuffer | null): Uint32Array | ArrayBuffer {
const tokens = _tokens ? toUint32Array(_tokens) : null;
if (lineTextLength === 0) {
let hasDifferentLanguageId = false;
if (tokens && tokens.length > 1) {
......@@ -97,12 +335,20 @@ export class TokensStore {
}
if (!tokens || tokens.length === 0) {
tokens = new Uint32Array(2);
const tokens = new Uint32Array(2);
tokens[0] = lineTextLength;
tokens[1] = getDefaultMetadata(topLevelLanguageId);
return tokens.buffer;
}
return tokens.buffer;
// Ensure the last token covers the end of the text
tokens[tokens.length - 2] = lineTextLength;
if (tokens.byteOffset === 0 && tokens.byteLength === tokens.buffer.byteLength) {
// Store directly the ArrayBuffer pointer to save an object
return tokens.buffer;
}
return tokens;
}
private _ensureLine(lineIndex: number): void {
......@@ -116,6 +362,9 @@ export class TokensStore {
if (deleteCount === 0) {
return;
}
if (start + deleteCount > this._len) {
deleteCount = this._len - start;
}
this._lineTokens.splice(start, deleteCount);
this._len -= deleteCount;
}
......@@ -124,7 +373,7 @@ export class TokensStore {
if (insertCount === 0) {
return;
}
let lineTokens: (ArrayBuffer | null)[] = [];
let lineTokens: (Uint32Array | ArrayBuffer | null)[] = [];
for (let i = 0; i < insertCount; i++) {
lineTokens[i] = null;
}
......@@ -132,7 +381,7 @@ export class TokensStore {
this._len += insertCount;
}
public setTokens(topLevelLanguageId: LanguageId, lineIndex: number, lineTextLength: number, _tokens: Uint32Array): void {
public setTokens(topLevelLanguageId: LanguageId, lineIndex: number, lineTextLength: number, _tokens: Uint32Array | ArrayBuffer | null): void {
const tokens = TokensStore._massageTokens(topLevelLanguageId, lineTextLength, _tokens);
this._ensureLine(lineIndex);
this._lineTokens[lineIndex] = tokens;
......@@ -140,7 +389,7 @@ export class TokensStore {
//#region Editing
public applyEdits(range: IRange, eolCount: number, firstLineLength: number): void {
public acceptEdit(range: IRange, eolCount: number, firstLineLength: number): void {
this._acceptDeleteRange(range);
this._acceptInsertText(new Position(range.startLineNumber, range.startColumn), eolCount, firstLineLength);
}
......@@ -165,7 +414,7 @@ export class TokensStore {
this._lineTokens[firstLineIndex] = TokensStore._deleteEnding(this._lineTokens[firstLineIndex], range.startColumn - 1);
const lastLineIndex = range.endLineNumber - 1;
let lastLineTokens: ArrayBuffer | null = null;
let lastLineTokens: Uint32Array | ArrayBuffer | null = null;
if (lastLineIndex < this._len) {
lastLineTokens = TokensStore._deleteBeginning(this._lineTokens[lastLineIndex], range.endColumn - 1);
}
......@@ -201,29 +450,29 @@ export class TokensStore {
this._insertLines(position.lineNumber, eolCount);
}
private static _deleteBeginning(lineTokens: ArrayBuffer | null, toChIndex: number): ArrayBuffer | null {
public static _deleteBeginning(lineTokens: Uint32Array | ArrayBuffer | null, toChIndex: number): Uint32Array | ArrayBuffer | null {
if (lineTokens === null || lineTokens === EMPTY_LINE_TOKENS) {
return lineTokens;
}
return TokensStore._delete(lineTokens, 0, toChIndex);
}
private static _deleteEnding(lineTokens: ArrayBuffer | null, fromChIndex: number): ArrayBuffer | null {
public static _deleteEnding(lineTokens: Uint32Array | ArrayBuffer | null, fromChIndex: number): Uint32Array | ArrayBuffer | null {
if (lineTokens === null || lineTokens === EMPTY_LINE_TOKENS) {
return lineTokens;
}
const tokens = new Uint32Array(lineTokens);
const tokens = toUint32Array(lineTokens);
const lineTextLength = tokens[tokens.length - 2];
return TokensStore._delete(lineTokens, fromChIndex, lineTextLength);
}
private static _delete(lineTokens: ArrayBuffer | null, fromChIndex: number, toChIndex: number): ArrayBuffer | null {
public static _delete(lineTokens: Uint32Array | ArrayBuffer | null, fromChIndex: number, toChIndex: number): Uint32Array | ArrayBuffer | null {
if (lineTokens === null || lineTokens === EMPTY_LINE_TOKENS || fromChIndex === toChIndex) {
return lineTokens;
}
const tokens = new Uint32Array(lineTokens);
const tokens = toUint32Array(lineTokens);
const tokensCount = (tokens.length >>> 1);
// special case: deleting everything
......@@ -275,7 +524,7 @@ export class TokensStore {
return tmp.buffer;
}
private static _append(lineTokens: ArrayBuffer | null, _otherTokens: ArrayBuffer | null): ArrayBuffer | null {
public static _append(lineTokens: Uint32Array | ArrayBuffer | null, _otherTokens: Uint32Array | ArrayBuffer | null): Uint32Array | ArrayBuffer | null {
if (_otherTokens === EMPTY_LINE_TOKENS) {
return lineTokens;
}
......@@ -289,8 +538,8 @@ export class TokensStore {
// cannot determine combined line length...
return null;
}
const myTokens = new Uint32Array(lineTokens);
const otherTokens = new Uint32Array(_otherTokens);
const myTokens = toUint32Array(lineTokens);
const otherTokens = toUint32Array(_otherTokens);
const otherTokensCount = (otherTokens.length >>> 1);
let result = new Uint32Array(myTokens.length + otherTokens.length);
......@@ -304,13 +553,13 @@ export class TokensStore {
return result.buffer;
}
private static _insert(lineTokens: ArrayBuffer | null, chIndex: number, textLength: number): ArrayBuffer | null {
public static _insert(lineTokens: Uint32Array | ArrayBuffer | null, chIndex: number, textLength: number): Uint32Array | ArrayBuffer | null {
if (lineTokens === null || lineTokens === EMPTY_LINE_TOKENS) {
// nothing to do
return lineTokens;
}
const tokens = new Uint32Array(lineTokens);
const tokens = toUint32Array(lineTokens);
const tokensCount = (tokens.length >>> 1);
let fromTokenIndex = LineTokens.findIndexInTokensArray(tokens, chIndex);
......
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { StandardTokenType } from 'vs/editor/common/modes';
import { CharCode } from 'vs/base/common/charCode';
class ParserContext {
public readonly text: string;
public readonly len: number;
public readonly tokens: number[];
public pos: number;
private currentTokenStartOffset: number;
private currentTokenType: StandardTokenType;
constructor(text: string) {
this.text = text;
this.len = this.text.length;
this.tokens = [];
this.pos = 0;
this.currentTokenStartOffset = 0;
this.currentTokenType = StandardTokenType.Other;
}
private _safeCharCodeAt(index: number): number {
if (index >= this.len) {
return CharCode.Null;
}
return this.text.charCodeAt(index);
}
peek(distance: number = 0): number {
return this._safeCharCodeAt(this.pos + distance);
}
next(): number {
const result = this._safeCharCodeAt(this.pos);
this.pos++;
return result;
}
advance(distance: number): void {
this.pos += distance;
}
eof(): boolean {
return this.pos >= this.len;
}
beginToken(tokenType: StandardTokenType, deltaPos: number = 0): void {
this.currentTokenStartOffset = this.pos + deltaPos;
this.currentTokenType = tokenType;
}
endToken(deltaPos: number = 0): void {
const length = this.pos + deltaPos - this.currentTokenStartOffset;
// check if it is touching previous token
if (this.tokens.length > 0) {
const previousStartOffset = this.tokens[this.tokens.length - 3];
const previousLength = this.tokens[this.tokens.length - 2];
const previousTokenType = this.tokens[this.tokens.length - 1];
const previousEndOffset = previousStartOffset + previousLength;
if (this.currentTokenStartOffset === previousEndOffset && previousTokenType === this.currentTokenType) {
// extend previous token
this.tokens[this.tokens.length - 2] += length;
return;
}
}
this.tokens.push(this.currentTokenStartOffset, length, this.currentTokenType);
}
}
export function parse(text: string): number[] {
const ctx = new ParserContext(text);
while (!ctx.eof()) {
parseRoot(ctx);
}
return ctx.tokens;
}
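// Illustrative example (not part of the change): parse('const s = "x";') returns
// [10, 3, StandardTokenType.String], i.e. flat (startOffset, length, tokenType) triples
// emitted only for strings, comments and regular expressions.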
function parseRoot(ctx: ParserContext): void {
let curlyCount = 0;
while (!ctx.eof()) {
const ch = ctx.peek();
switch (ch) {
case CharCode.SingleQuote:
parseSimpleString(ctx, CharCode.SingleQuote);
break;
case CharCode.DoubleQuote:
parseSimpleString(ctx, CharCode.DoubleQuote);
break;
case CharCode.BackTick:
parseInterpolatedString(ctx);
break;
case CharCode.Slash:
parseSlash(ctx);
break;
case CharCode.OpenCurlyBrace:
ctx.advance(1);
curlyCount++;
break;
case CharCode.CloseCurlyBrace:
ctx.advance(1);
curlyCount--;
if (curlyCount < 0) {
return;
}
break;
default:
ctx.advance(1);
}
}
}
function parseSimpleString(ctx: ParserContext, closingQuote: number): void {
ctx.beginToken(StandardTokenType.String);
// skip the opening quote
ctx.advance(1);
while (!ctx.eof()) {
const ch = ctx.next();
if (ch === CharCode.Backslash) {
// skip \r\n or any other character following a backslash
const advanceCount = (ctx.peek() === CharCode.CarriageReturn && ctx.peek(1) === CharCode.LineFeed ? 2 : 1);
ctx.advance(advanceCount);
} else if (ch === closingQuote) {
// hit end quote, so stop
break;
}
}
ctx.endToken();
}
function parseInterpolatedString(ctx: ParserContext): void {
ctx.beginToken(StandardTokenType.String);
// skip the opening quote
ctx.advance(1);
while (!ctx.eof()) {
const ch = ctx.next();
if (ch === CharCode.Backslash) {
// skip \r\n or any other character following a backslash
const advanceCount = (ctx.peek() === CharCode.CarriageReturn && ctx.peek(1) === CharCode.LineFeed ? 2 : 1);
ctx.advance(advanceCount);
} else if (ch === CharCode.BackTick) {
// hit end quote, so stop
break;
} else if (ch === CharCode.DollarSign) {
if (ctx.peek() === CharCode.OpenCurlyBrace) {
ctx.advance(1);
ctx.endToken();
parseRoot(ctx);
ctx.beginToken(StandardTokenType.String, -1);
}
}
}
ctx.endToken();
}
function parseSlash(ctx: ParserContext): void {
const nextCh = ctx.peek(1);
if (nextCh === CharCode.Asterisk) {
parseMultiLineComment(ctx);
return;
}
if (nextCh === CharCode.Slash) {
parseSingleLineComment(ctx);
return;
}
if (tryParseRegex(ctx)) {
return;
}
ctx.advance(1);
}
function tryParseRegex(ctx: ParserContext): boolean {
// See https://www.ecma-international.org/ecma-262/10.0/index.html#prod-RegularExpressionLiteral
// TODO: avoid regex...
let contentBefore = ctx.text.substr(ctx.pos - 100, 100);
if (/[a-zA-Z0-9](\s*)$/.test(contentBefore)) {
// Cannot start after an identifier
return false;
}
let pos = 0;
let len = ctx.len - ctx.pos;
let inClass = false;
// skip /
pos++;
while (pos < len) {
const ch = ctx.peek(pos++);
if (ch === CharCode.CarriageReturn || ch === CharCode.LineFeed) {
return false;
}
if (ch === CharCode.Backslash) {
const nextCh = ctx.peek();
if (nextCh === CharCode.CarriageReturn || nextCh === CharCode.LineFeed) {
return false;
}
// skip next character
pos++;
continue;
}
if (inClass) {
if (ch === CharCode.CloseSquareBracket) {
inClass = false;
continue;
}
} else {
if (ch === CharCode.Slash) {
// cannot be directly followed by a /
if (ctx.peek(pos) === CharCode.Slash) {
return false;
}
// consume flags
do {
let nextCh = ctx.peek(pos);
if (nextCh >= CharCode.a && nextCh <= CharCode.z) {
pos++;
continue;
} else {
break;
}
} while (true);
// TODO: avoid regex...
if (/^(\s*)(\.|;|\/|,|\)|\]|\}|$)/.test(ctx.text.substr(ctx.pos + pos))) {
// Must be followed by an operator of some kind
ctx.beginToken(StandardTokenType.RegEx);
ctx.advance(pos);
ctx.endToken();
return true;
}
return false;
}
if (ch === CharCode.OpenSquareBracket) {
inClass = true;
continue;
}
}
}
return false;
}
function parseMultiLineComment(ctx: ParserContext): void {
ctx.beginToken(StandardTokenType.Comment);
// skip the /*
ctx.advance(2);
while (!ctx.eof()) {
const ch = ctx.next();
if (ch === CharCode.Asterisk) {
if (ctx.peek() === CharCode.Slash) {
ctx.advance(1);
break;
}
}
}
ctx.endToken();
}
function parseSingleLineComment(ctx: ParserContext): void {
ctx.beginToken(StandardTokenType.Comment);
// skip the //
ctx.advance(2);
while (!ctx.eof()) {
const ch = ctx.next();
if (ch === CharCode.CarriageReturn || ch === CharCode.LineFeed) {
break;
}
}
ctx.endToken();
}
///
/* tslint:disable */
const x01 = "string";
/// ^^^^^^^^ string
const x02 = '\'';
/// ^^^^ string
const x03 = '\n\'\t';
/// ^^^^^^^^ string
const x04 = 'this is\
/// ^^^^^^^^^ string\
a multiline string';
/// <------------------- string
const x05 = x01;// just some text
/// ^^^^^^^^^^^^^^^^^ comment
const x06 = x05;/* multi
/// ^^^^^^^^ comment
line *comment */
/// <---------------- comment
const x07 = 4 / 5;
const x08 = `howdy`;
/// ^^^^^^^ string
const x09 = `\'\"\``;
/// ^^^^^^^^ string
const x10 = `$[]`;
/// ^^^^^ string
const x11 = `${x07 +/**/3}px`;
/// ^^^ string
/// ^^^^ comment
/// ^^^^ string
const x12 = `${x07 + (function () { return 5; })()/**/}px`;
/// ^^^ string
/// ^^^^ comment
/// ^^^^ string
const x13 = /([\w\-]+)?(#([\w\-]+))?((.([\w\-]+))*)/;
/// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ regex
const x14 = /\./g;
/// ^^^^^ regex
const x15 = Math.abs(x07) / x07; // speed
/// ^^^^^^^^ comment
const x16 = / x07; /.test('3');
/// ^^^^^^^^ regex
/// ^^^ string
const x17 = `.dialog-modal-block${true ? '.dimmed' : ''}`;
/// ^^^^^^^^^^^^^^^^^^^^^^ string
/// ^^^^^^^^^ string
/// ^^^^ string
const x18 = Math.min((14 <= 0.5 ? 123 / (2 * 1) : ''.length / (2 - (2 * 1))), 1);
/// ^^ string
const x19 = `${3 / '5'.length} km/h)`;
/// ^^^ string
/// ^^^ string
/// ^^^^^^^ string
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import * as assert from 'assert';
import { StandardTokenType } from 'vs/editor/common/modes';
import * as fs from 'fs';
import { getPathFromAmdModule } from 'vs/base/common/amd';
import { parse } from 'vs/editor/common/modes/tokenization/typescript';
import { toStandardTokenType } from 'vs/editor/common/modes/supports/tokenization';
interface IParseFunc {
(text: string): number[];
}
interface IAssertion {
testLineNumber: number;
startOffset: number;
length: number;
tokenType: StandardTokenType;
}
interface ITest {
content: string;
assertions: IAssertion[];
}
function parseTest(fileName: string): ITest {
interface ILineWithAssertions {
line: string;
assertions: ILineAssertion[];
}
interface ILineAssertion {
testLineNumber: number;
startOffset: number;
length: number;
expectedTokenType: StandardTokenType;
}
const testContents = fs.readFileSync(fileName).toString();
const lines = testContents.split(/\r\n|\n/);
const magicToken = lines[0];
let currentElement: ILineWithAssertions = {
line: lines[1],
assertions: []
};
let parsedTest: ILineWithAssertions[] = [];
for (let i = 2; i < lines.length; i++) {
let line = lines[i];
if (line.substr(0, magicToken.length) === magicToken) {
// this is an assertion line
let m1 = line.substr(magicToken.length).match(/^( +)([\^]+) (\w+)\\?$/);
if (m1) {
currentElement.assertions.push({
testLineNumber: i + 1,
startOffset: magicToken.length + m1[1].length,
length: m1[2].length,
expectedTokenType: toStandardTokenType(m1[3])
});
} else {
let m2 = line.substr(magicToken.length).match(/^( +)<(-+) (\w+)\\?$/);
if (m2) {
currentElement.assertions.push({
testLineNumber: i + 1,
startOffset: 0,
length: m2[2].length,
expectedTokenType: toStandardTokenType(m2[3])
});
} else {
throw new Error(`Invalid test line at line number ${i + 1}.`);
}
}
} else {
// this is a line to be parsed
parsedTest.push(currentElement);
currentElement = {
line: line,
assertions: []
};
}
}
parsedTest.push(currentElement);
let assertions: IAssertion[] = [];
let offset = 0;
for (let i = 0; i < parsedTest.length; i++) {
const parsedTestLine = parsedTest[i];
for (let j = 0; j < parsedTestLine.assertions.length; j++) {
const assertion = parsedTestLine.assertions[j];
assertions.push({
testLineNumber: assertion.testLineNumber,
startOffset: offset + assertion.startOffset,
length: assertion.length,
tokenType: assertion.expectedTokenType
});
}
offset += parsedTestLine.line.length + 1;
}
let content: string = parsedTest.map(parsedTestLine => parsedTestLine.line).join('\n');
return { content, assertions };
}
function executeTest(fileName: string, parseFunc: IParseFunc): void {
const { content, assertions } = parseTest(fileName);
const actual = parseFunc(content);
let actualIndex = 0, actualCount = actual.length / 3;
for (let i = 0; i < assertions.length; i++) {
const assertion = assertions[i];
while (actualIndex < actualCount && actual[3 * actualIndex] + actual[3 * actualIndex + 1] <= assertion.startOffset) {
actualIndex++;
}
assert.ok(
actual[3 * actualIndex] <= assertion.startOffset,
`Line ${assertion.testLineNumber} : startOffset : ${actual[3 * actualIndex]} <= ${assertion.startOffset}`
);
assert.ok(
actual[3 * actualIndex] + actual[3 * actualIndex + 1] >= assertion.startOffset + assertion.length,
`Line ${assertion.testLineNumber} : length : ${actual[3 * actualIndex]} + ${actual[3 * actualIndex + 1]} >= ${assertion.startOffset} + ${assertion.length}.`
);
assert.equal(
actual[3 * actualIndex + 2],
assertion.tokenType,
`Line ${assertion.testLineNumber} : tokenType`);
}
}
suite('Classification', () => {
test('TypeScript', () => {
executeTest(getPathFromAmdModule(require, 'vs/editor/test/node/classification/typescript-test.ts').replace(/\bout\b/, 'src'), parse);
});
});
......@@ -43,7 +43,7 @@ export abstract class AbstractTextMateService extends Disposable implements ITex
private _grammarDefinitions: IValidGrammarDefinition[] | null;
private _grammarFactory: TMGrammarFactory | null;
private _tokenizersRegistrations: IDisposable[];
private _currentTokenColors: ITokenColorizationRule[] | null;
protected _currentTheme: IRawTheme | null;
constructor(
@IModeService private readonly _modeService: IModeService,
......@@ -64,6 +64,8 @@ export abstract class AbstractTextMateService extends Disposable implements ITex
this._grammarFactory = null;
this._tokenizersRegistrations = [];
this._currentTheme = null;
grammarsExtPoint.setHandler((extensions) => {
this._grammarDefinitions = null;
if (this._grammarFactory) {
......@@ -242,11 +244,11 @@ export abstract class AbstractTextMateService extends Disposable implements ITex
}
private _updateTheme(grammarFactory: TMGrammarFactory, colorTheme: IColorTheme, forceUpdate: boolean): void {
if (!forceUpdate && AbstractTextMateService.equalsTokenRules(this._currentTokenColors, colorTheme.tokenColors)) {
if (!forceUpdate && this._currentTheme && AbstractTextMateService.equalsTokenRules(this._currentTheme.settings, colorTheme.tokenColors)) {
return;
}
this._currentTokenColors = colorTheme.tokenColors;
this._doUpdateTheme(grammarFactory, { name: colorTheme.label, settings: colorTheme.tokenColors });
this._currentTheme = { name: colorTheme.label, settings: colorTheme.tokenColors };
this._doUpdateTheme(grammarFactory, this._currentTheme);
}
protected _doUpdateTheme(grammarFactory: TMGrammarFactory, theme: IRawTheme): void {
......
......@@ -14,12 +14,15 @@ import { ILogService } from 'vs/platform/log/common/log';
import { IConfigurationService } from 'vs/platform/configuration/common/configuration';
import { createWebWorker, MonacoWebWorker } from 'vs/editor/common/services/webWorker';
import { IModelService } from 'vs/editor/common/services/modelService';
import { IOnigLib } from 'vscode-textmate';
import { IOnigLib, IRawTheme } from 'vscode-textmate';
import { IValidGrammarDefinition } from 'vs/workbench/services/textMate/common/TMScopeRegistry';
import { TextMateWorker } from 'vs/workbench/services/textMate/electron-browser/textMateWorker';
import { ITextModel } from 'vs/editor/common/model';
import { Disposable } from 'vs/base/common/lifecycle';
import { UriComponents, URI } from 'vs/base/common/uri';
import { MultilineTokensBuilder } from 'vs/editor/common/model/tokensStore';
import { TMGrammarFactory } from 'vs/workbench/services/textMate/common/TMGrammarFactory';
import { IModelContentChangedEvent } from 'vs/editor/common/model/textModelEvents';
import { IStorageService } from 'vs/platform/storage/common/storage';
const RUN_TEXTMATE_IN_WORKER = false;
......@@ -29,6 +32,7 @@ class ModelWorkerTextMateTokenizer extends Disposable {
private readonly _worker: TextMateWorker;
private readonly _model: ITextModel;
private _isSynced: boolean;
private _pendingChanges: IModelContentChangedEvent[] = [];
constructor(worker: TextMateWorker, model: ITextModel) {
super();
......@@ -42,6 +46,7 @@ class ModelWorkerTextMateTokenizer extends Disposable {
this._register(this._model.onDidChangeContent((e) => {
if (this._isSynced) {
this._worker.acceptModelChanged(this._model.uri.toString(), e);
this._pendingChanges.push(e);
}
}));
......@@ -84,11 +89,36 @@ class ModelWorkerTextMateTokenizer extends Disposable {
super.dispose();
this._endSync();
}
private _confirm(versionId: number): void {
while (this._pendingChanges.length > 0 && this._pendingChanges[0].versionId <= versionId) {
this._pendingChanges.shift();
}
}
public setTokens(versionId: number, rawTokens: ArrayBuffer): void {
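// The worker tokenized the model at `versionId`; drop the pending edits the worker has
// already seen and replay the remaining ones over the received token ranges so they line
// up with the current contents of the model.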
this._confirm(versionId);
const tokens = MultilineTokensBuilder.deserialize(new Uint8Array(rawTokens));
for (let i = 0; i < this._pendingChanges.length; i++) {
const change = this._pendingChanges[i];
for (let j = 0; j < tokens.length; j++) {
for (let k = 0; k < change.changes.length; k++) {
tokens[j].applyEdit(change.changes[k].range, change.changes[k].text);
}
}
}
this._model.setTokens(tokens);
}
}
export class TextMateWorkerHost {
constructor(@IFileService private readonly _fileService: IFileService) {
constructor(
private readonly textMateService: TextMateService,
@IFileService private readonly _fileService: IFileService
) {
}
async readFile(_resource: UriComponents): Promise<string> {
......@@ -96,6 +126,11 @@ export class TextMateWorkerHost {
const content = await this._fileService.readFile(resource);
return content.value.toString();
}
async setTokens(_resource: UriComponents, versionId: number, tokens: Uint8Array): Promise<void> {
const resource = URI.revive(_resource);
this.textMateService.setTokens(resource, versionId, tokens);
}
}
export class TextMateService extends AbstractTextMateService {
......@@ -155,7 +190,7 @@ export class TextMateService extends AbstractTextMateService {
this._killWorker();
if (RUN_TEXTMATE_IN_WORKER) {
const workerHost = new TextMateWorkerHost(this._fileService);
const workerHost = new TextMateWorkerHost(this, this._fileService);
const worker = createWebWorker<TextMateWorker>(this._modelService, {
createData: {
grammarDefinitions
......@@ -172,11 +207,21 @@ export class TextMateService extends AbstractTextMateService {
return;
}
this._workerProxy = proxy;
if (this._currentTheme) {
this._workerProxy.acceptTheme(this._currentTheme);
}
this._modelService.getModels().forEach((model) => this._onModelAdded(model));
});
}
}
protected _doUpdateTheme(grammarFactory: TMGrammarFactory, theme: IRawTheme): void {
super._doUpdateTheme(grammarFactory, theme);
if (this._currentTheme && this._workerProxy) {
this._workerProxy.acceptTheme(this._currentTheme);
}
}
protected _onDidDisposeGrammarFactory(): void {
this._killWorker();
}
......@@ -193,6 +238,14 @@ export class TextMateService extends AbstractTextMateService {
}
this._workerProxy = null;
}
setTokens(resource: URI, versionId: number, tokens: ArrayBuffer): void {
const key = resource.toString();
if (!this._tokenizers[key]) {
return;
}
this._tokenizers[key].setTokens(versionId, tokens);
}
}
registerSingleton(ITextMateService, TextMateService);
\ No newline at end of file
registerSingleton(ITextMateService, TextMateService);
......@@ -10,6 +10,10 @@ import { IValidEmbeddedLanguagesMap, IValidTokenTypeMap, IValidGrammarDefinition
import { TMGrammarFactory, ICreateGrammarResult } from 'vs/workbench/services/textMate/common/TMGrammarFactory';
import { IModelChangedEvent, MirrorTextModel } from 'vs/editor/common/model/mirrorTextModel';
import { TextMateWorkerHost } from 'vs/workbench/services/textMate/electron-browser/textMateService';
import { TokenizationStateStore } from 'vs/editor/common/model/textModelTokens';
import { IGrammar, StackElement, IRawTheme } from 'vscode-textmate';
import { MultilineTokensBuilder, countEOL } from 'vs/editor/common/model/tokensStore';
import { LineTokens } from 'vs/editor/common/core/lineTokens';
export interface IValidGrammarDefinitionDTO {
location: UriComponents;
......@@ -34,26 +38,79 @@ export interface IRawModelData {
class TextMateWorkerModel extends MirrorTextModel {
private readonly _tokenizationStateStore: TokenizationStateStore;
private readonly _worker: TextMateWorker;
private _languageId: LanguageId;
private _grammar: IGrammar | null;
private _isDisposed: boolean;
constructor(uri: URI, lines: string[], eol: string, versionId: number, worker: TextMateWorker, languageId: LanguageId) {
super(uri, lines, eol, versionId);
this._tokenizationStateStore = new TokenizationStateStore();
this._worker = worker;
this._languageId = languageId;
this._isDisposed = false;
this._grammar = null;
this._resetTokenization();
}
onLanguageId(languageId: LanguageId): void {
public dispose(): void {
this._isDisposed = true;
super.dispose();
}
public onLanguageId(languageId: LanguageId): void {
this._languageId = languageId;
this._resetTokenization();
}
onEvents(e: IModelChangedEvent): void {
super.onEvents(e);
for (let i = 0; i < e.changes.length; i++) {
const change = e.changes[i];
const [eolCount] = countEOL(change.text);
this._tokenizationStateStore.applyEdits(change.range, eolCount);
}
this._ensureTokens();
}
private _resetTokenization(): void {
this._worker.getOrCreateGrammar(this._languageId).then((r) => {
console.log(r);
this._grammar = null;
this._tokenizationStateStore.flush(null);
const languageId = this._languageId;
this._worker.getOrCreateGrammar(languageId).then((r) => {
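// The grammar arrives asynchronously; ignore it if the model was disposed or its
// language changed again in the meantime.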
if (this._isDisposed || languageId !== this._languageId) {
return;
}
this._grammar = r.grammar;
this._tokenizationStateStore.flush(r.initialState);
this._ensureTokens();
});
}
private _ensureTokens(): void {
if (!this._grammar) {
return;
}
const builder = new MultilineTokensBuilder();
const lineCount = this._lines.length;
// Tokenize all lines starting at the first invalid one
for (let lineIndex = this._tokenizationStateStore.invalidLineStartIndex; lineIndex < lineCount; lineIndex++) {
const text = this._lines[lineIndex];
const lineStartState = this._tokenizationStateStore.getBeginState(lineIndex);
const r = this._grammar.tokenizeLine2(text, <StackElement>lineStartState!);
LineTokens.convertToEndOffset(r.tokens, text.length);
builder.add(lineIndex + 1, r.tokens);
this._tokenizationStateStore.setEndState(lineCount, lineIndex, r.ruleStack);
lineIndex = this._tokenizationStateStore.invalidLineStartIndex - 1; // -1 because the outer loop increments it
}
this._worker._setTokens(this._uri, this._versionId, builder.serialize());
}
}
export class TextMateWorker {
......@@ -91,7 +148,7 @@ export class TextMateWorker {
}
this._grammarFactory = new TMGrammarFactory({
logTrace: (msg: string) => console.log(msg),
logTrace: (msg: string) => {/* console.log(msg) */ },
logError: (msg: string, err: any) => console.error(msg, err),
readFile: (resource: URI) => this._host.readFile(resource)
}, grammarDefinitions, vscodeTextmate, undefined);
......@@ -112,7 +169,10 @@ export class TextMateWorker {
}
public acceptRemovedModel(strURL: string): void {
delete this._models[strURL];
if (this._models[strURL]) {
this._models[strURL].dispose();
delete this._models[strURL];
}
}
public getOrCreateGrammar(languageId: LanguageId): Promise<ICreateGrammarResult> {
......@@ -121,6 +181,14 @@ export class TextMateWorker {
}
return this._grammarCache[languageId];
}
public acceptTheme(theme: IRawTheme): void {
this._grammarFactory.setTheme(theme);
}
public _setTokens(resource: URI, versionId: number, tokens: Uint8Array): void {
this._host.setTokens(resource, versionId, tokens);
}
}
export function create(ctx: IWorkerContext<TextMateWorkerHost>, createData: ICreateData): TextMateWorker {
......