提交 659914b7 编写于 作者: A Alex Dima

Simplify ITokenizationSupport:

 - remove ILineTokens.actualStopOffset
 - remove stopAtOffset
 - refactor MonarchLexer
上级 ab2b0d2d
......@@ -145,7 +145,7 @@ function _actualColorize(lines: string[], tabSize: number, tokenizationSupport:
for (let i = 0, length = lines.length; i < length; i++) {
let line = lines[i];
let tokenizeResult = tokenizationSupport.tokenize(line, state);
let tokenizeResult = tokenizationSupport.tokenize(line, state, 0);
let renderResult = renderLine(new RenderLineInput(
line,
......
......@@ -21,7 +21,6 @@ import { ModeTransition } from 'vs/editor/common/core/modeTransition';
import { TokensInflatorMap } from 'vs/editor/common/model/tokensBinaryEncoding';
import { Position } from 'vs/editor/common/core/position';
import { LanguageConfigurationRegistry } from 'vs/editor/common/modes/languageConfigurationRegistry';
import { Token } from 'vs/editor/common/core/token';
import { LineTokens, LineToken } from 'vs/editor/common/core/lineTokens';
import { getWordAtText } from 'vs/editor/common/model/wordHelper';
......@@ -355,7 +354,6 @@ export class TextModelWithTokens extends TextModel implements editorCommon.IToke
var linesLength = this._lines.length;
var endLineIndex = lineNumber - 1;
var stopLineTokenizationAfter = 1000000000; // 1 billion, if a line is so long, you have other trouble :).
// Validate all states up to and including endLineIndex
for (var lineIndex = this._invalidLineStartIndex; lineIndex <= endLineIndex; lineIndex++) {
......@@ -366,27 +364,14 @@ export class TextModelWithTokens extends TextModel implements editorCommon.IToke
try {
// Tokenize only the first X characters
let freshState = this._lines[lineIndex].getState().clone();
r = this._tokenizationSupport.tokenize(this._lines[lineIndex].text, freshState, 0, stopLineTokenizationAfter);
r = this._tokenizationSupport.tokenize(this._lines[lineIndex].text, freshState, 0);
} catch (e) {
e.friendlyMessage = TextModelWithTokens.MODE_TOKENIZATION_FAILED_MSG;
onUnexpectedError(e);
}
if (r && r.tokens && r.tokens.length > 0) {
// Cannot have a stop offset before the last token
r.actualStopOffset = Math.max(r.actualStopOffset, r.tokens[r.tokens.length - 1].startIndex + 1);
}
if (r && r.actualStopOffset < text.length) {
// Treat the rest of the line (if above limit) as one default token
r.tokens.push(new Token(r.actualStopOffset, ''));
// Use as end state the starting state
r.endState = this._lines[lineIndex].getState();
}
if (!r) {
r = nullTokenize(this.getModeId(), text, this._lines[lineIndex].getState());
r = nullTokenize(this.getModeId(), text, this._lines[lineIndex].getState(), 0);
}
if (!r.modeTransitions) {
r.modeTransitions = [];
......
......@@ -38,11 +38,60 @@ export interface IMode {
*/
export interface ILineTokens {
tokens: Token[];
actualStopOffset: number;
endState: IState;
modeTransitions: ModeTransition[];
}
/**
 * Constants for reading and writing the "collapsed" metadata of an entire
 * StackElement stack, packed into one 32-bit word.
 *
 * Assumptions behind the field widths:
 *  - languageId < 256 => 8 bits
 *  - unique color count < 512 => 9 bits
 *
 * Layout, most significant bit on the left:
 *
 *     bit   31 ...... 23 | 22 ...... 14 | 13 .. 11 | 10 .. 8 | 7 ...... 0
 *     field  background  |  foreground  | fontStyle | tokType | languageId
 *     width      9       |      9       |     3     |    3    |     8
 *
 *     bbbb bbbb bfff ffff ffFF FTTT LLLL LLLL
 *
 *  - L = LanguageId (8 bits)
 *  - T = StandardTokenType (3 bits)
 *  - F = FontStyle (3 bits)
 *  - f = foreground color (9 bits)
 *  - b = background color (9 bits)
 *
 * BACKGROUND_MASK has bit 31 set, so `metadata & BACKGROUND_MASK` is negative as a
 * signed 32-bit value; use the unsigned shift `>>>` when extracting that field.
 *
 * @internal
 */
export const enum MetadataConsts {
	// Shift amounts: the position of each field's least significant bit.
	LANGUAGEID_OFFSET = 0,
	TOKEN_TYPE_OFFSET = 8,
	FONT_STYLE_OFFSET = 11,
	FOREGROUND_OFFSET = 14,
	BACKGROUND_OFFSET = 23,

	// Masks: the bits occupied by each field, already shifted into place.
	LANGUAGEID_MASK = 0x000000FF, // bits 0-7
	TOKEN_TYPE_MASK = 0x00000700, // bits 8-10
	FONT_STYLE_MASK = 0x00003800, // bits 11-13
	FOREGROUND_MASK = 0x007FC000, // bits 14-22
	BACKGROUND_MASK = 0xFF800000  // bits 23-31
}
/**
* Result of tokenizing a single line, with the tokens in a flat binary encoding
* instead of an array of token objects.
*
* @internal
*/
export interface ILineTokens3 {
/**
* The tokens in binary format. Each token occupies two array indices. For token i:
* - at offset 2*i => startIndex
* - at offset 2*i + 1 => metadata
*
* NOTE(review): the metadata word presumably follows the bit layout described by
* `MetadataConsts` - confirm once a producer of this interface exists.
*/
readonly tokens: Uint32Array;
/**
* The tokenizer state after this line.
* NOTE(review): presumably passed as the start state when tokenizing the next line - confirm against callers.
*/
readonly endState: IState;
}
/**
* @internal
*/
......@@ -51,8 +100,9 @@ export interface ITokenizationSupport {
getInitialState(): IState;
// add offsetDelta to each of the returned indices
// the line is always tokenized in full (the former `stopAtOffset` parameter has been removed)
tokenize(line: string, state: IState, offsetDelta?: number, stopAtOffset?: number): ILineTokens;
tokenize(line: string, state: IState, offsetDelta: number): ILineTokens;
// tokenize3(line: string, state: IState, offsetDelta: number): ILineTokens3;
}
/**
......
......@@ -14,7 +14,7 @@ import * as modes from 'vs/editor/common/modes';
import * as monarchCommon from 'vs/editor/common/modes/monarch/monarchCommon';
import { IModeService } from 'vs/editor/common/services/modeService';
import { Token } from 'vs/editor/common/core/token';
import { NULL_STATE, nullTokenize, NULL_MODE_ID } from 'vs/editor/common/modes/nullMode';
import { NULL_STATE, NULL_MODE_ID } from 'vs/editor/common/modes/nullMode';
import { ModeTransition } from 'vs/editor/common/core/modeTransition';
const CACHE_STACK_DEPTH = 5;
......@@ -230,6 +230,73 @@ export class MonarchLineState implements modes.IState {
const hasOwnProperty = Object.hasOwnProperty;
/**
* Sink that the Monarch tokenizer reports its results to while it walks a line.
* The tokenizer only pushes events through this interface and returns the end state;
* how tokens and mode transitions are stored is up to the implementation.
*/
interface IMonarchTokensCollector {
/**
* Signals that the text starting at `startOffset` belongs to the mode `modeId`.
*/
enterMode(startOffset: number, modeId: string): void;
/**
* Reports a token of the given `type` starting at `startOffset`.
*/
emit(startOffset: number, type: string): void;
/**
* Tokenizes `embeddedModeLine` with the embedded mode described by `embeddedModeData`.
* `offsetDelta` is forwarded to the embedded tokenizer as its offset delta.
* Returns the embedded mode's state after the tokenized text.
*/
nestedModeTokenize(embeddedModeLine: string, embeddedModeData: EmbeddedModeData, offsetDelta: number): modes.IState;
}
/**
 * Collector that accumulates the tokenization of one line as plain `Token` and
 * `ModeTransition` arrays and packages them as a `modes.ILineTokens` in `finalize`.
 *
 * Consecutive events that repeat the previous mode id / token type are dropped, so
 * the resulting arrays only contain actual changes.
 */
class MonarchClassicTokensCollector implements IMonarchTokensCollector {

	private _modeTransitions: ModeTransition[];
	private _tokens: Token[];
	/** Mode id of the last transition pushed via `enterMode`; `null` when unknown. */
	private _lastModeId: string;
	/** Type of the last token pushed via `emit`; `null` when unknown. */
	private _lastTokenType: string;

	constructor() {
		this._modeTransitions = [];
		this._tokens = [];
		this._lastModeId = null;
		this._lastTokenType = null;
	}

	/**
	 * Records a transition to `modeId` at `startOffset`, unless `modeId` is the mode
	 * that was entered last.
	 */
	public enterMode(startOffset: number, modeId: string): void {
		if (this._lastModeId === modeId) {
			// Avoid transitioning to the same mode (this can happen in case of empty embedded modes)
			return;
		}
		this._lastModeId = modeId;
		this._modeTransitions.push(new ModeTransition(startOffset, modeId));
	}

	/**
	 * Records a token of `type` starting at `startOffset`, unless `type` equals the
	 * type of the token emitted last (adjacent tokens of the same type are merged).
	 */
	public emit(startOffset: number, type: string): void {
		if (this._lastTokenType === type) {
			return;
		}
		this._lastTokenType = type;
		this._tokens.push(new Token(startOffset, type));
	}

	/**
	 * Tokenizes `embeddedModeLine` with the tokenization support registered for the
	 * embedded mode and appends its tokens and mode transitions to this collector.
	 *
	 * If no tokenization support is registered for the embedded mode, the text is
	 * reported as a single token of type `''` in that mode and the embedded state is
	 * returned unchanged.
	 *
	 * @param embeddedModeLine The text to hand to the embedded mode.
	 * @param embeddedModeData Mode id and current state of the embedded mode.
	 * @param offsetDelta Offset delta forwarded to the embedded tokenizer.
	 * @returns The embedded mode's state after the tokenized text.
	 */
	public nestedModeTokenize(embeddedModeLine: string, embeddedModeData: EmbeddedModeData, offsetDelta: number): modes.IState {
		const nestedModeId = embeddedModeData.modeId;
		const embeddedModeState = embeddedModeData.state;

		const nestedModeTokenizationSupport = modes.TokenizationRegistry.get(nestedModeId);
		if (!nestedModeTokenizationSupport) {
			this.enterMode(offsetDelta, nestedModeId);
			this.emit(offsetDelta, '');
			return embeddedModeState;
		}

		const nestedResult = nestedModeTokenizationSupport.tokenize(embeddedModeLine, embeddedModeState, offsetDelta);

		// A tokenizer may leave `tokens` / `modeTransitions` unset. `concat(null)` would
		// append a literal `null` element, so only merge arrays that are actually present.
		if (nestedResult.tokens) {
			this._tokens = this._tokens.concat(nestedResult.tokens);
		}
		// The last token now comes from the nested mode: force the next `emit` to push.
		this._lastTokenType = null;

		if (nestedResult.modeTransitions) {
			this._modeTransitions = this._modeTransitions.concat(nestedResult.modeTransitions);
		}
		// Likewise, force the next `enterMode` to push a transition.
		this._lastModeId = null;

		return nestedResult.endState;
	}

	/**
	 * Packages everything collected so far, together with `endState`, as line tokens.
	 * The returned object references the collector's internal arrays (no copy is made).
	 */
	public finalize(endState: MonarchLineState): modes.ILineTokens {
		return {
			tokens: this._tokens,
			modeTransitions: this._modeTransitions,
			endState: endState
		};
	}
}
export class MonarchTokenizer implements modes.ITokenizationSupport {
private readonly _modeService: IModeService;
......@@ -270,10 +337,18 @@ export class MonarchTokenizer implements modes.ITokenizationSupport {
/**
 * Tokenizes `line` starting from `_lineState` and returns the collected tokens,
 * mode transitions and end state. `offsetDelta` is passed through to `_tokenize`.
 */
public tokenize(line: string, _lineState: modes.IState, offsetDelta: number): modes.ILineTokens {
	const collector = new MonarchClassicTokensCollector();
	const finalState = this._tokenize(line, <MonarchLineState>_lineState, offsetDelta, collector);
	return collector.finalize(finalState);
}
private _tokenize(line: string, lineState: MonarchLineState, offsetDelta: number, collector: IMonarchTokensCollector): MonarchLineState {
if (lineState.embeddedModeData) {
return this._nestedTokenize(line, lineState, offsetDelta, [], []);
return this._nestedTokenize(line, lineState, offsetDelta, collector);
} else {
return this._myTokenize(line, lineState, offsetDelta, [], []);
return this._myTokenize(line, lineState, offsetDelta, collector);
}
}
......@@ -322,55 +397,28 @@ export class MonarchTokenizer implements modes.ITokenizationSupport {
return popOffset;
}
private _safeNestedModeTokenize(embeddedModeLine: string, embeddedModeData: EmbeddedModeData, offsetDelta: number): modes.ILineTokens {
const nestedModeId = embeddedModeData.modeId;
const embeddedModeState = embeddedModeData.state;
const nestedModeTokenizationSupport = modes.TokenizationRegistry.get(nestedModeId);
if (nestedModeTokenizationSupport) {
return nestedModeTokenizationSupport.tokenize(embeddedModeLine, embeddedModeState, offsetDelta);
}
// The nested mode doesn't have tokenization support,
// unfortunatelly this means we have to fake it
return nullTokenize(nestedModeId, embeddedModeLine, embeddedModeState, offsetDelta);
}
private _nestedTokenize(line: string, lineState: MonarchLineState, offsetDelta: number, prependTokens: Token[], prependModeTransitions: ModeTransition[]): modes.ILineTokens {
private _nestedTokenize(line: string, lineState: MonarchLineState, offsetDelta: number, tokensCollector: IMonarchTokensCollector): MonarchLineState {
let popOffset = this._findLeavingNestedModeOffset(line, lineState);
if (popOffset === -1) {
// tokenization will not leave nested mode
let nestedModeLineTokens = this._safeNestedModeTokenize(line, lineState.embeddedModeData, offsetDelta);
// Prepend nested mode's result to our result
return {
tokens: prependTokens.concat(nestedModeLineTokens.tokens),
actualStopOffset: nestedModeLineTokens.actualStopOffset,
modeTransitions: prependModeTransitions.concat(nestedModeLineTokens.modeTransitions),
endState: MonarchLineStateFactory.create(lineState.stack, new EmbeddedModeData(lineState.embeddedModeData.modeId, nestedModeLineTokens.endState))
};
let nestedEndState = tokensCollector.nestedModeTokenize(line, lineState.embeddedModeData, offsetDelta);
return MonarchLineStateFactory.create(lineState.stack, new EmbeddedModeData(lineState.embeddedModeData.modeId, nestedEndState));
}
let nestedModeLine = line.substring(0, popOffset);
if (nestedModeLine.length > 0) {
// tokenize with the nested mode
let nestedModeLineTokens = this._safeNestedModeTokenize(nestedModeLine, lineState.embeddedModeData, offsetDelta);
// Prepend nested mode's result to our result
prependTokens = prependTokens.concat(nestedModeLineTokens.tokens);
prependModeTransitions = prependModeTransitions.concat(nestedModeLineTokens.modeTransitions);
tokensCollector.nestedModeTokenize(nestedModeLine, lineState.embeddedModeData, offsetDelta);
}
let restOfTheLine = line.substring(popOffset);
return this._myTokenize(restOfTheLine, lineState, offsetDelta + popOffset, prependTokens, prependModeTransitions);
return this._myTokenize(restOfTheLine, lineState, offsetDelta + popOffset, tokensCollector);
}
private _myTokenize(line: string, lineState: MonarchLineState, offsetDelta: number, prependTokens: Token[], prependModeTransitions: ModeTransition[]): modes.ILineTokens {
if (prependModeTransitions.length === 0 || prependModeTransitions[prependModeTransitions.length - 1].modeId !== this._modeId) {
// Avoid transitioning to the same mode (this can happen in case of empty embedded modes)
prependModeTransitions.push(new ModeTransition(offsetDelta, this._modeId));
}
private _myTokenize(line: string, lineState: MonarchLineState, offsetDelta: number, tokensCollector: IMonarchTokensCollector): MonarchLineState {
tokensCollector.enterMode(offsetDelta, this._modeId);
const lineLength = line.length;
......@@ -614,9 +662,7 @@ export class MonarchTokenizer implements modes.ITokenizationSupport {
tokenType = monarchCommon.sanitize(token);
}
if (prependTokens.length === 0 || prependTokens[prependTokens.length - 1].type !== tokenType) {
prependTokens[prependTokens.length] = new Token(pos0 + offsetDelta, tokenType);
}
tokensCollector.emit(pos0 + offsetDelta, tokenType);
}
if (enteringEmbeddedMode) {
......@@ -631,24 +677,14 @@ export class MonarchTokenizer implements modes.ITokenizationSupport {
if (pos < lineLength) {
// there is content from the embedded mode on this line
let restOfLine = line.substr(pos);
return this._nestedTokenize(restOfLine, MonarchLineStateFactory.create(stack, embeddedModeData), offsetDelta + pos, prependTokens, prependModeTransitions);
return this._nestedTokenize(restOfLine, MonarchLineStateFactory.create(stack, embeddedModeData), offsetDelta + pos, tokensCollector);
} else {
return {
tokens: prependTokens,
endState: MonarchLineStateFactory.create(stack, embeddedModeData),
actualStopOffset: offsetDelta + line.length,
modeTransitions: prependModeTransitions
};
return MonarchLineStateFactory.create(stack, embeddedModeData);
}
}
}
return {
tokens: prependTokens,
endState: MonarchLineStateFactory.create(stack, embeddedModeData),
actualStopOffset: offsetDelta + line.length,
modeTransitions: prependModeTransitions
};
return MonarchLineStateFactory.create(stack, embeddedModeData);
}
private _getNestedEmbeddedModeData(mimetypeOrModeId: string): EmbeddedModeData {
......
......@@ -23,14 +23,13 @@ export const NULL_STATE: IState = new NullStateImpl();
export const NULL_MODE_ID = 'vs.editor.nullMode';
export function nullTokenize(modeId: string, buffer: string, state: IState, deltaOffset: number = 0, stopAtOffset?: number): ILineTokens {
export function nullTokenize(modeId: string, buffer: string, state: IState, deltaOffset: number): ILineTokens {
let tokens: Token[] = [new Token(deltaOffset, '')];
let modeTransitions: ModeTransition[] = [new ModeTransition(deltaOffset, modeId)];
return {
tokens: tokens,
actualStopOffset: deltaOffset + buffer.length,
endState: state,
modeTransitions: modeTransitions
};
......
......@@ -14,13 +14,11 @@ export class RawLineTokens implements modes.ILineTokens {
tokens: Token[];
modeTransitions: ModeTransition[];
actualStopOffset: number;
endState: modes.IState;
constructor(tokens: Token[], modeTransitions: ModeTransition[], actualStopOffset: number, endState: modes.IState) {
constructor(tokens: Token[], modeTransitions: ModeTransition[], endState: modes.IState) {
this.tokens = tokens;
this.modeTransitions = modeTransitions;
this.actualStopOffset = actualStopOffset;
this.endState = endState;
}
}
......
......@@ -24,7 +24,7 @@ function _getSafeTokenizationSupport(languageId: string): ITokenizationSupport {
}
return {
getInitialState: () => NULL_STATE,
tokenize: (buffer: string, state: IState, deltaOffset: number = 0, stopAtOffset?: number) => nullTokenize(null, buffer, state, deltaOffset, stopAtOffset)
tokenize: (buffer: string, state: IState, deltaOffset: number) => nullTokenize(null, buffer, state, deltaOffset)
};
}
......@@ -97,7 +97,7 @@ function _tokenizeLines(text: string, tokenizationSupport: ITokenizationSupport,
}
function _tokenizeLine(line: string, tokenizationSupport: ITokenizationSupport, emitToken: IEmitTokenFunc, startState: IState): IState {
var tokenized = tokenizationSupport.tokenize(line, startState),
var tokenized = tokenizationSupport.tokenize(line, startState, 0),
endState = tokenized.endState,
tokens = tokenized.tokens,
offset = 0,
......
......@@ -319,7 +319,7 @@ export class TokenizationSupport2Adapter implements modes.ITokenizationSupport {
return this._actual.getInitialState();
}
public tokenize(line: string, state: modes.IState, offsetDelta: number = 0, stopAtOffset?: number): modes.ILineTokens {
public tokenize(line: string, state: modes.IState, offsetDelta: number): modes.ILineTokens {
let actualResult = this._actual.tokenize(line, state);
let tokens: Token[] = [];
actualResult.tokens.forEach((t) => {
......@@ -342,7 +342,6 @@ export class TokenizationSupport2Adapter implements modes.ITokenizationSupport {
return {
tokens: tokens,
actualStopOffset: offsetDelta + line.length,
endState: endState,
modeTransitions: [new ModeTransition(offsetDelta, this._modeId)],
};
......
......@@ -282,7 +282,7 @@ function createTokenizationSupport(languageRegistration: TMLanguageRegistration,
var tokenizer = new Tokenizer(languageRegistration, modeId, grammar);
return {
getInitialState: () => new TMState(null),
tokenize: (line, state, offsetDelta?, stopAtOffset?) => tokenizer.tokenize(line, <TMState>state, offsetDelta, stopAtOffset)
tokenize: (line, state, offsetDelta) => tokenizer.tokenize(line, <TMState>state, offsetDelta)
};
}
......@@ -454,14 +454,13 @@ class Tokenizer {
this._decodeMap = new DecodeMap(languageRegistration);
}
public tokenize(line: string, state: TMState, offsetDelta: number = 0, stopAtOffset?: number): ILineTokens {
public tokenize(line: string, state: TMState, offsetDelta: number): ILineTokens {
// Do not attempt to tokenize if a line has over 20k
// or if the rule stack contains more than 100 rules (indicator of broken grammar that forgets to pop rules)
if (line.length >= 20000 || depth(state.ruleStack) > 100) {
return new RawLineTokens(
[new Token(offsetDelta, '')],
[new ModeTransition(offsetDelta, this._modeId)],
offsetDelta,
state
);
}
......@@ -517,7 +516,6 @@ export function decodeTextMateTokens(topLevelModeId: string, decodeMap: DecodeMa
return new RawLineTokens(
tokens,
modeTransitions,
offsetDelta + line.length,
resultState
);
}
......
......@@ -39,7 +39,6 @@ suite('Editor Model - Model Modes 1', () => {
calledState.calledFor.push(line.charAt(0));
return {
tokens: [new Token(0, '')],
actualStopOffset: line.length,
endState: state,
modeTransitions: null
};
......@@ -183,7 +182,6 @@ suite('Editor Model - Model Modes 2', () => {
(<ModelState2>state).prevLineContent = line;
return {
tokens: [new Token(0, '')],
actualStopOffset: line.length,
endState: state,
modeTransitions: null
};
......@@ -311,7 +309,6 @@ suite('Editor Model - Token Iterator', () => {
}
return {
tokens: tokens,
actualStopOffset: line.length,
endState: state,
modeTransitions: null
};
......
......@@ -271,11 +271,10 @@ suite('TextModelWithTokens regression tests', () => {
getInitialState: () => {
return new IndicisiveModeState();
},
tokenize: (line, state, offsetDelta, stopAtOffset) => {
tokenize: (line, state, offsetDelta) => {
let myId = ++_tokenId;
return {
tokens: [new Token(0, 'custom.' + myId)],
actualStopOffset: line.length,
endState: state,
modeTransitions: []
};
......
......@@ -73,7 +73,6 @@ class Mode extends MockMode {
}
return {
tokens: tokens,
actualStopOffset: -1,
endState: null,
modeTransitions: null
};
......
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
'use strict';
import * as assert from 'assert';
import * as modes from 'vs/editor/common/modes';
/**
* An expected token in a tokenization test: where it starts and what type it has.
*/
export interface ITestToken {
startIndex: number;
type: string;
}
/**
* One step of a tokenization test: a line of input and the tokens expected for it.
*/
export interface ITestItem {
// The text handed to the tokenizer for this step.
line: string;
// The expected tokens; `assertTokenization` skips the comparison when this is falsy.
tokens: ITestToken[];
}
/**
 * Tokenizes the lines of `tests` in order and asserts that each line produces the
 * expected tokens.
 *
 * Tokenization starts from `tokenizationSupport.getInitialState()`; the end state of
 * each line is used as the start state of the next one. Items whose `tokens` is
 * falsy are still tokenized (so the state advances) but their tokens are not checked.
 *
 * @param tokenizationSupport The tokenizer under test.
 * @param tests The input lines together with their expected tokens.
 */
export function assertTokenization(tokenizationSupport: modes.ITokenizationSupport, tests: ITestItem[]): void {
	let state = tokenizationSupport.getInitialState();
	for (let i = 0, len = tests.length; i < len; i++) {
		const test = tests[i];
		assert.ok(true, test.line);

		// `offsetDelta` is a required argument of `tokenize`; 0 keeps the token start
		// indices relative to the beginning of the line, which is what `tests` expect.
		const result = tokenizationSupport.tokenize(test.line, state, 0);

		if (test.tokens) {
			// On failure, show the actual tokens as the assertion message.
			assert.deepEqual(result.tokens, test.tokens, JSON.stringify(result.tokens, null, '\t'));
		}

		state = result.endState;
	}
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册