Semantic tokenization

Parent 308ff31f
......@@ -112,47 +112,5 @@ export function register(
selector: vscode.DocumentSelector,
client: ITypeScriptServiceClient
) {
-const provider = new SemanticColoringProvider(client);
-const run = async () => {
-const ed = vscode.window.activeTextEditor;
-if (!ed) {
-return;
-}
-// const doc = ed.document;
-const cancellationTokenSource = new vscode.CancellationTokenSource();
-provider.provideSemanticColoring(ed.document, cancellationTokenSource.token);
-// const file = client.toOpenedFilePath(doc);
-// if (!file) {
-// return;
-// }
-// const args: ExperimentalProtocol.EncodedSemanticClassificationsRequestArgs = {
-// file: file,
-// start: 0,
-// length: doc.getText().length,
-// };
-// const response = await client.execute('encodedSemanticClassifications-full', args, cancellationTokenSource.token);
-// if (response.type !== 'response') {
-// return;
-// }
-// if (!response.body) {
-// return;
-// }
-// console.log(response.body);
-};
-vscode.window.onDidChangeActiveTextEditor(run);
-run();
-console.log(`I am running...`);
-// return vscode.Disposable.from();
-// return vscode.languages.registerRenameProvider(selector,
-// new TypeScriptRenameProvider(client, fileConfigurationManager));
-return vscode.languages.registerSemanticColoringProvider(selector, provider);
-// return vscode.languages.registerSemanticColoringProvider(selector, )
+return vscode.languages.registerSemanticColoringProvider(selector, new SemanticColoringProvider(client));
}
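The removed scaffolding above manually drove the provider on every active-editor change and logged the raw response; the new version registers the provider directly. For orientation, a minimal sketch of the provider shape this registration expects (the semantic-coloring API was proposal-stage at this point, so treat the method signature as an assumption; `ITypeScriptServiceClient`, `toOpenedFilePath` and `client.execute` are the ones used in the deleted debug code):

// Sketch, not the shipped implementation: forwards the whole document to the
// TS server's encodedSemanticClassifications-full request.
class SemanticColoringProviderSketch {
	constructor(private readonly client: ITypeScriptServiceClient) { }

	public async provideSemanticColoring(document: vscode.TextDocument, token: vscode.CancellationToken): Promise<unknown> {
		const file = this.client.toOpenedFilePath(document);
		if (!file) {
			return null;
		}
		// ask for encoded classifications covering the entire file
		const args = { file, start: 0, length: document.getText().length };
		const response = await this.client.execute('encodedSemanticClassifications-full', args, token);
		return response.type === 'response' ? response.body : null;
	}
}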
......@@ -67,6 +67,11 @@ export class LineTokens implements IViewLineTokens {
return 0;
}
public getMetadata(tokenIndex: number): number {
const metadata = this._tokens[(tokenIndex << 1) + 1];
return metadata;
}
public getLanguageId(tokenIndex: number): LanguageId {
const metadata = this._tokens[(tokenIndex << 1) + 1];
return TokenMetadata.getLanguageId(metadata);
......@@ -132,8 +137,8 @@ export class LineTokens implements IViewLineTokens {
while (low < high) {
-let mid = low + Math.floor((high - low) / 2);
-let endOffset = tokens[(mid << 1)];
+const mid = low + Math.floor((high - low) / 2);
+const endOffset = tokens[(mid << 1)];
if (endOffset === desiredIndex) {
return mid + 1;
......
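Context for the two LineTokens hunks above: LineTokens keeps a whole line's tokens in one Uint32Array of interleaved [endOffset, metadata] pairs, so token i occupies slots 2*i and 2*i + 1. That is why the new getMetadata reads this._tokens[(tokenIndex << 1) + 1], and why the binary search compares against tokens[(mid << 1)], the end offset. A small self-contained illustration (all values invented):

// Illustration of the interleaved [endOffset, metadata] layout (values invented).
const lineTokens = new Uint32Array([
	5, 0x0101,  // token 0 covers [0, 5), metadata 0x0101
	8, 0x0202,  // token 1 covers [5, 8), metadata 0x0202
	20, 0x0303, // token 2 covers [8, 20), metadata 0x0303
]);

function getEndOffset(tokens: Uint32Array, tokenIndex: number): number {
	return tokens[tokenIndex << 1];
}
function getMetadata(tokens: Uint32Array, tokenIndex: number): number {
	return tokens[(tokenIndex << 1) + 1];
}

console.log(getEndOffset(lineTokens, 1), getMetadata(lineTokens, 1).toString(16)); // 8 202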
......@@ -14,7 +14,7 @@ import { IModelContentChange, IModelContentChangedEvent, IModelDecorationsChange
import { SearchData } from 'vs/editor/common/model/textModelSearch';
import { LanguageId, LanguageIdentifier, FormattingOptions } from 'vs/editor/common/modes';
import { ThemeColor } from 'vs/platform/theme/common/themeService';
-import { MultilineTokens } from 'vs/editor/common/model/tokensStore';
+import { MultilineTokens, MultilineTokens2 } from 'vs/editor/common/model/tokensStore';
/**
* Vertical Lane in the overview ruler of the editor.
......@@ -791,6 +791,11 @@ export interface ITextModel {
*/
setTokens(tokens: MultilineTokens[]): void;
/**
* @internal
*/
setSemanticTokens(tokens: MultilineTokens2[]): void;
/**
* Flush all tokenization state.
* @internal
......
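ITextModel thus gains a second internal entry point alongside setTokens: semantic tokens arrive as sparse MultilineTokens2 pieces (defined later in this commit) rather than the dense per-line MultilineTokens produced by the tokenizer. A hypothetical call, using the SparseEncodedTokens layout shown further down (metadata value invented):

// Hypothetical usage: one piece whose tokens are line-relative to line 10.
const metadata = (3 << 14) >>> 0; // invented: foreground color id 3
const piece = new MultilineTokens2(10, new SparseEncodedTokens(new Uint32Array([
	0, 4, 9, metadata, // deltaLine 0 (= line 10), characters [4, 9)
])));
model.setSemanticTokens([piece]);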
......@@ -32,7 +32,7 @@ import { BracketsUtils, RichEditBracket, RichEditBrackets } from 'vs/editor/comm
import { ITheme, ThemeColor } from 'vs/platform/theme/common/themeService';
import { withUndefinedAsNull } from 'vs/base/common/types';
import { VSBufferReadableStream, VSBuffer } from 'vs/base/common/buffer';
-import { TokensStore, MultilineTokens, countEOL } from 'vs/editor/common/model/tokensStore';
+import { TokensStore, MultilineTokens, countEOL, MultilineTokens2, TokensStore2 } from 'vs/editor/common/model/tokensStore';
import { Color } from 'vs/base/common/color';
function createTextBufferBuilder() {
......@@ -276,7 +276,7 @@ export class TextModel extends Disposable implements model.ITextModel {
private _languageIdentifier: LanguageIdentifier;
private readonly _languageRegistryListener: IDisposable;
private readonly _tokens: TokensStore;
-private readonly _tokens2: TokensStore;
+private readonly _tokens2: TokensStore2;
private readonly _tokenization: TextModelTokenization;
//#endregion
......@@ -340,7 +340,7 @@ export class TextModel extends Disposable implements model.ITextModel {
this._trimAutoWhitespaceLines = null;
this._tokens = new TokensStore();
-this._tokens2 = new TokensStore();
+this._tokens2 = new TokensStore2();
this._tokenization = new TextModelTokenization(this);
}
......@@ -1721,8 +1721,14 @@ export class TextModel extends Disposable implements model.ITextModel {
});
}
-public setSemanticTokens(tokens: MultilineTokens[]): void {
+public setSemanticTokens(tokens: MultilineTokens2[]): void {
this._tokens2.set(tokens);
// TODO@semantic: could we reduce the event here?
this._emitModelTokensChangedEvent({
tokenizationSupportChanged: false,
ranges: [{ fromLineNumber: 1, toLineNumber: this.getLineCount() }]
});
}
public tokenizeViewport(startLineNumber: number, endLineNumber: number): void {
......@@ -1785,7 +1791,8 @@ export class TextModel extends Disposable implements model.ITextModel {
private _getLineTokens(lineNumber: number): LineTokens {
const lineText = this.getLineContent(lineNumber);
-return this._tokens.getTokens(this._languageIdentifier.id, lineNumber - 1, lineText);
+const syntacticTokens = this._tokens.getTokens(this._languageIdentifier.id, lineNumber - 1, lineText);
+return this._tokens2.addSemanticTokens(lineNumber, syntacticTokens);
}
public getLanguageIdentifier(): LanguageIdentifier {
......
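Two things happen in the TextModel hunks above. First, setSemanticTokens stores the pieces and then, as its TODO admits, invalidates tokens for the entire document rather than only the affected ranges. Second, _getLineTokens becomes a two-stage pipeline: dense syntactic tokens come from _tokens, and _tokens2.addSemanticTokens overlays the sparse semantic ones on top (the merge itself is shown in TokensStore2 below). A consumer-side sketch of what the broadcast currently looks like (repaintLines is a hypothetical stand-in):

// Hypothetical listener: after setSemanticTokens, the changed range is
// currently always the whole document, i.e. lines 1..getLineCount().
model.onDidChangeTokens(e => {
	for (const range of e.ranges) {
		repaintLines(range.fromLineNumber, range.toLineNumber);
	}
});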
......@@ -109,6 +109,205 @@ export class MultilineTokensBuilder {
}
}
export interface IEncodedTokens {
empty(): IEncodedTokens;
getTokenCount(): number;
getDeltaLine(tokenIndex: number): number;
getMaxDeltaLine(): number;
getStartCharacter(tokenIndex: number): number;
getEndCharacter(tokenIndex: number): number;
getMetadata(tokenIndex: number): number;
delete(startDeltaLine: number, startCharacter: number, endDeltaLine: number, endCharacter: number): IEncodedTokens;
}
export class SparseEncodedTokens implements IEncodedTokens {
/**
* The encoding of tokens is:
* 4*i deltaLine (from `startLineNumber`)
* 4*i+1 startCharacter (from the line start)
* 4*i+2 endCharacter (from the line start)
* 4*i+3 metadata
*/
private tokens: Uint32Array;
constructor(tokens: Uint32Array) {
this.tokens = tokens;
}
public empty(): IEncodedTokens {
return new SparseEncodedTokens(new Uint32Array(0));
}
public delete(startDeltaLine: number, startCharacter: number, endDeltaLine: number, endCharacter: number): IEncodedTokens {
throw new Error(`Not implemented`); // TODO@semantic
}
public getMaxDeltaLine(): number {
const tokenCount = this.getTokenCount();
if (tokenCount === 0) {
return -1;
}
return this.getDeltaLine(tokenCount - 1);
}
public getTokenCount(): number {
return this.tokens.length / 4;
}
public getDeltaLine(tokenIndex: number): number {
return this.tokens[4 * tokenIndex];
}
public getStartCharacter(tokenIndex: number): number {
return this.tokens[4 * tokenIndex + 1];
}
public getEndCharacter(tokenIndex: number): number {
return this.tokens[4 * tokenIndex + 2];
}
public getMetadata(tokenIndex: number): number {
return this.tokens[4 * tokenIndex + 3];
}
}
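SparseEncodedTokens is the storage format behind a piece: four consecutive uint32 slots per token (deltaLine relative to the owning piece's startLineNumber, startCharacter, endCharacter, metadata), which is why every accessor indexes at a stride of 4 * tokenIndex. Building one by hand (metadata values invented; tokens must be sorted by line for the binary search below to work):

// Build a SparseEncodedTokens payload by hand: 4 uint32 slots per token.
const encoded = new Uint32Array([
	// deltaLine, startCharacter, endCharacter, metadata (invented)
	0, 4, 9, 0x00004000,
	0, 12, 15, 0x00008000,
	2, 0, 3, 0x0000c000,
]);
const tokens = new SparseEncodedTokens(encoded);
console.log(tokens.getTokenCount());     // 3
console.log(tokens.getMaxDeltaLine());   // 2
console.log(tokens.getStartCharacter(1)); // 12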
export class LineTokens2 {
private readonly _actual: IEncodedTokens;
private readonly _startTokenIndex: number;
private readonly _endTokenIndex: number;
constructor(actual: IEncodedTokens, startTokenIndex: number, endTokenIndex: number) {
this._actual = actual;
this._startTokenIndex = startTokenIndex;
this._endTokenIndex = endTokenIndex;
}
public getCount(): number {
return this._endTokenIndex - this._startTokenIndex + 1;
}
public getStartCharacter(tokenIndex: number): number {
return this._actual.getStartCharacter(this._startTokenIndex + tokenIndex);
}
public getEndCharacter(tokenIndex: number): number {
return this._actual.getEndCharacter(this._startTokenIndex + tokenIndex);
}
public getMetadata(tokenIndex: number): number {
return this._actual.getMetadata(this._startTokenIndex + tokenIndex);
}
}
export class MultilineTokens2 {
public startLineNumber: number;
public endLineNumber: number;
public tokens: IEncodedTokens;
constructor(startLineNumber: number, tokens: IEncodedTokens) {
this.startLineNumber = startLineNumber;
this.tokens = tokens;
this.endLineNumber = this.startLineNumber + this.tokens.getMaxDeltaLine();
}
private _setTokens(tokens: IEncodedTokens): void {
this.tokens = tokens;
this.endLineNumber = this.startLineNumber + this.tokens.getMaxDeltaLine();
}
public getLineTokens(lineNumber: number): LineTokens2 | null {
if (this.startLineNumber <= lineNumber && lineNumber <= this.endLineNumber) {
const findResult = MultilineTokens2._findTokensWithLine(this.tokens, lineNumber - this.startLineNumber);
if (findResult) {
const [startTokenIndex, endTokenIndex] = findResult;
return new LineTokens2(this.tokens, startTokenIndex, endTokenIndex);
}
}
return null;
}
private static _findTokensWithLine(tokens: IEncodedTokens, deltaLine: number): [number, number] | null {
let low = 0;
let high = tokens.getTokenCount() - 1;
while (low < high) {
const mid = low + Math.floor((high - low) / 2);
const midDeltaLine = tokens.getDeltaLine(mid);
if (midDeltaLine < deltaLine) {
low = mid + 1;
} else if (midDeltaLine > deltaLine) {
high = mid - 1;
} else {
let min = mid;
while (min > low && tokens.getDeltaLine(min - 1) === deltaLine) {
min--;
}
let max = mid;
while (max < high && tokens.getDeltaLine(max + 1) === deltaLine) {
max++;
}
return [min, max];
}
}
if (tokens.getDeltaLine(low) === deltaLine) {
return [low, low];
}
return null;
}
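_findTokensWithLine is a binary search that, on a hit, widens left and right so that [min, max] spans every token sharing the requested deltaLine (several tokens usually sit on one line). The same idea, extracted over a plain sorted array (a minimal sketch):

// Find the inclusive index range of `key` in a sorted array, or null.
function findRange(sorted: number[], key: number): [number, number] | null {
	let low = 0, high = sorted.length - 1;
	while (low < high) {
		const mid = low + Math.floor((high - low) / 2);
		if (sorted[mid] < key) {
			low = mid + 1;
		} else if (sorted[mid] > key) {
			high = mid - 1;
		} else {
			// widen to cover all equal neighbors
			let min = mid, max = mid;
			while (min > low && sorted[min - 1] === key) { min--; }
			while (max < high && sorted[max + 1] === key) { max++; }
			return [min, max];
		}
	}
	return sorted.length > 0 && sorted[low] === key ? [low, low] : null;
}

console.log(findRange([0, 0, 0, 2, 2, 5], 2)); // [3, 4]
console.log(findRange([0, 0, 0, 2, 2, 5], 1)); // null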
public applyEdit(range: IRange, text: string): void {
// const [eolCount, firstLineLength] = countEOL(text);
this._acceptDeleteRange(range);
// this._acceptInsertText(new Position(range.startLineNumber, range.startColumn), eolCount, firstLineLength);
}
private _acceptDeleteRange(range: IRange): void {
if (range.startLineNumber === range.endLineNumber && range.startColumn === range.endColumn) {
// Nothing to delete
return;
}
const firstLineIndex = range.startLineNumber - this.startLineNumber;
const lastLineIndex = range.endLineNumber - this.startLineNumber;
if (lastLineIndex < 0) {
// this deletion occurs entirely before this block, so we only need to adjust line numbers
const deletedLinesCount = lastLineIndex - firstLineIndex;
this.startLineNumber -= deletedLinesCount;
return;
}
const tokenMaxDeltaLine = this.tokens.getMaxDeltaLine();
if (firstLineIndex >= tokenMaxDeltaLine + 1) {
// this deletion occurs entirely after this block, so there is nothing to do
return;
}
if (firstLineIndex < 0 && lastLineIndex >= tokenMaxDeltaLine + 1) {
// this deletion completely encompasses this block
this.startLineNumber = 0;
this._setTokens(this.tokens.empty());
return;
}
if (firstLineIndex < 0) {
const deletedBefore = -firstLineIndex;
this.startLineNumber -= deletedBefore;
this._setTokens(this.tokens.delete(0, 0, lastLineIndex, range.endColumn - 1));
} else {
this._setTokens(this.tokens.delete(firstLineIndex, range.startColumn - 1, lastLineIndex, range.endColumn - 1));
}
}
}
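_acceptDeleteRange is a case analysis on where the deleted line range falls relative to the block: entirely before it (only startLineNumber shifts), entirely after the last token line (a no-op), fully encompassing (the piece is emptied), or overlapping (delegated to IEncodedTokens.delete, which is still a TODO above). The same decision table, extracted as a standalone sketch:

// Sketch: classify where a deleted line range falls relative to a token block.
// firstLineIndex/lastLineIndex are the deletion bounds relative to the block
// start; maxDeltaLine is the last relative line that still carries tokens.
type DeleteCase = 'before' | 'after' | 'encompassing' | 'overlapping';

function classifyDelete(firstLineIndex: number, lastLineIndex: number, maxDeltaLine: number): DeleteCase {
	if (lastLineIndex < 0) {
		return 'before'; // only startLineNumber shifts up
	}
	if (firstLineIndex >= maxDeltaLine + 1) {
		return 'after'; // nothing to do
	}
	if (firstLineIndex < 0 && lastLineIndex >= maxDeltaLine + 1) {
		return 'encompassing'; // the block is emptied
	}
	return 'overlapping'; // IEncodedTokens.delete must trim tokens
}

console.log(classifyDelete(-3, -1, 10)); // 'before'
console.log(classifyDelete(2, 4, 10));   // 'overlapping'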
export class MultilineTokens {
public startLineNumber: number;
......@@ -193,6 +392,7 @@ export class MultilineTokens {
// this deletion completely encompasses this block
this.startLineNumber = 0;
this.tokens = [];
return;
}
if (firstLineIndex === lastLineIndex) {
......@@ -289,6 +489,130 @@ function toUint32Array(arr: Uint32Array | ArrayBuffer): Uint32Array {
}
}
export class TokensStore2 {
private _pieces: MultilineTokens2[];
constructor() {
this._pieces = [];
}
public flush(): void {
this._pieces = [];
}
public set(pieces: MultilineTokens2[]) {
this._pieces = pieces;
}
public addSemanticTokens(lineNumber: number, aTokens: LineTokens): LineTokens {
const pieces = this._pieces;
if (pieces.length === 0) {
return aTokens;
}
const pieceIndex = TokensStore2._findFirstPieceWithLine(pieces, lineNumber);
const bTokens = this._pieces[pieceIndex].getLineTokens(lineNumber);
if (!bTokens) {
return aTokens;
}
const aLen = aTokens.getCount();
const bLen = bTokens.getCount();
let aIndex = 0;
let result: number[] = [], resultLen = 0;
for (let bIndex = 0; bIndex < bLen; bIndex++) {
const bStartCharacter = bTokens.getStartCharacter(bIndex);
const bEndCharacter = bTokens.getEndCharacter(bIndex);
const bMetadata = bTokens.getMetadata(bIndex); // TODO@semantic: should use languageId from aTokens :/ :/ :/
// push any token from `a` that is before `b`
while (aIndex < aLen && aTokens.getEndOffset(aIndex) <= bStartCharacter) {
result[resultLen++] = aTokens.getEndOffset(aIndex);
result[resultLen++] = aTokens.getMetadata(aIndex);
aIndex++;
}
// push the token from `a` if it intersects the token from `b`
if (aIndex < aLen && aTokens.getStartOffset(aIndex) < bStartCharacter) {
result[resultLen++] = bStartCharacter;
result[resultLen++] = aTokens.getMetadata(aIndex);
}
// skip any tokens from `a` that are contained inside `b`
while (aIndex < aLen && aTokens.getEndOffset(aIndex) <= bEndCharacter) {
aIndex++;
}
// push the token from `b`
result[resultLen++] = bEndCharacter;
result[resultLen++] = bMetadata;
}
// push the remaining tokens from `a`
while (aIndex < aLen) {
result[resultLen++] = aTokens.getEndOffset(aIndex);
result[resultLen++] = aTokens.getMetadata(aIndex);
aIndex++;
}
return new LineTokens(new Uint32Array(result), aTokens.getLineContent());
}
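addSemanticTokens merges two sorted token streams for one line: around each semantic token from `b` it copies the syntactic `a` tokens that end before it, clips the `a` token that straddles its start, skips the `a` tokens it covers, emits the `b` token, and finally flushes whatever remains of `a`. (As the inline TODO concedes, the `b` metadata currently replaces the whole metadata word, clobbering the language id bits that ought to survive from `a`.) A worked example with invented offsets and metadata:

// Syntactic tokens `a` (interleaved [endOffset, metadata]):
//   [0,10):1   [10,14):2   [14,30):3
// One semantic token `b`: [12,18) with metadata 9.
//
// Loop trace for b:
//   copy  [0,10):1    -- ends at 10 <= bStartCharacter (12)
//   clip  [10,12):2   -- a-token [10,14) straddles b's start, cut at 12
//   skip  [10,14):2   -- now fully consumed: its end (14) <= bEndCharacter (18)
//   emit  [12,18):9   -- the semantic token
//   flush [14,30):3   -- remaining a-token; it resumes at 18 implicitly,
//                        because LineTokens only stores end offsets
const merged = new Uint32Array([10, 1, 12, 2, 18, 9, 30, 3]);
// i.e. [0,10):1, [10,12):2, [12,18):9, [18,30):3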
// private static _findLine(piece: Multiline)
private static _findFirstPieceWithLine(pieces: MultilineTokens2[], lineNumber: number): number {
let low = 0;
let high = pieces.length - 1;
while (low < high) {
let mid = low + Math.floor((high - low) / 2);
if (pieces[mid].endLineNumber < lineNumber) {
low = mid + 1;
} else if (pieces[mid].startLineNumber > lineNumber) {
high = mid - 1;
} else {
while (mid > low && pieces[mid - 1].startLineNumber <= lineNumber && lineNumber <= pieces[mid - 1].endLineNumber) {
mid--;
}
return mid;
}
}
return low;
}
//#region Editing
public acceptEdit(range: IRange, eolCount: number, firstLineLength: number): void {
console.log(`TODO@semantic --> acceptEdit !!!!`);
// this._acceptDeleteRange(range);
// this._acceptInsertText(new Position(range.startLineNumber, range.startColumn), eolCount, firstLineLength);
}
//#endregion
}
export class TokensStore {
private _lineTokens: (Uint32Array | ArrayBuffer | null)[];
private _len: number;
......
......@@ -22,6 +22,7 @@ import { ITextResourcePropertiesService } from 'vs/editor/common/services/resour
import { IConfigurationService } from 'vs/platform/configuration/common/configuration';
import { RunOnceScheduler } from 'vs/base/common/async';
import { CancellationTokenSource } from 'vs/base/common/cancellation';
import { SparseEncodedTokens, MultilineTokens2 } from 'vs/editor/common/model/tokensStore';
function MODEL_ID(resource: URI): string {
return resource.toString();
......@@ -524,17 +525,21 @@ class ModelSemanticColoring extends Disposable {
}
this._currentResponse = tokens;
+// TODO@semantic: diff here and reduce to only really needed tokens...
+// TODO@semantic: might also be a good idea to split areas... ?
if (this._currentResponse) {
+const result: MultilineTokens2[] = [];
for (const area of this._currentResponse.areas) {
-const tokenCount = area.data.length / 5;
-const tokens = new Uint32Array(2 * tokenCount);
+const srcTokens = area.data;
+const tokenCount = srcTokens.length / 5;
+const destTokens = new Uint32Array(4 * tokenCount);
for (let i = 0; i < tokenCount; i++) {
-const offset = 5 * i;
-const deltaLine = area.data[offset];
-const startCharacter = area.data[offset + 1];
-const endCharacter = area.data[offset + 2];
-const tokenType = area.data[offset + 3];
-const tokenModifiers = area.data[offset + 4];
+const srcOffset = 5 * i;
+const deltaLine = srcTokens[srcOffset];
+const startCharacter = srcTokens[srcOffset + 1];
+const endCharacter = srcTokens[srcOffset + 2];
+// const tokenType = srcTokens[srcOffset + 3];
+// const tokenModifiers = srcTokens[srcOffset + 4];
+// TODO@semantic: map here tokenType and tokenModifiers to metadata
const fontStyle = FontStyle.Italic | FontStyle.Bold | FontStyle.Underline;
const foregroundColorId = 3;
......@@ -542,12 +547,21 @@ class ModelSemanticColoring extends Disposable {
(fontStyle << MetadataConsts.FONT_STYLE_OFFSET)
| (foregroundColorId << MetadataConsts.FOREGROUND_OFFSET)
) >>> 0;
-// tokens[2 * i] =
+const destOffset = 4 * i;
+destTokens[destOffset] = deltaLine;
+destTokens[destOffset + 1] = startCharacter;
+destTokens[destOffset + 2] = endCharacter;
+destTokens[destOffset + 3] = metadata;
}
+const tokens = new MultilineTokens2(area.line, new SparseEncodedTokens(destTokens));
+result.push(tokens);
}
-console.log(`_setSemanticTokens`, tokens);
-// Convert this into editor friendly tokens
-// for (let )
+this._model.setSemanticTokens(result);
+} else {
+// TODO@semantic: should we clear semantic tokens on the text model here?
}
}
......
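In the conversion above, each wire token is five uint32s (deltaLine, startCharacter, endCharacter, tokenType, tokenModifiers) and is re-packed into the four-slot internal form by collapsing type and modifiers into a single metadata word. The styling is still hard-coded (italic | bold | underline, foreground color id 3); mapping real token types is the remaining TODO. A sketch of the repacking, with the bit offsets assumed to match MetadataConsts of this era:

// Sketch: repack 5-int wire tokens into 4-int internal tokens.
const FONT_STYLE_OFFSET = 11; // assumed to mirror MetadataConsts.FONT_STYLE_OFFSET
const FOREGROUND_OFFSET = 14; // assumed to mirror MetadataConsts.FOREGROUND_OFFSET

function repack(src: Uint32Array): Uint32Array {
	const tokenCount = (src.length / 5) | 0;
	const dest = new Uint32Array(4 * tokenCount);
	for (let i = 0; i < tokenCount; i++) {
		// TODO (as in the commit): derive these from src[5*i+3] / src[5*i+4]
		const fontStyle = 1 | 2 | 4; // Italic | Bold | Underline
		const foregroundColorId = 3;
		const metadata = ((fontStyle << FONT_STYLE_OFFSET) | (foregroundColorId << FOREGROUND_OFFSET)) >>> 0;
		dest[4 * i] = src[5 * i];         // deltaLine
		dest[4 * i + 1] = src[5 * i + 1]; // startCharacter
		dest[4 * i + 2] = src[5 * i + 2]; // endCharacter
		dest[4 * i + 3] = metadata;
	}
	return dest;
}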
......@@ -27,7 +27,7 @@ import { ExtensionIdentifier, IExtensionDescription } from 'vs/platform/extensio
import { IURITransformer } from 'vs/base/common/uriIpc';
import { DisposableStore, dispose } from 'vs/base/common/lifecycle';
import { VSBuffer } from 'vs/base/common/buffer';
-import { encodeSemanticTokensDto, ISemanticTokensFullAreaDto, ISemanticTokensDto } from 'vs/workbench/api/common/shared/semanticTokens';
+import { encodeSemanticTokensDto, ISemanticTokensDto } from 'vs/workbench/api/common/shared/semanticTokens';
// --- adapter
......@@ -665,8 +665,8 @@ class SemanticColoringAdapter {
return r;
}
-releaseSemanticColoring(semanticColoringResultId: number): Promise<void> {
-}
+// releaseSemanticColoring(semanticColoringResultId: number): Promise<void> {
+// }
// provideSignatureHelp(resource: URI, token: CancellationToken): Promise<extHostProtocol.ISignatureHelpDto | undefined> {
// const doc = this._documents.getDocument(resource);
......
......@@ -64,10 +64,21 @@ export function decodeSemanticTokensDto(buff: VSBuffer): ISemanticTokensDto {
function encodeArea(area: ISemanticTokensFullAreaDto | ISemanticTokensDeltaAreaDto, buff: VSBuffer, offset: number): number {
buff.writeUInt8(area.type === 'full' ? EncodedSemanticTokensAreaType.Full : EncodedSemanticTokensAreaType.Delta, offset); offset += 1;
-buff.writeUInt32BE(area.line, offset); offset += 4;
+buff.writeUInt32BE(area.line + 1, offset); offset += 4;
if (area.type === 'full') {
-buff.writeUInt32BE(area.data.byteLength, offset); offset += 4;
-buff.set(VSBuffer.wrap(area.data), offset); offset += area.data.byteLength;
+const tokens = area.data;
+const tokenCount = (tokens.length / 5) | 0;
+buff.writeUInt32BE(tokenCount, offset); offset += 4;
+// here we are explicitly iterating and writing the ints one by one to ensure the desired endianness
+for (let i = 0; i < tokenCount; i++) {
+const tokenOffset = 5 * i;
+buff.writeUInt32BE(tokens[tokenOffset], offset); offset += 4;
+buff.writeUInt32BE(tokens[tokenOffset + 1], offset); offset += 4;
+buff.writeUInt32BE(tokens[tokenOffset + 2], offset); offset += 4;
+buff.writeUInt32BE(tokens[tokenOffset + 3], offset); offset += 4;
+buff.writeUInt32BE(tokens[tokenOffset + 4], offset); offset += 4;
+}
+// buff.set(VSBuffer.wrap(uint8), offset); offset += area.data.byteLength;
} else {
buff.writeUInt32BE(area.oldIndex, offset); offset += 4;
}
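The change above replaces a raw byte copy of the Uint32Array with an explicit per-integer writeUInt32BE. The raw copy would serialize in whatever byte order the producing process happens to use, so a consumer with a different endianness would read garbage; writing each integer big-endian pins the wire format. The same idea in plain standard JavaScript, with DataView standing in for VSBuffer:

// Endianness-stable round trip with DataView (sketch; VSBuffer's
// writeUInt32BE/readUInt32BE play the same role in the real code).
function encodeBE(tokens: Uint32Array): ArrayBuffer {
	const out = new ArrayBuffer(4 + tokens.length * 4);
	const view = new DataView(out);
	view.setUint32(0, tokens.length, false); // false = big-endian
	for (let i = 0; i < tokens.length; i++) {
		view.setUint32(4 + 4 * i, tokens[i], false);
	}
	return out;
}

function decodeBE(buf: ArrayBuffer): Uint32Array {
	const view = new DataView(buf);
	const len = view.getUint32(0, false);
	const out = new Uint32Array(len);
	for (let i = 0; i < len; i++) {
		out[i] = view.getUint32(4 + 4 * i, false);
	}
	return out;
}

console.log(decodeBE(encodeBE(new Uint32Array([1, 2, 3])))); // Uint32Array [1, 2, 3]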
......@@ -79,8 +90,10 @@ function encodedAreaSize(area: ISemanticTokensFullAreaDto | ISemanticTokensDelta
result += 1; // type
result += 4; // line
if (area.type === 'full') {
-result += 4; // data byte length
-result += area.data.byteLength;
+const tokens = area.data;
+const tokenCount = (tokens.length / 5) | 0;
+result += 4; // token count
+result += tokenCount * 5 * 4;
return result;
} else {
result += 4; // old index
......@@ -92,15 +105,21 @@ function decodeArea(buff: VSBuffer, offset: number, areas: (ISemanticTokensFullA
const type: EncodedSemanticTokensAreaType = buff.readUInt8(offset); offset += 1;
const line = buff.readUInt32BE(offset); offset += 4;
if (type === EncodedSemanticTokensAreaType.Full) {
-const dataByteLength = buff.readUInt32BE(offset); offset += 4;
-const data = buff.slice(offset, offset + dataByteLength); offset += dataByteLength;
-const buffer = data.buffer;
-const bufferByteOffset = buffer.byteOffset;
-const bufferByteLength = buffer.byteLength;
+// here we are explicitly iterating and reading the ints one by one to ensure the desired endianness
+const tokenCount = buff.readUInt32BE(offset); offset += 4;
+const data = new Uint32Array(5 * tokenCount);
+for (let i = 0; i < tokenCount; i++) {
+const destOffset = 5 * i;
+data[destOffset] = buff.readUInt32BE(offset); offset += 4;
+data[destOffset + 1] = buff.readUInt32BE(offset); offset += 4;
+data[destOffset + 2] = buff.readUInt32BE(offset); offset += 4;
+data[destOffset + 3] = buff.readUInt32BE(offset); offset += 4;
+data[destOffset + 4] = buff.readUInt32BE(offset); offset += 4;
+}
areas.push({
type: 'full',
line: line,
-data: new Uint32Array(buffer, bufferByteOffset, bufferByteLength / 4)
+data: data
});
return offset;
} else {
......
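decodeArea now mirrors the encoder exactly: read the token count, then 5 * tokenCount big-endian uint32s into a freshly allocated Uint32Array. That replaces the old approach of aliasing a Uint32Array directly over a slice of the incoming buffer, which was endianness-fragile and can even throw, since constructing a Uint32Array view requires the slice's byte offset to be a multiple of 4. Note also that encodedAreaSize must stay byte-for-byte in lockstep with encodeArea (1 byte type + 4 bytes line + 4 bytes count + tokenCount * 5 * 4 bytes of data); any drift between the two corrupts every area that follows. A test-style sketch of that invariant (hypothetical helper, built on the two functions above):

// Hypothetical round-trip check: the predicted size must equal bytes written.
function assertSizesAgree(area: ISemanticTokensFullAreaDto | ISemanticTokensDeltaAreaDto): void {
	const predicted = encodedAreaSize(area);
	const buff = VSBuffer.alloc(predicted);
	const written = encodeArea(area, buff, 0); // returns the final offset
	if (written !== predicted) {
		throw new Error(`encodedAreaSize drift: predicted ${predicted}, wrote ${written}`);
	}
}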