提交 3f1dd9a5 编写于 作者: A Alex Dima

Add ModelBuilder

上级 6f8bd003
......@@ -592,7 +592,7 @@ export class TextModel extends OrderGuaranteeEventEmitter implements editorCommo
carriageReturnCnt++;
}
// Split the text into liens
// Split the text into lines
var lines = rawText.split(/\r\n|\r|\n/);
// Remove the BOM (if present)
......
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
'use strict';
import {IDisposable} from 'vs/base/common/lifecycle';
import * as crypto from 'crypto';
import {DefaultEndOfLine, ITextModelCreationOptions, ITextModelResolvedOptions, IRawText} from 'vs/editor/common/editorCommon';
import * as strings from 'vs/base/common/strings';
import {guessIndentation} from 'vs/editor/common/model/indentationGuesser';
/**
 * A source of text that is delivered piecewise through listeners.
 */
export interface IStringStream {
	/**
	 * Registers a listener that is handed one string chunk per invocation.
	 * Returns an IDisposable (presumably disposing it removes the listener -- confirm with implementers).
	 */
	onData(listener: (chunk: string) => void): IDisposable;

	/**
	 * Registers a listener that takes no arguments; by its name it signals the end of the stream.
	 * Returns an IDisposable (presumably disposing it removes the listener -- confirm with implementers).
	 */
	onEnd(listener: () => void): IDisposable;
}
/**
 * What a builder hands back once all text has been consumed.
 */
export class ModelBuilderResult {
	/** The assembled raw text (BOM, EOL, lines, length and resolved options). */
	public rawText: IRawText;

	/** Digest string computed over the lines of `rawText`. */
	public hash: string;
}
/**
 * Collects lines that have already been split, hashing each one (followed by '\n')
 * as it arrives, and produces the final raw text + hash on `finish`.
 */
class ModelLineBasedBuilder {

	private hash: crypto.Hash;
	private BOM: string;
	private lines: string[];
	private currLineIndex: number;

	constructor() {
		this.BOM = '';
		this.lines = [];
		this.currLineIndex = 0;
		this.hash = crypto.createHash('sha1');
	}

	/**
	 * Appends the first `linesLength` entries of `lines`.
	 *
	 * While no line has been stored yet, a leading UTF-8 BOM on `lines[0]` is
	 * remembered and stripped (note: `lines[0]` is modified in place).
	 *
	 * @param lines the candidate lines
	 * @param linesLength how many entries of `lines` to consume, starting at index 0
	 */
	public acceptLines(lines: string[], linesLength: number): void {
		const nothingStoredYet = (this.currLineIndex === 0);
		if (nothingStoredYet && strings.startsWithUTF8BOM(lines[0])) {
			this.BOM = strings.UTF8_BOM_CHARACTER;
			lines[0] = lines[0].substr(1);
		}

		for (let i = 0; i < linesLength; i++) {
			const line = lines[i];
			this.lines[this.currLineIndex] = line;
			this.currLineIndex++;
			// Every line is terminated with '\n' for hashing, regardless of its original EOL
			this.hash.update(line + '\n');
		}
	}

	/**
	 * Produces the result from everything accepted so far.
	 *
	 * @param totalLength value stored as the raw text's `length`
	 * @param carriageReturnCnt number of '\r' seen, used to pick the EOL sequence
	 * @param opts creation options (default EOL, indentation settings)
	 */
	public finish(totalLength: number, carriageReturnCnt: number, opts: ITextModelCreationOptions): ModelBuilderResult {
		return {
			rawText: {
				BOM: this.BOM,
				EOL: this._resolveEOL(carriageReturnCnt, opts),
				lines: this.lines,
				length: totalLength,
				options: this._resolveOptions(opts)
			},
			hash: this.hash.digest('hex')
		};
	}

	/**
	 * Picks the EOL sequence: the configured default when there is at most one line break
	 * to judge from, otherwise '\r\n' only if carriage returns outnumber half of the line breaks.
	 */
	private _resolveEOL(carriageReturnCnt: number, opts: ITextModelCreationOptions): string {
		const lineFeedCnt = this.lines.length - 1;
		if (lineFeedCnt === 0) {
			// This is an empty file or a file with precisely one line
			return (opts.defaultEOL === DefaultEndOfLine.LF ? '\n' : '\r\n');
		}
		return (carriageReturnCnt > lineFeedCnt / 2 ? '\r\n' : '\n');
	}

	/**
	 * Builds the resolved options, replacing tabSize/insertSpaces with guessed values
	 * when `opts.detectIndentation` is set.
	 */
	private _resolveOptions(opts: ITextModelCreationOptions): ITextModelResolvedOptions {
		let tabSize = opts.tabSize;
		let insertSpaces = opts.insertSpaces;

		if (opts.detectIndentation) {
			const guessed = guessIndentation(this.lines, opts.tabSize, opts.insertSpaces);
			tabSize = guessed.tabSize;
			insertSpaces = guessed.insertSpaces;
		}

		return {
			tabSize: tabSize,
			insertSpaces: insertSpaces,
			trimAutoWhitespace: opts.trimAutoWhitespace,
			defaultEOL: opts.defaultEOL
		};
	}
}
/**
 * Computes the sha1 hex digest over `rawText.lines`, feeding each line
 * followed by a '\n' into the hash.
 *
 * @param rawText the raw text whose lines are hashed
 * @returns the digest as a hex string
 */
export function computeHash(rawText: IRawText): string {
	const sha1 = crypto.createHash('sha1');
	for (const line of rawText.lines) {
		sha1.update(line + '\n');
	}
	return sha1.digest('hex');
}
/**
 * Builds a model's raw text from string chunks that may split lines
 * (and even a '\r\n' pair) at arbitrary positions.
 */
export class ModelBuilder {

	/** Text after the last line break seen so far (an incomplete line). */
	private leftoverPrevChunk: string;
	/** True when a trailing '\r' was withheld from the previous chunk. */
	private leftoverEndsInCR: boolean;
	private totalCRCount: number;
	private lineBasedBuilder: ModelLineBasedBuilder;
	private totalLength: number;

	constructor() {
		this.totalLength = 0;
		this.totalCRCount = 0;
		this.leftoverPrevChunk = '';
		this.leftoverEndsInCR = false;
		this.lineBasedBuilder = new ModelLineBasedBuilder();
	}

	/**
	 * Adds the number of '\r' characters in `chunk` to the running total,
	 * which is later used to determine the majority EOL sequence.
	 */
	private _updateCRCount(chunk: string): void {
		for (let at = chunk.indexOf('\r'); at !== -1; at = chunk.indexOf('\r', at + 1)) {
			this.totalCRCount++;
		}
	}

	/**
	 * Consumes the next chunk of text. Empty chunks are ignored.
	 *
	 * @param chunk the next piece of the text
	 */
	public acceptChunk(chunk: string): void {
		if (chunk.length === 0) {
			return;
		}

		this.totalLength += chunk.length;
		this._updateCRCount(chunk);

		// Give back the '\r' that was withheld from the previous chunk
		let text = this.leftoverEndsInCR ? '\r' + chunk : chunk;

		// Never split on a trailing '\r': it might be the first half of a '\r\n'
		// whose '\n' arrives with the next chunk, so withhold it once more.
		const endsInCR = (text.charCodeAt(text.length - 1) === 13 /*\r*/);
		this.leftoverEndsInCR = endsInCR;
		if (endsInCR) {
			text = text.substr(0, text.length - 1);
		}

		const pieces = text.split(/\r\n|\r|\n/);
		const lastIndex = pieces.length - 1;

		if (lastIndex === 0) {
			// no \r or \n encountered
			this.leftoverPrevChunk += pieces[0];
			return;
		}

		// The first piece completes the pending line; the last piece is still incomplete
		pieces[0] = this.leftoverPrevChunk + pieces[0];
		this.lineBasedBuilder.acceptLines(pieces, lastIndex);
		this.leftoverPrevChunk = pieces[lastIndex];
	}

	/**
	 * Flushes the pending text as the final line(s) and returns the result.
	 *
	 * @param opts creation options forwarded to the line based builder
	 */
	public finish(opts: ITextModelCreationOptions): ModelBuilderResult {
		// A withheld trailing '\r' is a line break of its own, followed by an empty last line
		const tail = this.leftoverEndsInCR
			? [this.leftoverPrevChunk, '']
			: [this.leftoverPrevChunk];

		this.lineBasedBuilder.acceptLines(tail, tail.length);
		return this.lineBasedBuilder.finish(this.totalLength, this.totalCRCount, opts);
	}
}
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
'use strict';
import * as assert from 'assert';
import {ModelBuilder, computeHash} from 'vs/editor/node/model/modelBuilder';
import {ITextModelCreationOptions, IRawText} from 'vs/editor/common/editorCommon';
import {TextModel} from 'vs/editor/common/model/textModel';
import * as strings from 'vs/base/common/strings';
/**
 * Feeds `chunks` one by one through a ModelBuilder and asserts that the outcome
 * equals what `TextModel.toRawText` and `computeHash` produce for the concatenated text.
 *
 * @param chunks the pieces of text, in order
 * @param opts creation options used for both the expected and the actual result
 * @returns the expected hash of the text
 */
export function testModelBuilder(chunks:string[], opts:ITextModelCreationOptions = TextModel.DEFAULT_CREATION_OPTIONS): string {
	let expectedRawText = TextModel.toRawText(chunks.join(''), opts);
	let expectedHash = computeHash(expectedRawText);

	let builder = new ModelBuilder();
	for (let i = 0, len = chunks.length; i < len; i++) {
		builder.acceptChunk(chunks[i]);
	}
	let actual = builder.finish(opts);

	// assert.deepEqual takes (actual, expected); keeping that order makes failure reports label the values correctly
	assert.deepEqual(actual, {
		rawText: expectedRawText,
		hash: expectedHash
	});

	return expectedHash;
}
/**
 * Wraps `lines` in an IRawText with an empty BOM, '\n' as EOL,
 * a length of 0 and no options.
 */
function toRawText(lines: string[]): IRawText {
	const rawText: IRawText = {
		BOM: '',
		lines: lines,
		EOL: '\n',
		length: 0,
		options: null
	};
	return rawText;
}
/**
 * Asserts that the two given line arrays do not produce the same hash.
 *
 * @param lines1 first set of lines
 * @param lines2 second set of lines
 */
export function testDifferentHash(lines1: string[], lines2: string[]): void {
	assert.notEqual(
		computeHash(toRawText(lines1)),
		computeHash(toRawText(lines2))
	);
}
// Hand-written cases for ModelBuilder: each one splits a text into chunks at
// different positions and checks the builder against the one-shot conversion.
suite('ModelBuilder', () => {

	test('uses sha1', () => {
		// These are the sha1s of the string + \n
		assert.equal(computeHash(toRawText([''])), 'adc83b19e793491b1c6ea0fd8b46cd9f32e592fc');
		assert.equal(computeHash(toRawText(['hello world'])), '22596363b3de40b06f981fb85d82312e8c0ed511');
	});

	test('no chunks', () => {
		testModelBuilder([]);
	});

	test('single empty chunk', () => {
		testModelBuilder(['']);
	});

	test('single line in one chunk', () => {
		testModelBuilder(['Hello world']);
	});

	test('single line in multiple chunks', () => {
		testModelBuilder(['Hello', ' ', 'world']);
	});

	test('two lines in single chunk', () => {
		testModelBuilder(['Hello world\nHow are you?']);
	});

	test('two lines in multiple chunks 1', () => {
		testModelBuilder(['Hello worl', 'd\nHow are you?']);
	});

	test('two lines in multiple chunks 2', () => {
		testModelBuilder(['Hello worl', 'd' , '\n', 'H', 'ow are you?']);
	});

	test('two lines in multiple chunks 3', () => {
		testModelBuilder(['Hello worl', 'd' , '\nHow are you?']);
	});

	test('multiple lines in single chunks', () => {
		testModelBuilder(['Hello world\nHow are you?\nIs everything good today?\nDo you enjoy the weather?']);
	});

	// The next three titles are numbered 1..3 so that a failure report identifies a single test
	test('multiple lines in multiple chunks 1', () => {
		testModelBuilder(['Hello world\nHow are you', '?\nIs everything good today?\nDo you enjoy the weather?']);
	});

	test('multiple lines in multiple chunks 2', () => {
		testModelBuilder(['Hello world', '\nHow are you', '?\nIs everything good today?', '\nDo you enjoy the weather?']);
	});

	test('multiple lines in multiple chunks 3', () => {
		testModelBuilder(['Hello world\n', 'How are you', '?\nIs everything good today?', '\nDo you enjoy the weather?']);
	});

	test('carriage return detection (1 \r\n 2 \n)', () => {
		testModelBuilder(['Hello world\r\n', 'How are you', '?\nIs everything good today?', '\nDo you enjoy the weather?']);
	});

	test('carriage return detection (2 \r\n 1 \n)', () => {
		testModelBuilder(['Hello world\r\n', 'How are you', '?\r\nIs everything good today?', '\nDo you enjoy the weather?']);
	});

	test('carriage return detection (3 \r\n 0 \n)', () => {
		testModelBuilder(['Hello world\r\n', 'How are you', '?\r\nIs everything good today?', '\r\nDo you enjoy the weather?']);
	});

	test('carriage return detection (isolated \r)', () => {
		// Every '\r' and '\n' arrives in a chunk of its own
		testModelBuilder(['Hello world', '\r', '\n', 'How are you', '?', '\r', '\n', 'Is everything good today?', '\r', '\n', 'Do you enjoy the weather?']);
	});

	test('BOM handling', () => {
		testModelBuilder([strings.UTF8_BOM_CHARACTER + 'Hello world!']);
	});
});
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
'use strict';
import {testModelBuilder, testDifferentHash} from 'vs/editor/test/node/model/modelBuilder.test';
// Guards the random test generation loop at the end of this file; the loop only runs when this is true.
const GENERATE_TESTS = false;
// Fixed inputs for the ModelBuilder checks (by their names, presumably captured from generated runs).
suite('ModelBuilder Auto Tests', () => {

	test('auto1', () => {
		// A '\r' in a chunk of its own, directly followed by a chunk starting with '\n'
		testModelBuilder(['sarjniow','\r','\nbpb','ofb','\njzldgxx','\r\nkzwfjysng']);
	});

	test('auto2', () => {
		// The very last chunk ends in '\r'
		testModelBuilder(['i','yyernubi\r\niimgn\n','ut\r']);
	});

	test('auto3', () => {
		// One empty line versus three empty lines must hash differently
		testDifferentHash([''], ['', '', '']);
	});
});
/**
 * Returns a random integer between `min` and `max`, both inclusive.
 */
function getRandomInt(min: number, max: number): number {
	const span = max - min + 1;
	return min + Math.floor(Math.random() * span);
}
/**
 * Returns one of '\n', '\r' or '\r\n', chosen at random.
 */
function getRandomEOLSequence(): string {
	switch (getRandomInt(1, 3)) {
		case 1:
			return '\n';
		case 2:
			return '\r';
		default:
			// The only remaining value is 3; using it as the default guarantees
			// that every code path returns a string.
			return '\r\n';
	}
}
/**
 * Returns a string of random characters in the range 'a'..'z' whose length
 * is picked at random between `minLength` and `maxLength` (inclusive).
 */
function getRandomString(minLength: number, maxLength: number): string {
	const firstCode = 'a'.charCodeAt(0);
	const lastCode = 'z'.charCodeAt(0);

	const chars: string[] = [];
	for (let remaining = getRandomInt(minLength, maxLength); remaining > 0; remaining--) {
		chars.push(String.fromCharCode(getRandomInt(firstCode, lastCode)));
	}
	return chars.join('');
}
/**
 * Generates the content of a random file: 1 to 10 lines of 0 to 10 random characters each,
 * joined either by one fixed EOL sequence or by a freshly picked EOL sequence per line break.
 */
function generateRandomFile(): string {
	const lineCount = getRandomInt(1, 10);
	const mixedEOLSequence = (getRandomInt(1, 2) === 1);
	const fixedEOL = getRandomEOLSequence();

	let file = '';
	for (let i = 0; i < lineCount; i++) {
		if (i > 0) {
			// A line break goes in front of every line except the first one
			file += (mixedEOLSequence ? getRandomEOLSequence() : fixedEOL);
		}
		file += getRandomString(0, 10);
	}
	return file;
}
/**
 * Cuts `file` into consecutive chunks at random offsets. The chunks are carved off
 * from the end of the file towards its start; concatenated in order they yield `file` again.
 * Chunks may be empty.
 */
function generateRandomChunks(file: string): string[] {
	const chunks: string[] = [];
	let remainingCuts = getRandomInt(1, 20);
	let end = file.length;

	for (; remainingCuts > 0 && end > 0; remainingCuts--) {
		const start = getRandomInt(0, end);
		chunks.unshift(file.substring(start, end));
		end = start;
	}

	// Whatever is left in front of the first cut becomes the first chunk
	if (end !== 0) {
		chunks.unshift(file.substring(0, end));
	}

	return chunks;
}
// Maps a hash to the JSON string of the logical lines that produced it.
const HASH_TO_CONTENT: { [hash: string]: string; } = {};
/**
 * Runs the model builder check 5 to 10 times on `file`, each time with different random chunks,
 * and records the resulting hash to detect two different logical contents sharing a hash.
 *
 * @returns false (after logging details) if a check throws or a hash collision is found, true otherwise
 */
function testRandomFile(file: string): boolean {
	const tests = getRandomInt(5, 10);

	for (let run = 0; run < tests; run++) {
		const chunks = generateRandomChunks(file);

		try {
			const hash = testModelBuilder(chunks);
			const logicalContent = JSON.stringify(file.split(/\r\n|\r|\n/));

			if (!HASH_TO_CONTENT.hasOwnProperty(hash)) {
				// First time this hash shows up: remember what produced it
				HASH_TO_CONTENT[hash] = logicalContent;
				continue;
			}

			const prevLogicalContent = HASH_TO_CONTENT[hash];
			if (prevLogicalContent !== logicalContent) {
				console.log('HASH COLLISION: ');
				console.log(prevLogicalContent);
				console.log(logicalContent);
				return false;
			}
		} catch (err) {
			// Log the failure together with the chunks that triggered it
			console.log(err);
			console.log(JSON.stringify(chunks));
			return false;
		}
	}

	return true;
}
// When enabled, keeps testing random files until one of them fails.
if (GENERATE_TESTS) {
	for (let testNumber = 1; ; testNumber++) {
		console.log('------BEGIN NEW TEST: ' + testNumber);
		if (!testRandomFile(generateRandomFile())) {
			break;
		}
		console.log('------END NEW TEST: ' + testNumber);
	}
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册