Add ModelBuilder

3f1dd9a5 · Alex Dima · 6f8bd003 · 3f1dd9a5 · 3f1dd9a5 · 3f1dd9a5
4 changed files
--- a/src/vs/editor/common/model/textModel.ts
+++ b/src/vs/editor/common/model/textModel.ts
@@ -592,7 +592,7 @@ export class TextModel extends OrderGuaranteeEventEmitter implements editorCommo
 			carriageReturnCnt++;
 		}

-		// Split the text into liens
+		// Split the text into lines
 		var lines = rawText.split(/\r\n|\r|\n/);

 		// Remove the BOM (if present)

--- a/src/vs/editor/node/model/modelBuilder.ts
+++ b/src/vs/editor/node/model/modelBuilder.ts
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+'use strict';
+
+import {IDisposable} from 'vs/base/common/lifecycle';
+import * as crypto from 'crypto';
+import {DefaultEndOfLine, ITextModelCreationOptions, ITextModelResolvedOptions, IRawText} from 'vs/editor/common/editorCommon';
+import * as strings from 'vs/base/common/strings';
+import {guessIndentation} from 'vs/editor/common/model/indentationGuesser';
+
+/**
+ * Listener-based source of string chunks: one registration point for
+ * incoming chunks and one for the end of the stream.
+ */
+export interface IStringStream {
+	/** Registers `listener`, which receives a string chunk; returns an IDisposable (presumably to unregister the listener - confirm with implementers). */
+	onData(listener: (chunk:string)=>void): IDisposable;
+	/** Registers `listener`, which takes no arguments; returns an IDisposable (presumably to unregister the listener - confirm with implementers). */
+	onEnd(listener: ()=>void): IDisposable;
+}
+
+/**
+ * Shape of the value returned by the builders' `finish` methods in this file.
+ */
+export class ModelBuilderResult {
+	/** The raw text (BOM, EOL, lines, length, options) assembled from the accepted lines. */
+	rawText: IRawText;
+	/** Hex digest of the SHA-1 computed over every line followed by '\n'. */
+	hash: string;
+}
+
+/**
+ * Collects lines that have already been split, remembers (and strips) a UTF-8 BOM
+ * found at the start of the very first line, and keeps a running SHA-1 over
+ * every accepted line followed by '\n'.
+ */
+class ModelLineBasedBuilder {
+
+	private hash: crypto.Hash;
+	private BOM: string;
+	private lines: string[];
+	private currLineIndex: number;
+
+	constructor() {
+		this.hash = crypto.createHash('sha1');
+		this.BOM = '';
+		this.lines = [];
+		this.currLineIndex = 0;
+	}
+
+	/**
+	 * Appends the first `linesLength` entries of `lines` to the builder.
+	 * When nothing has been accepted yet and `lines[0]` starts with a BOM, the BOM is
+	 * recorded and removed from `lines[0]` in place (the passed array is modified).
+	 */
+	public acceptLines(lines:string[], linesLength:number): void {
+		const nothingAcceptedYet = (this.currLineIndex === 0);
+		if (nothingAcceptedYet && strings.startsWithUTF8BOM(lines[0])) {
+			// Keep the BOM aside and drop it from the content
+			this.BOM = strings.UTF8_BOM_CHARACTER;
+			lines[0] = lines[0].substr(1);
+		}
+
+		for (let i = 0; i < linesLength; i++) {
+			const line = lines[i];
+			this.lines[this.currLineIndex] = line;
+			this.currLineIndex++;
+			this.hash.update(line + '\n');
+		}
+	}
+
+	/**
+	 * Produces the final result from everything accepted so far.
+	 *
+	 * @param totalLength value stored as the `length` of the raw text.
+	 * @param carriageReturnCnt number of '\r' characters seen, used to pick the EOL.
+	 * @param opts creation options; indentation is guessed from the lines when
+	 *             `opts.detectIndentation` is set.
+	 */
+	public finish(totalLength:number, carriageReturnCnt:number, opts:ITextModelCreationOptions): ModelBuilderResult {
+
+		const lineFeedCnt = this.lines.length - 1;
+
+		let EOL: string;
+		if (lineFeedCnt === 0) {
+			// Empty file or exactly one line: nothing to infer from, use the default
+			EOL = (opts.defaultEOL === DefaultEndOfLine.LF ? '\n' : '\r\n');
+		} else {
+			// '\r\n' wins only if carriage returns outnumber half of the line breaks
+			EOL = (carriageReturnCnt > lineFeedCnt / 2 ? '\r\n' : '\n');
+		}
+
+		// Start from the configured indentation and override it with the guess if requested
+		let tabSize = opts.tabSize;
+		let insertSpaces = opts.insertSpaces;
+		if (opts.detectIndentation) {
+			const guessed = guessIndentation(this.lines, opts.tabSize, opts.insertSpaces);
+			tabSize = guessed.tabSize;
+			insertSpaces = guessed.insertSpaces;
+		}
+
+		const resolvedOpts: ITextModelResolvedOptions = {
+			tabSize: tabSize,
+			insertSpaces: insertSpaces,
+			trimAutoWhitespace: opts.trimAutoWhitespace,
+			defaultEOL: opts.defaultEOL
+		};
+
+		const rawText: IRawText = {
+			BOM: this.BOM,
+			EOL: EOL,
+			lines: this.lines,
+			length: totalLength,
+			options: resolvedOpts
+		};
+
+		return {
+			rawText: rawText,
+			hash: this.hash.digest('hex')
+		};
+	}
+}
+
+/**
+ * Returns the hex-encoded SHA-1 of all lines of `rawText`, each one followed by '\n'.
+ */
+export function computeHash(rawText:IRawText): string {
+	const sha1 = crypto.createHash('sha1');
+	for (const line of rawText.lines) {
+		sha1.update(line + '\n');
+	}
+	return sha1.digest('hex');
+}
+
+/**
+ * Builds a ModelBuilderResult from text delivered in arbitrary string chunks.
+ * Chunks are split into lines on \r\n, \r or \n; the unterminated tail of a chunk
+ * is carried over and prepended to the next chunk's first line.
+ */
+export class ModelBuilder {
+
+	// Text after the last line break seen so far (start of a not yet completed line)
+	private leftoverPrevChunk: string;
+	// True when the previous chunk ended in \r and that \r was held back
+	private leftoverEndsInCR: boolean;
+	// Number of \r characters seen across all chunks
+	private totalCRCount: number;
+	private lineBasedBuilder: ModelLineBasedBuilder;
+	// Sum of the lengths of all accepted chunks
+	private totalLength: number;
+
+	constructor() {
+		this.leftoverPrevChunk = '';
+		this.leftoverEndsInCR = false;
+		this.totalCRCount = 0;
+		this.lineBasedBuilder = new ModelLineBasedBuilder();
+		this.totalLength = 0;
+	}
+
+	/**
+	 * Adds the number of \r characters in `chunk` to the running total.
+	 */
+	private _updateCRCount(chunk:string): void {
+		// Count how many \r are present in chunk to determine the majority EOL sequence
+		let chunkCarriageReturnCnt = 0;
+		let lastCarriageReturnIndex = -1;
+		while ((lastCarriageReturnIndex = chunk.indexOf('\r', lastCarriageReturnIndex + 1)) !== -1) {
+			chunkCarriageReturnCnt++;
+		}
+		this.totalCRCount += chunkCarriageReturnCnt;
+	}
+
+	/**
+	 * Consumes the next chunk of text. Empty chunks are ignored.
+	 * Complete lines are forwarded to the line based builder; the trailing
+	 * incomplete line is kept for the next call or for `finish`.
+	 */
+	public acceptChunk(chunk:string): void {
+		if (chunk.length === 0) {
+			return;
+		}
+		this.totalLength += chunk.length;
+
+		// Counted on the chunk as received, before a held back \r is re-attached,
+		// so every \r is counted exactly once
+		this._updateCRCount(chunk);
+
+		// Avoid dealing with a chunk that ends in \r (push the \r to the next chunk)
+		if (this.leftoverEndsInCR) {
+			chunk = '\r' + chunk;
+		}
+		if (chunk.charCodeAt(chunk.length - 1) === 13 /*\r*/) {
+			this.leftoverEndsInCR = true;
+			chunk = chunk.substr(0, chunk.length - 1);
+		} else {
+			this.leftoverEndsInCR = false;
+		}
+
+		let lines = chunk.split(/\r\n|\r|\n/);
+
+		if (lines.length === 1) {
+			// no \r or \n encountered
+			this.leftoverPrevChunk += lines[0];
+			return;
+		}
+
+		// The first piece completes the line started by previous chunks
+		lines[0] = this.leftoverPrevChunk + lines[0];
+		// The last piece is not terminated yet: hold it back as the new leftover
+		this.lineBasedBuilder.acceptLines(lines, lines.length - 1);
+		this.leftoverPrevChunk = lines[lines.length - 1];
+	}
+
+	/**
+	 * Flushes the pending leftover as the final line(s) and returns the result.
+	 * A held back trailing \r terminates the leftover line, so an extra empty
+	 * last line is added in that case.
+	 */
+	public finish(opts:ITextModelCreationOptions): ModelBuilderResult {
+		let finalLines = [this.leftoverPrevChunk];
+		if (this.leftoverEndsInCR) {
+			finalLines.push('');
+		}
+		this.lineBasedBuilder.acceptLines(finalLines, finalLines.length);
+		return this.lineBasedBuilder.finish(this.totalLength, this.totalCRCount, opts);
+	}
+}
--- a/src/vs/editor/test/node/model/modelBuilder.test.ts
+++ b/src/vs/editor/test/node/model/modelBuilder.test.ts
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+'use strict';
+
+import * as assert from 'assert';
+import {ModelBuilder, computeHash} from 'vs/editor/node/model/modelBuilder';
+import {ITextModelCreationOptions, IRawText} from 'vs/editor/common/editorCommon';
+import {TextModel} from 'vs/editor/common/model/textModel';
+import * as strings from 'vs/base/common/strings';
+
+/**
+ * Feeds `chunks` one by one into a fresh ModelBuilder and asserts that the result
+ * (raw text and hash) equals what TextModel.toRawText / computeHash produce for the
+ * concatenation of all chunks.
+ *
+ * @param chunks the pieces of the file, in order.
+ * @param opts creation options used for both the expected and the actual result.
+ * @returns the expected hash of the content.
+ */
+export function testModelBuilder(chunks:string[], opts:ITextModelCreationOptions = TextModel.DEFAULT_CREATION_OPTIONS): string {
+	let expectedRawText = TextModel.toRawText(chunks.join(''), opts);
+	let expectedHash = computeHash(expectedRawText);
+
+	let builder = new ModelBuilder();
+	for (let i = 0, len = chunks.length; i < len; i++) {
+		builder.acceptChunk(chunks[i]);
+	}
+	let actual = builder.finish(opts);
+
+	// assert.deepEqual takes (actual, expected); keeping that order makes
+	// failure reports label the two sides correctly
+	assert.deepEqual(actual, {
+		rawText: expectedRawText,
+		hash: expectedHash
+	});
+
+	return expectedHash;
+}
+
+/**
+ * Wraps `lines` in a minimal IRawText: no BOM, '\n' as EOL, length 0 and no options.
+ */
+function toRawText(lines:string[]): IRawText {
+	const rawText: IRawText = {
+		BOM: '',
+		lines: lines,
+		EOL: '\n',
+		length: 0,
+		options: null
+	};
+	return rawText;
+}
+
+/**
+ * Asserts that the two line arrays hash to different values.
+ */
+export function testDifferentHash(lines1:string[], lines2:string[]): void {
+	const [hash1, hash2] = [lines1, lines2].map((lines) => computeHash(toRawText(lines)));
+	assert.notEqual(hash1, hash2);
+}
+
+// Checks that ModelBuilder yields the same raw text and hash as TextModel.toRawText,
+// regardless of how the content is cut into chunks.
+suite('ModelBuilder', () => {
+
+	test('uses sha1', () => {
+		// These are the sha1s of the string + \n
+		assert.equal(computeHash(toRawText([''])), 'adc83b19e793491b1c6ea0fd8b46cd9f32e592fc');
+		assert.equal(computeHash(toRawText(['hello world'])), '22596363b3de40b06f981fb85d82312e8c0ed511');
+	});
+
+	test('no chunks', () => {
+		testModelBuilder([]);
+	});
+
+	test('single empty chunk', () => {
+		testModelBuilder(['']);
+	});
+
+	test('single line in one chunk', () => {
+		testModelBuilder(['Hello world']);
+	});
+
+	test('single line in multiple chunks', () => {
+		testModelBuilder(['Hello', ' ', 'world']);
+	});
+
+	test('two lines in single chunk', () => {
+		testModelBuilder(['Hello world\nHow are you?']);
+	});
+
+	test('two lines in multiple chunks 1', () => {
+		testModelBuilder(['Hello worl', 'd\nHow are you?']);
+	});
+
+	test('two lines in multiple chunks 2', () => {
+		testModelBuilder(['Hello worl', 'd' , '\n', 'H', 'ow are you?']);
+	});
+
+	test('two lines in multiple chunks 3', () => {
+		testModelBuilder(['Hello worl', 'd' , '\nHow are you?']);
+	});
+
+	test('multiple lines in single chunks', () => {
+		testModelBuilder(['Hello world\nHow are you?\nIs everything good today?\nDo you enjoy the weather?']);
+	});
+
+	// The next three tests used to share one title; distinct titles make a
+	// failing case identifiable in the report
+	test('multiple lines in multiple chunks 1', () => {
+		testModelBuilder(['Hello world\nHow are you', '?\nIs everything good today?\nDo you enjoy the weather?']);
+	});
+
+	test('multiple lines in multiple chunks 2', () => {
+		testModelBuilder(['Hello world', '\nHow are you', '?\nIs everything good today?', '\nDo you enjoy the weather?']);
+	});
+
+	test('multiple lines in multiple chunks 3', () => {
+		testModelBuilder(['Hello world\n', 'How are you', '?\nIs everything good today?', '\nDo you enjoy the weather?']);
+	});
+
+	test('carriage return detection (1 \r\n 2 \n)', () => {
+		testModelBuilder(['Hello world\r\n', 'How are you', '?\nIs everything good today?', '\nDo you enjoy the weather?']);
+	});
+
+	test('carriage return detection (2 \r\n 1 \n)', () => {
+		testModelBuilder(['Hello world\r\n', 'How are you', '?\r\nIs everything good today?', '\nDo you enjoy the weather?']);
+	});
+
+	test('carriage return detection (3 \r\n 0 \n)', () => {
+		testModelBuilder(['Hello world\r\n', 'How are you', '?\r\nIs everything good today?', '\r\nDo you enjoy the weather?']);
+	});
+
+	test('carriage return detection (isolated \r)', () => {
+		testModelBuilder(['Hello world', '\r', '\n', 'How are you', '?', '\r', '\n', 'Is everything good today?', '\r', '\n', 'Do you enjoy the weather?']);
+	});
+
+	test('BOM handling', () => {
+		testModelBuilder([strings.UTF8_BOM_CHARACTER + 'Hello world!']);
+	});
+});
--- a/src/vs/editor/test/node/model/modelBuilderAuto.test.ts
+++ b/src/vs/editor/test/node/model/modelBuilderAuto.test.ts
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+'use strict';
+
+import {testModelBuilder, testDifferentHash} from 'vs/editor/test/node/model/modelBuilder.test';
+
+const GENERATE_TESTS = false;
+
+// Fixed inputs in the style produced by the random generator further down in this file.
+suite('ModelBuilder Auto Tests', () => {
+
+	test('auto1', () => {
+		const chunks = ['sarjniow','\r','\nbpb','ofb','\njzldgxx','\r\nkzwfjysng'];
+		testModelBuilder(chunks);
+	});
+
+	test('auto2', () => {
+		const chunks = ['i','yyernubi\r\niimgn\n','ut\r'];
+		testModelBuilder(chunks);
+	});
+
+	test('auto3', () => {
+		// One empty line versus three empty lines must not hash the same
+		const oneEmptyLine = [''];
+		const threeEmptyLines = ['', '', ''];
+		testDifferentHash(oneEmptyLine, threeEmptyLines);
+	});
+});
+
+/**
+ * Returns a random integer between `min` and `max`, both inclusive.
+ */
+function getRandomInt(min: number, max: number): number {
+	const span = max - min + 1;
+	return min + Math.floor(Math.random() * span);
+}
+
+/**
+ * Returns one of '\n', '\r' or '\r\n', picked at random.
+ */
+function getRandomEOLSequence(): string {
+	// The last alternative is the default branch so that every code path returns a string
+	switch (getRandomInt(1, 3)) {
+		case 1:
+			return '\n';
+		case 2:
+			return '\r';
+		default:
+			return '\r\n';
+	}
+}
+
+/**
+ * Returns a string of random lowercase letters ('a' to 'z') whose length is
+ * picked at random between `minLength` and `maxLength`, both inclusive.
+ */
+function getRandomString(minLength: number, maxLength: number): string {
+	const firstCode = 'a'.charCodeAt(0);
+	const lastCode = 'z'.charCodeAt(0);
+
+	let remaining = getRandomInt(minLength, maxLength);
+	let result = '';
+	while (remaining-- > 0) {
+		result += String.fromCharCode(getRandomInt(firstCode, lastCode));
+	}
+	return result;
+}
+
+/**
+ * Generates the content of a random file: 1 to 10 lines of 0 to 10 random letters,
+ * separated either by one fixed EOL sequence or by a new random one at every line break.
+ */
+function generateRandomFile(): string {
+	const lineCount = getRandomInt(1, 10);
+	const mixedEOLSequence = (getRandomInt(1, 2) === 1);
+	const fixedEOL = getRandomEOLSequence();
+
+	let content = '';
+	for (let i = 0; i < lineCount; i++) {
+		if (i > 0) {
+			// Line separator goes in front of every line except the first one
+			content += (mixedEOLSequence ? getRandomEOLSequence() : fixedEOL);
+		}
+		content += getRandomString(0, 10);
+	}
+	return content;
+}
+
+/**
+ * Cuts `file` into consecutive chunks at random offsets. The chunks are carved
+ * off from the end of the file towards its start (at most 1 to 20 cuts); whatever
+ * remains at the front becomes the first chunk. Joining the chunks yields `file`.
+ * Chunks may be empty when a cut lands on the current end offset.
+ */
+function generateRandomChunks(file:string): string[] {
+	const chunks:string[] = [];
+	let remainingCuts = getRandomInt(1, 20);
+	let end = file.length;
+
+	for (; remainingCuts > 0 && end > 0; remainingCuts--) {
+		const start = getRandomInt(0, end);
+		chunks.unshift(file.substring(start, end));
+		end = start;
+	}
+
+	if (end !== 0) {
+		// Ran out of cuts before reaching the start of the file
+		chunks.unshift(file.substring(0, end));
+	}
+	return chunks;
+}
+
+let HASH_TO_CONTENT: {[hash:string]:string;} = {};
+
+/**
+ * Runs the model builder check 5 to 10 times on `file`, each time with a different
+ * random chunking, and records the resulting hash in HASH_TO_CONTENT.
+ *
+ * @returns false if a check threw or if the hash was already recorded for a
+ *          different logical content (list of lines); true otherwise.
+ */
+function testRandomFile(file:string): boolean {
+	for (let remaining = getRandomInt(5, 10); remaining > 0; remaining--) {
+		const chunks = generateRandomChunks(file);
+		try {
+			const hash = testModelBuilder(chunks);
+			const logicalContent = JSON.stringify(file.split(/\r\n|\r|\n/));
+
+			if (!HASH_TO_CONTENT.hasOwnProperty(hash)) {
+				// First time this hash is seen
+				HASH_TO_CONTENT[hash] = logicalContent;
+				continue;
+			}
+
+			const prevLogicalContent = HASH_TO_CONTENT[hash];
+			if (prevLogicalContent !== logicalContent) {
+				console.log('HASH COLLISION: ');
+				console.log(prevLogicalContent);
+				console.log(logicalContent);
+				return false;
+			}
+		} catch(err) {
+			// Print the failure together with the chunking that triggered it
+			console.log(err);
+			console.log(JSON.stringify(chunks));
+			return false;
+		}
+	}
+	return true;
+}
+
+// Manual mode: keep testing random files until one of them fails
+if (GENERATE_TESTS) {
+	for (let testNumber = 1; ; testNumber++) {
+		console.log('------BEGIN NEW TEST: ' + testNumber);
+
+		const passed = testRandomFile(generateRandomFile());
+		if (!passed) {
+			break;
+		}
+
+		console.log('------END NEW TEST: ' + testNumber);
+	}
+}