Add line by line parser for merge conflicts.

The prior implementation, which does a one-shot regular expression match, is kept in mergeConflictParserLegacy.

Add line by line parser for merge conflicts.
The prior implementation, which does a one-shot regular expression match, is kept in mergeConflictParserLegacy.
76b6b177 · Phil Price · 59fbe319 · 76b6b177 · 76b6b177
2 changed files
--- a/extensions/merge-conflict/src/mergeConflictParser.ts
+++ b/extensions/merge-conflict/src/mergeConflictParser.ts
@@ -5,125 +5,130 @@
 import * as vscode from 'vscode';
 import * as interfaces from './interfaces';
 import { DocumentMergeConflict } from './documentMergeConflict';
-import * as vm from 'vm';
+
+const startMarker = '<<<<<<<';
+const splitterMarker = '=======';
+const endMarker = '>>>>>>>';
+
+// Marker ranges gathered for one conflict while scanning. Only the header is known
+// when the conflict is opened; the other two are set as their lines are reached.
+interface IPartialMergeConflictDescriptor {
+	currentHeader: vscode.Range;
+	splitter?: vscode.Range;
+	incomingFooter?: vscode.Range;
+}

 export class MergeConflictParser {

+	/**
+	 * Scans the document line by line and groups the lines starting with <<<<<<<,
+	 * ======= and >>>>>>> into merge conflicts.
+	 *
+	 * A conflict is reported only once its end marker is reached and a splitter was
+	 * seen for it. Splitter and end markers outside of a conflict are ignored. A
+	 * second start marker before the end marker stops the scan; conflicts completed
+	 * before that point are still returned.
+	 *
+	 * @param document Document whose lines are scanned.
+	 * @returns One DocumentMergeConflict per completed conflict, in document order.
+	 */
 	static scanDocument(document: vscode.TextDocument): interfaces.IDocumentMergeConflict[] {

-		// Conflict matching regex, comments are in the format of "description - [group index] group name"
-		// Premise is: Match the current change (<<<<<<), match anything up to the splitter (======) then
-		// match anything up to the incoming change (>>>>>>), this leaves some oddities with newlines not being
-		// pulled into the "body" of each change, DocumentMergeConflict.applyEdit will deal with these cases
-		// and append newlines when needed
+		// Scan each line in the document; we already know there is at least a <<<<<<< and
+		// a >>>>>>> marker within the document, we need to group these into conflict ranges.

-		const conflictMatcher = new RegExp([
-			/(^<<<<<<<\s(.+)\r?\n)/,             // "Current" conflict header - [1] entire line, [2] name
-			/([\s\S]*?)/,                        // "Current" conflict body - [3] body text
-			/(^=======\r?\n)/,                   // Splitter - [4] entire line
-			/([\s\S]*?)/,                        // Incoming conflict body - [5]
-			/(^>>>>>>>\s(.+)\r?\n)/              // Incoming conflict header - [6] entire line, [7] name
-		].map(r => r.source).join(''), 'mg');
+		let currentConflict: IPartialMergeConflictDescriptor | null = null;

-		const offsetGroups = [1, 3, 4, 5, 6]; // Skip inner matches when calculating length
+		const conflictDescriptors: interfaces.IDocumentMergeConflictDescriptor[] = [];

-		let text = document.getText();
-		let sandboxScope = {
-			result: [],
-			conflictMatcher,
-			text: text
-		};
-		const context = vm.createContext(sandboxScope);
-		const script = new vm.Script(`
-			let match;
-			while (match = conflictMatcher.exec(text)) {
-				// Ensure we don't get stuck in an infinite loop
-				if (match.index === conflictMatcher.lastIndex) {
-					conflictMatcher.lastIndex++;
+		for (let i = 0; i < document.lineCount; i++) {
+			const line = document.lineAt(i);
+
+			if (line.text.startsWith(startMarker)) {
+				if (currentConflict !== null) {
+					// Error, we should not see a startMarker before we've seen an endMarker.
+					// Give up scanning; conflicts completed so far are still returned.
+					currentConflict = null;
+					break;
 				}

-				result.push(match);
-			}`);
+				currentConflict = {
+					currentHeader: line.range
+				};
+			}
+			else if (line.text.startsWith(splitterMarker)) {

-		try {
-			// If the regex takes longer than 1s consider it dead
-			script.runInContext(context, { timeout: 1000 });
-		}
-		catch (ex) {
-			return [];
-		}
+				if (currentConflict === null) {
+					continue; // Ignore, splitter outside of a conflict
+				}

-		return sandboxScope.result.map(match => new DocumentMergeConflict(document, MergeConflictParser.matchesToDescriptor(document, match, offsetGroups)));
-	}
+				currentConflict.splitter = line.range;
+			}
+			else if (line.text.startsWith(endMarker)) {
+				if (currentConflict === null) {
+					continue; // Ignore, end marker outside of a conflict
+				}

-	static containsConflict(document: vscode.TextDocument): boolean {
-		if (!document) {
-			return false;
-		}
+				currentConflict.incomingFooter = line.range;

-		// TODO: Ask source control if the file contains a conflict
-		let text = document.getText();
-		return text.includes('<<<<<<<') && text.includes('>>>>>>>');
-	}
+				const completeDescriptor = MergeConflictParser.completePartialMergeDescriptor(currentConflict);

-	static matchesToDescriptor(document: vscode.TextDocument, match: RegExpExecArray, offsets?: number[]): interfaces.IDocumentMergeConflictDescriptor {
+				if (completeDescriptor !== null) {
+					conflictDescriptors.push(completeDescriptor);
+				}

-		var item: interfaces.IDocumentMergeConflictDescriptor = {
-			range: new vscode.Range(document.positionAt(match.index), document.positionAt(match.index + match[0].length)),
-			current: {
-				name: match[2],
-				header: this.getMatchPositions(document, match, 1, offsets),
-				content: this.getMatchPositions(document, match, 3, offsets),
-			},
-			splitter: this.getMatchPositions(document, match, 4, offsets),
-			incoming: {
-				name: match[9],
-				header: this.getMatchPositions(document, match, 6, offsets),
-				content: this.getMatchPositions(document, match, 5, offsets),
+				currentConflict = null;
 			}
-		};
-
-		return item;
-	}
-
-
-	static getMatchPositions(document: vscode.TextDocument, match: RegExpExecArray, groupIndex: number, offsetGroups?: number[]): vscode.Range {
-		// Javascript doesnt give of offsets within the match, we need to calculate these
-		// based of the prior groups, skipping nested matches (yuck).
-		if (!offsetGroups) {
-			offsetGroups = match.map((i, idx) => idx);
 		}

-		let start = match.index;
-
-		for (var i = 0; i < offsetGroups.length; i++) {
-			let value = offsetGroups[i];
+		return conflictDescriptors.map(descriptor => new DocumentMergeConflict(document, descriptor));
+	}

-			if (value >= groupIndex) {
-				break;
-			}
+	/**
+	 * Builds a full conflict descriptor from the marker ranges of one conflict.
+	 *
+	 * @param partial Marker ranges collected while scanning.
+	 * @returns The descriptor, or null if any of the three marker ranges is missing.
+	 */
+	private static completePartialMergeDescriptor(partial: IPartialMergeConflictDescriptor): interfaces.IDocumentMergeConflictDescriptor | null {
+		// Only the three marker ranges are needed; the content ranges and the total
+		// range are derived from them below.

-			start += match[value] !== undefined ? match[value].length : 0;
+		if (!partial.currentHeader || !partial.splitter || !partial.incomingFooter) {
+			return null;
 		}

-		const groupMatch = match[groupIndex];
-		let targetMatchLength = groupMatch !== undefined ? groupMatch.length : -1;
-		let end = (start + targetMatchLength);
-
-		if (groupMatch !== undefined) {
-			// Move the end up if it's capped by a trailing \r\n, this is so regions don't expand into
-			// the line below, and can be "pulled down" by editing the line below
-			if (match[groupIndex].lastIndexOf('\n') === targetMatchLength - 1) {
-				end--;
+		return {
+			current: {
+				header: partial.currentHeader,
+				// Current content is range between header and splitter
+				content: new vscode.Range(partial.currentHeader.end, partial.splitter.start),
+				name: ''
+			},
+			splitter: partial.splitter,
+			incoming: {
+				header: partial.incomingFooter,
+				// Incoming content is range between splitter and footer; it stops at the
+				// footer's start so the >>>>>>> line itself is not part of the content
+				content: new vscode.Range(partial.splitter.end, partial.incomingFooter.start),
+				name: ''
+			},
+			// Entire range is between current header start and incoming footer end
+			range: new vscode.Range(partial.currentHeader.start, partial.incomingFooter.end)
+		};
+	}

-				// .. for windows encodings of new lines
-				if (match[groupIndex].lastIndexOf('\r') === targetMatchLength - 2) {
-					end--;
-				}
-			}
+	/**
+	 * Cheap pre-check: true when the document text contains both a start marker and
+	 * an end marker sequence anywhere. Returns false for a missing document.
+	 */
+	static containsConflict(document: vscode.TextDocument): boolean {
+		if (!document) {
+			return false;
 		}

-		return new vscode.Range(document.positionAt(start), document.positionAt(end));
+		// TODO: Ask source control if the file contains a conflict
+		const text = document.getText();
+		return text.includes(startMarker) && text.includes(endMarker);
 	}
-
 }
--- a/extensions/merge-conflict/src/mergeConflictParserLegacy.ts
+++ b/extensions/merge-conflict/src/mergeConflictParserLegacy.ts
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+import * as vscode from 'vscode';
+import * as interfaces from './interfaces';
+import { DocumentMergeConflict } from './documentMergeConflict';
+import * as vm from 'vm';
+
+/**
+ * Legacy merge conflict parser: finds conflicts with a single multi-line regular
+ * expression executed over the whole document text.
+ */
+export class MergeConflictParser {
+
+	/**
+	 * Finds every conflict in the document by repeatedly executing the conflict regex.
+	 *
+	 * The matching loop runs inside a `vm` script with a one second timeout; if running
+	 * the script throws, an empty list is returned.
+	 *
+	 * @param document Document whose full text is searched.
+	 * @returns One DocumentMergeConflict per regex match.
+	 */
+	static scanDocument(document: vscode.TextDocument): interfaces.IDocumentMergeConflict[] {
+
+		// Conflict matching regex, comments are in the format of "description - [group index] group name"
+		// Premise is: Match the current change (<<<<<<<), match anything up to the splitter (=======) then
+		// match anything up to the incoming change (>>>>>>>), this leaves some oddities with newlines not being
+		// pulled into the "body" of each change, DocumentMergeConflict.applyEdit will deal with these cases
+		// and append newlines when needed
+
+		const conflictMatcher = new RegExp([
+			/(^<<<<<<<\s(.+)\r?\n)/,             // "Current" conflict header - [1] entire line, [2] name
+			/([\s\S]*?)/,                        // "Current" conflict body - [3] body text
+			/(^=======\r?\n)/,                   // Splitter - [4] entire line
+			/([\s\S]*?)/,                        // Incoming conflict body - [5]
+			/(^>>>>>>>\s(.+)\r?\n)/              // Incoming conflict header - [6] entire line, [7] name
+		].map(r => r.source).join(''), 'mg');
+
+		const offsetGroups = [1, 3, 4, 5, 6]; // Skip inner matches when calculating length
+
+		const text = document.getText();
+		const matches: RegExpExecArray[] = [];
+
+		// The script below reads `conflictMatcher` and `text` and pushes into `result`
+		// through this object, which becomes its global scope.
+		const sandboxScope = {
+			result: matches,
+			conflictMatcher,
+			text: text
+		};
+		const context = vm.createContext(sandboxScope);
+		const script = new vm.Script(`
+			let match;
+			while (match = conflictMatcher.exec(text)) {
+				// Ensure we don't get stuck in an infinite loop
+				if (match.index === conflictMatcher.lastIndex) {
+					conflictMatcher.lastIndex++;
+				}
+
+				result.push(match);
+			}`);
+
+		try {
+			// If the regex takes longer than 1s consider it dead
+			script.runInContext(context, { timeout: 1000 });
+		}
+		catch (ex) {
+			return [];
+		}
+
+		return sandboxScope.result.map(match => new DocumentMergeConflict(document, MergeConflictParser.matchesToDescriptor(document, match, offsetGroups)));
+	}
+
+	/**
+	 * Cheap pre-check: true when the document text contains both a `<<<<<<<` and a
+	 * `>>>>>>>` sequence anywhere. Returns false for a missing document.
+	 */
+	static containsConflict(document: vscode.TextDocument): boolean {
+		if (!document) {
+			return false;
+		}
+
+		// TODO: Ask source control if the file contains a conflict
+		const text = document.getText();
+		return text.includes('<<<<<<<') && text.includes('>>>>>>>');
+	}
+
+	/**
+	 * Converts one match of the conflict regex into a conflict descriptor.
+	 *
+	 * @param document Document the match was found in; used to turn offsets into positions.
+	 * @param match Match produced by the conflict regex built in scanDocument.
+	 * @param offsets Group indexes passed through to getMatchPositions.
+	 * @returns Descriptor holding the names and the ranges of each part of the conflict.
+	 */
+	static matchesToDescriptor(document: vscode.TextDocument, match: RegExpExecArray, offsets?: number[]): interfaces.IDocumentMergeConflictDescriptor {
+
+		const item: interfaces.IDocumentMergeConflictDescriptor = {
+			range: new vscode.Range(document.positionAt(match.index), document.positionAt(match.index + match[0].length)),
+			current: {
+				name: match[2],
+				header: this.getMatchPositions(document, match, 1, offsets),
+				content: this.getMatchPositions(document, match, 3, offsets),
+			},
+			splitter: this.getMatchPositions(document, match, 4, offsets),
+			incoming: {
+				// Group 7 is the name captured inside the incoming header (group 6);
+				// the regex has no group 9, so reading it always gave undefined.
+				name: match[7],
+				header: this.getMatchPositions(document, match, 6, offsets),
+				content: this.getMatchPositions(document, match, 5, offsets),
+			}
+		};
+
+		return item;
+	}
+
+
+	/**
+	 * Computes the document range covered by one capture group of a match.
+	 *
+	 * @param document Document used to convert character offsets into positions.
+	 * @param match Regex match the group belongs to.
+	 * @param groupIndex Index of the capture group to locate.
+	 * @param offsetGroups Ascending group indexes whose lengths are summed to find the
+	 *   start of the target group; defaults to every index of the match.
+	 * @returns Range of the group, excluding one trailing line break if the group ends with one.
+	 */
+	static getMatchPositions(document: vscode.TextDocument, match: RegExpExecArray, groupIndex: number, offsetGroups?: number[]): vscode.Range {
+		// Javascript doesn't give us offsets within the match, we need to calculate these
+		// based on the prior groups, skipping nested matches (yuck).
+		if (!offsetGroups) {
+			offsetGroups = match.map((i, idx) => idx);
+		}
+
+		let start = match.index;
+
+		for (let i = 0; i < offsetGroups.length; i++) {
+			const value = offsetGroups[i];
+
+			// Only groups that come before the target contribute to its start offset
+			if (value >= groupIndex) {
+				break;
+			}
+
+			start += match[value] !== undefined ? match[value].length : 0;
+		}
+
+		const groupMatch = match[groupIndex];
+		const targetMatchLength = groupMatch !== undefined ? groupMatch.length : -1;
+		let end = (start + targetMatchLength);
+
+		if (groupMatch !== undefined) {
+			// Move the end up if it's capped by a trailing \r\n, this is so regions don't expand into
+			// the line below, and can be "pulled down" by editing the line below
+			if (groupMatch.lastIndexOf('\n') === targetMatchLength - 1) {
+				end--;
+
+				// .. for windows encodings of new lines
+				if (groupMatch.lastIndexOf('\r') === targetMatchLength - 2) {
+					end--;
+				}
+			}
+		}
+
+		return new vscode.Range(document.positionAt(start), document.positionAt(end));
+	}
+
+}