提交 9d40f0c6 编写于 作者: B Benjamin Pasero

tweak scoring to prevent match scattering

上级 5b4c9d25
......@@ -14,180 +14,218 @@ import { stripWildcards } from 'vs/base/common/strings';
export type Score = [number /* score */, number[] /* match positions */];
export type ScorerCache = { [key: string]: IItemScore };
const NO_SCORE: Score = [0, []];
const NO_MATCH = 0;
const NO_SCORE: Score = [NO_MATCH, []];
export function _doScore(target: string, query: string, queryLower: string, fuzzy: boolean): Score {
// const DEBUG = false;
// const DEBUG_MATRIX = false;
export function score(target: string, query: string, queryLower: string, fuzzy: boolean): Score {
if (!target || !query) {
return NO_SCORE; // return early if target or query are undefined
}
if (target.length < query.length) {
const targetLength = target.length;
const queryLength = query.length;
if (targetLength < queryLength) {
return NO_SCORE; // impossible for query to be contained in target
}
// console.group(`Target: ${target}, Query: ${query}`);
// if (DEBUG) {
// console.group(`Target: ${target}, Query: ${query}`);
// }
const queryLen = query.length;
const targetLower = target.toLowerCase();
let res = NO_SCORE;
// When not searching fuzzy, we require the query to be contained fully
// in the target string. We set the offset to search from to that location.
// in the target string contiguously.
if (!fuzzy) {
const indexOfQueryInTarget = targetLower.indexOf(queryLower);
if (indexOfQueryInTarget === -1) {
// console.log(`Characters not matching consecutively ${queryLower} within ${targetLower}`);
// if (DEBUG) {
// console.log(`Characters not matching consecutively ${queryLower} within ${targetLower}`);
// }
return NO_SCORE;
}
res = _doScoreFromOffset(target, query, targetLower, queryLower, queryLen, indexOfQueryInTarget);
}
// When searching fuzzy we run the scorer for each location of the first query
// character so that we can produce better results in case the pattern matches
// multiple times on the target (prevent scattering of matching positions).
// When searching fuzzy, we require the query to be contained fully
// in the target string as separate substrings
else {
const queryFirstCharacter = queryLower[0];
let offset = 0;
while ((offset = targetLower.indexOf(queryFirstCharacter, offset)) !== -1) {
const scoreFromOffset = _doScoreFromOffset(target, query, targetLower, queryLower, queryLen, offset);
if (isBetterScore(res, scoreFromOffset)) {
res = scoreFromOffset;
let targetOffset = 0;
for (let queryIndex = 0; queryIndex < queryLength; queryIndex++) {
targetOffset = targetLower.indexOf(queryLower[queryIndex], targetOffset);
if (targetOffset === -1) {
return NO_SCORE;
}
offset++;
}
}
// console.log(`%cFinal Score: ${score}`, 'font-weight: bold');
// console.groupEnd();
const res = doScore(query, queryLower, queryLength, target, targetLower, targetLength);
// if (DEBUG) {
// console.log(`%cFinal Score: ${res[0]}`, 'font-weight: bold');
// console.groupEnd();
// }
return res;
}
function isBetterScore(score: Score, candidate: Score): boolean {
if (candidate[0] > score[0]) {
return true; // candidate has higher score
}
function doScore(query: string, queryLower: string, queryLength: number, target: string, targetLower: string, targetLength: number): [number, number[]] {
const scores = [];
const matches = [];
//
// Build Scorer Matrix
// The matrix is composed of query q and target t. For each index we score
// q[i] with t[i] and compare that with the previous score. If the score is
// equal or larger, we keep the match. In addition to the score, we also keep
// the length of the consecutive matches to use as boost for the score.
//
// t a r g e t
// q
// u
// e
// r
// y
//
for (let queryIndex = 0; queryIndex < queryLength; queryIndex++) {
for (let targetIndex = 0; targetIndex < targetLength; targetIndex++) {
const currentIndex = queryIndex * targetLength + targetIndex;
const leftIndex = currentIndex - 1;
const diagIndex = (queryIndex - 1) * targetLength + targetIndex - 1;
const leftScore = targetIndex > 0 ? scores[leftIndex] : 0;
const diagScore = queryIndex > 0 && targetIndex > 0 ? scores[diagIndex] : 0;
const matchesSequenceLength = queryIndex > 0 && targetIndex > 0 ? matches[diagIndex] : 0;
const score = computeCharScore(query, queryLower, queryIndex, target, targetLower, targetIndex, matchesSequenceLength);
// We have a score and it's equal to or larger than the left score
// Match: sequence continues growing from previous diag value
// Score: increases by diag score value
if (score && diagScore + score >= leftScore) {
matches[currentIndex] = matchesSequenceLength + 1;
scores[currentIndex] = diagScore + score;
}
if (score[0] > candidate[0]) {
return false; // candidate has lower score
// We either have no score or the score is lower than the left score
// Match: reset to 0
// Score: pick up from left hand side
else {
matches[currentIndex] = NO_MATCH;
scores[currentIndex] = leftScore;
}
}
}
// Score is the same, check by match compactness
const matchStart = score[1][0];
const matchEnd = score[1][score[1].length - 1];
const matchLength = matchEnd - matchStart;
const candidateMatchStart = candidate[1][0];
const candidateMatchEnd = candidate[1][candidate[1].length - 1];
const candidateMatchLength = candidateMatchEnd - candidateMatchStart;
if (candidateMatchLength < matchLength) {
return true; // candidate has more compact matches
// Restore Positions (starting from bottom right of matrix)
const positions = [];
let queryIndex = queryLength - 1;
let targetIndex = targetLength - 1;
while (queryIndex >= 0 && targetIndex >= 0) {
const currentIndex = queryIndex * targetLength + targetIndex;
const match = matches[currentIndex];
if (match === NO_MATCH) {
targetIndex--; // go left
} else {
positions.push(targetIndex);
// go up and left
queryIndex--;
targetIndex--;
}
}
return false;
// Print matrix
// if (DEBUG_MATRIX) {
// printMatrix(query, target, matches, scores);
// }
return [scores[queryLength * targetLength - 1], positions.reverse()];
}
// Based on material from:
/*!
BEGIN THIRD PARTY
*/
/*!
* string_score.js: String Scoring Algorithm 0.1.22
*
* http://joshaven.com/string_score
* https://github.com/joshaven/string_score
*
* Copyright (C) 2009-2014 Joshaven Potter <yourtech@gmail.com>
* Special thanks to all of the contributors listed here https://github.com/joshaven/string_score
* MIT License: http://opensource.org/licenses/MIT
*
* Date: Tue Mar 1 2011
* Updated: Tue Mar 10 2015
*/
function _doScoreFromOffset(target: string, query: string, targetLower: string, queryLower: string, queryLen: number, offset: number): Score {
const matchingPositions: number[] = [];
let targetIndex = offset;
let queryIndex = 0;
function computeCharScore(query: string, queryLower: string, queryIndex: number, target: string, targetLower: string, targetIndex: number, matchesSequenceLength: number): number {
let score = 0;
while (queryIndex < queryLen) {
// Check for query character being contained in target
const indexOfQueryInTarget = targetLower.indexOf(queryLower[queryIndex], targetIndex);
if (indexOfQueryInTarget < 0) {
// console.log(`Character not part of target ${query[index]}`);
score = 0;
break;
}
// Fill into positions array
matchingPositions.push(indexOfQueryInTarget);
// Character match bonus
score += 1;
// console.groupCollapsed(`%cCharacter match bonus: +1 (char: ${query[index]} at index ${indexOf}, total score: ${score})`, 'font-weight: normal');
if (queryLower[queryIndex] !== targetLower[targetIndex]) {
return score; // no match of characters
}
// Consecutive match bonus
if (targetIndex === indexOfQueryInTarget && queryIndex > 0) {
score += 5;
// Character match bonus
score += 1;
// console.log('Consecutive match bonus: +5');
}
// if (DEBUG) {
// console.groupCollapsed(`%cCharacter match bonus: +1 (char: ${queryLower[queryIndex]} at index ${targetIndex}, total score: ${score})`, 'font-weight: normal');
// }
// Same case bonus
if (target[indexOfQueryInTarget] === query[queryIndex]) {
score += 1;
// Consecutive match bonus
if (matchesSequenceLength > 0) {
score += (matchesSequenceLength * 5);
// console.log('Same case bonus: +1');
}
// if (DEBUG) {
// console.log('Consecutive match bonus: ' + (matchesSequenceLength * 5));
// }
}
// Start of word bonus
if (indexOfQueryInTarget === 0) {
score += 8;
// Same case bonus
if (query[queryIndex] === target[targetIndex]) {
score += 1;
// console.log('Start of word bonus: +8');
}
// if (DEBUG) {
// console.log('Same case bonus: +1');
// }
}
// After separator bonus
else if (isSeparatorAtPos(target, indexOfQueryInTarget - 1)) {
score += 7;
// Start of word bonus
if (targetIndex === 0) {
score += 8;
// console.log('After separator bonus: +7');
}
// if (DEBUG) {
// console.log('Start of word bonus: +8');
// }
}
// Inside word upper case bonus
else if (isUpper(target.charCodeAt(indexOfQueryInTarget))) {
score += 1;
// After separator bonus
else if (isSeparatorAtPos(target, targetIndex - 1)) {
score += 4;
// console.log('Inside word upper case bonus: +1');
}
// if (DEBUG) {
// console.log('After separator bonus: +4');
// }
}
// console.groupEnd();
// Inside word upper case bonus
else if (isUpper(target.charCodeAt(targetIndex))) {
score += 1;
targetIndex = indexOfQueryInTarget + 1;
queryIndex++;
// if (DEBUG) {
// console.log('Inside word upper case bonus: +1');
// }
}
const res: Score = (score > 0) ? [score, matchingPositions] : NO_SCORE;
// console.log(`%cFinal Score: ${score}`, 'font-weight: bold');
// console.groupEnd();
// if (DEBUG) {
// console.groupEnd();
// }
return res;
return score;
}
/*!
END THIRD PARTY
*/
// function printMatrix(query: string, target: string, matches: number[], scores: number[]): void {
// console.log('\t' + target.split('').join('\t'));
// for (let queryIndex = 0; queryIndex < query.length; queryIndex++) {
// let line = query[queryIndex] + '\t';
// for (let targetIndex = 0; targetIndex < target.length; targetIndex++) {
// const currentIndex = queryIndex * target.length + targetIndex;
// line = line + 'M' + matches[currentIndex] + '/' + 'S' + scores[currentIndex] + '\t';
// }
// console.log(line);
// }
// }
/**
* Scoring on structural items that have a label and optional description.
......@@ -315,7 +353,7 @@ function doScoreItem<T>(label: string, description: string, path: string, query:
}
// 4.) prefer scores on the label if any
const [labelScore, labelPositions] = _doScore(label, query.value, query.lowercase, fuzzy);
const [labelScore, labelPositions] = score(label, query.value, query.lowercase, fuzzy);
if (labelScore) {
return { score: labelScore + LABEL_SCORE_THRESHOLD, labelMatch: createMatches(labelPositions) };
}
......@@ -331,7 +369,7 @@ function doScoreItem<T>(label: string, description: string, path: string, query:
const descriptionPrefixLength = descriptionPrefix.length;
const descriptionAndLabel = `${descriptionPrefix}${label}`;
const [labelDescriptionScore, labelDescriptionPositions] = _doScore(descriptionAndLabel, query.value, query.lowercase, fuzzy);
const [labelDescriptionScore, labelDescriptionPositions] = score(descriptionAndLabel, query.value, query.lowercase, fuzzy);
if (labelDescriptionScore) {
const labelDescriptionMatches = createMatches(labelDescriptionPositions);
const labelMatch: IMatch[] = [];
......
......@@ -44,7 +44,7 @@ class NullAccessorClass implements scorer.IItemAccessor<URI> {
}
function _doScore(target: string, query: string, fuzzy: boolean): scorer.Score {
return scorer._doScore(target, query, query.toLowerCase(), fuzzy);
return scorer.score(target, query, query.toLowerCase(), fuzzy);
}
function scoreItem<T>(item: T, query: string, fuzzy: boolean, accessor: scorer.IItemAccessor<T>, cache: scorer.ScorerCache): scorer.IItemScore {
......@@ -209,6 +209,18 @@ suite('Quick Open Scorer', () => {
assert.equal(pathRes.descriptionMatch[0].end, 26);
});
// Regression test for bug #36119: fuzzy-scoring the query 'tcltarget.mk'
// against 'projects/ui/cula/ats/target.mk' must yield a score, a description
// match and exactly one label match running from offset 0 to offset 9.
// NOTE(review): 'target.mk' is 9 characters long, so this presumably means the
// whole file name is highlighted as one range - confirm that ResourceAccessor
// uses the basename as the label.
test('scoreItem - avoid match scattering (bug #36119)', function () {
	const resource = URI.file('projects/ui/cula/ats/target.mk');

	// `true` enables fuzzy matching
	const pathRes = scoreItem(resource, 'tcltarget.mk', true, ResourceAccessor, cache);

	assert.ok(pathRes.score);
	assert.ok(pathRes.descriptionMatch);
	assert.ok(pathRes.labelMatch);

	// The label must be covered by a single contiguous match, not scattered ones
	assert.equal(pathRes.labelMatch.length, 1);
	assert.equal(pathRes.labelMatch[0].start, 0);
	assert.equal(pathRes.labelMatch[0].end, 9);
});
test('scoreItem - prefers more compact matches', function () {
const resource = URI.file('/1a111d1/11a1d1/something.txt');
......@@ -541,13 +553,19 @@ suite('Quick Open Scorer', () => {
let res = [resourceA, resourceB, resourceC, resourceD].sort((r1, r2) => compareItemsByScore(r1, r2, query, true, ResourceAccessor, cache));
assert.equal(res[0], resourceC);
res = [resourceC, resourceB, resourceA, resourceD].sort((r1, r2) => compareItemsByScore(r1, r2, query, true, ResourceAccessor, cache));
assert.equal(res[0], resourceC);
query = isWindows ? 'un1\\index.js' : 'un1/index.js';
res = [resourceA, resourceB, resourceC, resourceD].sort((r1, r2) => compareItemsByScore(r1, r2, query, true, ResourceAccessor, cache));
assert.equal(res[0], resourceB);
res = [resourceC, resourceB, resourceA, resourceD].sort((r1, r2) => compareItemsByScore(r1, r2, query, true, ResourceAccessor, cache));
assert.equal(res[0], resourceB);
});
test('compareFilesByScore - avoid match scattering (bug #21019)', function () {
test('compareFilesByScore - avoid match scattering (bug #21019 1.)', function () {
const resourceA = URI.file('app/containers/Services/NetworkData/ServiceDetails/ServiceLoad/index.js');
const resourceB = URI.file('app/containers/Services/NetworkData/ServiceDetails/ServiceDistribution/index.js');
const resourceC = URI.file('app/containers/Services/NetworkData/ServiceDetailTabs/ServiceTabs/StatVideo/index.js');
......@@ -556,6 +574,22 @@ suite('Quick Open Scorer', () => {
let res = [resourceA, resourceB, resourceC].sort((r1, r2) => compareItemsByScore(r1, r2, query, true, ResourceAccessor, cache));
assert.equal(res[0], resourceC);
res = [resourceC, resourceB, resourceA].sort((r1, r2) => compareItemsByScore(r1, r2, query, true, ResourceAccessor, cache));
assert.equal(res[0], resourceC);
});
// Bug #21019 (second case): for the query 'reproreduxts' the file under
// 'src/repository' must sort ahead of the one under 'src/build-helper',
// whichever order the candidates start in.
test('compareFilesByScore - avoid match scattering (bug #21019 2.)', function () {
	const buildHelperFile = URI.file('src/build-helper/store/redux.ts');
	const repositoryFile = URI.file('src/repository/store/redux.ts');
	const searchText = 'reproreduxts';

	// Run the same expectation against both initial orderings
	const orderings = [
		[buildHelperFile, repositoryFile],
		[repositoryFile, buildHelperFile]
	];

	for (const candidates of orderings) {
		const ranked = candidates.sort((left, right) => compareItemsByScore(left, right, searchText, true, ResourceAccessor, cache));
		assert.equal(ranked[0], repositoryFile);
	}
});
test('compareFilesByScore - avoid match scattering (bug #26649)', function () {
......@@ -567,6 +601,9 @@ suite('Quick Open Scorer', () => {
let res = [resourceA, resourceB, resourceC].sort((r1, r2) => compareItemsByScore(r1, r2, query, true, ResourceAccessor, cache));
assert.equal(res[0], resourceC);
res = [resourceC, resourceB, resourceA].sort((r1, r2) => compareItemsByScore(r1, r2, query, true, ResourceAccessor, cache));
assert.equal(res[0], resourceC);
});
test('compareFilesByScore - avoid match scattering (bug #33247)', function () {
......@@ -577,6 +614,9 @@ suite('Quick Open Scorer', () => {
let res = [resourceA, resourceB].sort((r1, r2) => compareItemsByScore(r1, r2, query, true, ResourceAccessor, cache));
assert.equal(res[0], resourceB);
res = [resourceB, resourceA].sort((r1, r2) => compareItemsByScore(r1, r2, query, true, ResourceAccessor, cache));
assert.equal(res[0], resourceB);
});
test('compareFilesByScore - avoid match scattering (bug #33247 comment)', function () {
......@@ -587,6 +627,40 @@ suite('Quick Open Scorer', () => {
let res = [resourceA, resourceB].sort((r1, r2) => compareItemsByScore(r1, r2, query, true, ResourceAccessor, cache));
assert.equal(res[0], resourceB);
res = [resourceB, resourceA].sort((r1, r2) => compareItemsByScore(r1, r2, query, true, ResourceAccessor, cache));
assert.equal(res[0], resourceB);
});
// Bug #36166: for the query 'djancosig', 'django/core/signals.py' must sort
// ahead of the longer LC_MESSAGES path, whichever order the candidates start in.
test('compareFilesByScore - avoid match scattering (bug #36166)', function () {
	const localeFile = URI.file('django/contrib/sites/locale/ga/LC_MESSAGES/django.mo');
	const signalsFile = URI.file('django/core/signals.py');
	const searchText = 'djancosig';

	// Run the same expectation against both initial orderings
	const orderings = [
		[localeFile, signalsFile],
		[signalsFile, localeFile]
	];

	for (const candidates of orderings) {
		const ranked = candidates.sort((left, right) => compareItemsByScore(left, right, searchText, true, ResourceAccessor, cache));
		assert.equal(ranked[0], signalsFile);
	}
});
// Bug #32918: for the query 'protectedconfig.php' the full ranking must be
// adsys/protected/config.php, then duowanVideo/wap/protected/config.php, then
// the smarty_internal_config.php path - whichever order the candidates start in.
test('compareFilesByScore - avoid match scattering (bug #32918)', function () {
	const adsysConfig = URI.file('adsys/protected/config.php');
	const smartyInternalConfig = URI.file('adsys/protected/framework/smarty/sysplugins/smarty_internal_config.php');
	const duowanConfig = URI.file('duowanVideo/wap/protected/config.php');
	const searchText = 'protectedconfig.php';

	// Run the same expectations against both initial orderings
	const orderings = [
		[adsysConfig, smartyInternalConfig, duowanConfig],
		[duowanConfig, smartyInternalConfig, adsysConfig]
	];

	for (const candidates of orderings) {
		const ranked = candidates.sort((left, right) => compareItemsByScore(left, right, searchText, true, ResourceAccessor, cache));
		assert.equal(ranked[0], adsysConfig);
		assert.equal(ranked[1], duowanConfig);
		assert.equal(ranked[2], smartyInternalConfig);
	}
});
test('compareFilesByScore - prefer shorter hit (bug #20546)', function () {
......@@ -597,6 +671,23 @@ suite('Quick Open Scorer', () => {
let res = [resourceA, resourceB].sort((r1, r2) => compareItemsByScore(r1, r2, query, true, ResourceAccessor, cache));
assert.equal(res[0], resourceB);
res = [resourceB, resourceA].sort((r1, r2) => compareItemsByScore(r1, r2, query, true, ResourceAccessor, cache));
assert.equal(res[0], resourceB);
});
// Bug #12095: for the query 'filesexplorerview.ts', explorerView.ts must rank
// first, ahead of explorerViewModel.ts and explorerViewer.ts, for both of the
// initial orderings exercised below.
test('compareFilesByScore - avoid match scattering (bug #12095)', function () {
	const viewModelFile = URI.file('src/vs/workbench/parts/files/common/explorerViewModel.ts');
	const viewFile = URI.file('src/vs/workbench/parts/files/browser/views/explorerView.ts');
	const viewerFile = URI.file('src/vs/workbench/parts/files/browser/views/explorerViewer.ts');
	const searchText = 'filesexplorerview.ts';

	// Two initial orderings; the expected winner sits in a different slot in each
	const orderings = [
		[viewModelFile, viewFile, viewerFile],
		[viewModelFile, viewerFile, viewFile]
	];

	for (const candidates of orderings) {
		const ranked = candidates.sort((left, right) => compareItemsByScore(left, right, searchText, true, ResourceAccessor, cache));
		assert.equal(ranked[0], viewFile);
	}
});
test('prepareSearchForScoring', function () {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册