提交 4b00bed1 编写于 作者: D Daniel Kelling

Add support for Unicode-aware regular expressions in Monaco Monarch language definitions

上级 a1ed3861
......@@ -24,6 +24,7 @@ export interface ILexerMin {
languageId: string;
noThrow: boolean;
ignoreCase: boolean;
unicode: boolean;
usesEmbedded: boolean;
defaultToken: string;
stateNames: { [stateName: string]: any; };
......@@ -34,6 +35,7 @@ export interface ILexer extends ILexerMin {
maxStack: number;
start: string | null;
ignoreCase: boolean;
unicode: boolean;
tokenPostfix: string;
tokenizer: { [stateName: string]: IRule[]; };
......
......@@ -79,7 +79,7 @@ function createKeywordMatcher(arr: string[], caseInsensitive: boolean = false):
// Lexer helpers
/**
* Compiles a regular expression string, adding the 'i' flag if 'ignoreCase' is set.
* Compiles a regular expression string, adding the 'i' flag if 'ignoreCase' is set, and the 'u' flag if 'unicode' is set.
* Also replaces @\w+ or sequences with the content of the specified attribute
*/
function compileRegExp(lexer: monarchCommon.ILexerMin, str: string): RegExp {
......@@ -103,7 +103,8 @@ function compileRegExp(lexer: monarchCommon.ILexerMin, str: string): RegExp {
});
}
return new RegExp(str, (lexer.ignoreCase ? 'i' : ''));
let flags = (lexer.ignoreCase ? 'i' : '') + (lexer.unicode ? 'u' : '');
return new RegExp(str, flags);
}
/**
......@@ -400,6 +401,7 @@ export function compile(languageId: string, json: IMonarchLanguage): monarchComm
// Set standard fields: be defensive about types
lexer.start = (typeof json.start === 'string' ? json.start : null);
lexer.ignoreCase = bool(json.ignoreCase, false);
lexer.unicode = bool(json.unicode, false);
lexer.tokenPostfix = string(json.tokenPostfix, '.' + lexer.languageId);
lexer.defaultToken = string(json.defaultToken, 'source');
......@@ -410,6 +412,7 @@ export function compile(languageId: string, json: IMonarchLanguage): monarchComm
let lexerMin: monarchCommon.ILexerMin = <any>json;
lexerMin.languageId = languageId;
lexerMin.ignoreCase = lexer.ignoreCase;
lexerMin.unicode = lexer.unicode;
lexerMin.noThrow = lexer.noThrow;
lexerMin.usesEmbedded = lexer.usesEmbedded;
lexerMin.stateNames = json.tokenizer;
......
......@@ -497,7 +497,8 @@ export class MonarchTokenizer implements modes.ITokenizationSupport {
let regex = rule.regex;
let regexSource = rule.regex.source;
if (regexSource.substr(0, 4) === '^(?:' && regexSource.substr(regexSource.length - 1, 1) === ')') {
regex = new RegExp(regexSource.substr(4, regexSource.length - 5), regex.ignoreCase ? 'i' : '');
let flags = (regex.ignoreCase ? 'i' : '') + (regex.unicode ? 'u' : '');
regex = new RegExp(regexSource.substr(4, regexSource.length - 5), flags);
}
let result = line.search(regex);
......
......@@ -21,6 +21,10 @@ export interface IMonarchLanguage {
* is the language case insensitive?
*/
ignoreCase?: boolean;
/**
* is the language unicode-aware? If set, regular expressions are compiled with the 'u' flag (e.g., /\u{1D306}/)
*/
unicode?: boolean;
/**
* if no match in the tokenizer assign this token class (default 'source')
*/
......
......@@ -6286,6 +6286,10 @@ declare namespace monaco.languages {
* is the language case insensitive?
*/
ignoreCase?: boolean;
/**
* is the language unicode-aware? If set, regular expressions are compiled with the 'u' flag (e.g., /\u{1D306}/)
*/
unicode?: boolean;
/**
* if no match in the tokenizer assign this token class (default 'source')
*/
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册