提交 0222e024 编写于 作者: A Andrew Gallant

Fixes a bug with --smart-case.

This was a subtle bug, but the big picture was that the smart case
information wasn't being carried through to the literal extraction in
some cases. When this happened, it was possible to get back an incomplete
set of literals, which would therefore miss some valid matches.

The fix to this is to actually parse the regex and determine whether
smart case applies before doing anything else. It's a little extra work,
but parsing is pretty fast.

Fixes #199
上级 5bd0edbb
......@@ -9,7 +9,7 @@ principled.
*/
use std::cmp;
use regex::bytes::Regex;
use regex::bytes::RegexBuilder;
use syntax::{
Expr, Literals, Lit,
ByteClass, ByteRange, CharClass, ClassRange, Repeater,
......@@ -33,7 +33,7 @@ impl LiteralSets {
}
}
pub fn to_regex(&self) -> Option<Regex> {
pub fn to_regex_builder(&self) -> Option<RegexBuilder> {
if self.prefixes.all_complete() && !self.prefixes.is_empty() {
debug!("literal prefixes detected: {:?}", self.prefixes);
// When this is true, the regex engine will do a literal scan.
......@@ -79,14 +79,12 @@ impl LiteralSets {
debug!("required literals found: {:?}", req_lits);
let alts: Vec<String> =
req_lits.into_iter().map(|x| bytes_to_regex(x)).collect();
// Literals always compile.
Some(Regex::new(&alts.join("|")).unwrap())
Some(RegexBuilder::new(&alts.join("|")))
} else if lit.is_empty() {
None
} else {
// Literals always compile.
debug!("required literal found: {:?}", show(lit));
Some(Regex::new(&bytes_to_regex(lit)).unwrap())
Some(RegexBuilder::new(&bytes_to_regex(lit)))
}
}
}
......
......@@ -144,14 +144,19 @@ impl GrepBuilder {
let expr = try!(self.parse());
let literals = LiteralSets::create(&expr);
let re = try!(self.regex(&expr));
let required = literals.to_regex().or_else(|| {
let expr = match strip_unicode_word_boundaries(&expr) {
None => return None,
Some(expr) => expr,
};
debug!("Stripped Unicode word boundaries. New AST:\n{:?}", expr);
self.regex(&expr).ok()
});
let required = match literals.to_regex_builder() {
Some(builder) => Some(try!(self.regex_build(builder))),
None => {
match strip_unicode_word_boundaries(&expr) {
None => None,
Some(expr) => {
debug!("Stripped Unicode word boundaries. \
New AST:\n{:?}", expr);
self.regex(&expr).ok()
}
}
}
};
Ok(Grep {
re: re,
required: required,
......@@ -162,11 +167,12 @@ impl GrepBuilder {
/// Creates a new regex from the given expression with the current
/// configuration.
fn regex(&self, expr: &Expr) -> Result<Regex> {
let casei =
self.opts.case_insensitive
|| (self.opts.case_smart && !has_uppercase_literal(expr));
RegexBuilder::new(&expr.to_string())
.case_insensitive(casei)
self.regex_build(RegexBuilder::new(&expr.to_string()))
}
/// Builds a new regex from the given builder using the caller's settings.
fn regex_build(&self, builder: RegexBuilder) -> Result<Regex> {
builder
.multi_line(true)
.unicode(true)
.size_limit(self.opts.size_limit)
......@@ -182,12 +188,30 @@ impl GrepBuilder {
try!(syntax::ExprBuilder::new()
.allow_bytes(true)
.unicode(true)
.case_insensitive(self.opts.case_insensitive)
.case_insensitive(try!(self.is_case_insensitive()))
.parse(&self.pattern));
let expr = try!(nonl::remove(expr, self.opts.line_terminator));
debug!("regex ast:\n{:#?}", expr);
Ok(expr)
}
/// Determines whether the case insensitive flag should be enabled or not.
///
/// An error is returned if the regex could not be parsed.
fn is_case_insensitive(&self) -> Result<bool> {
if self.opts.case_insensitive {
return Ok(true);
}
if !self.opts.case_smart {
return Ok(false);
}
let expr =
try!(syntax::ExprBuilder::new()
.allow_bytes(true)
.unicode(true)
.parse(&self.pattern));
Ok(!has_uppercase_literal(&expr))
}
}
impl Grep {
......
......@@ -874,6 +874,15 @@ clean!(regression_184, "test", ".", |wd: WorkDir, mut cmd: Command| {
assert_eq!(lines, "baz:test\n");
});
// See: https://github.com/BurntSushi/ripgrep/issues/199
clean!(regression_199, r"\btest\b", ".", |wd: WorkDir, mut cmd: Command| {
wd.create("foo", "tEsT");
cmd.arg("--smart-case");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "foo:tEsT\n");
});
// See: https://github.com/BurntSushi/ripgrep/issues/206
clean!(regression_206, "test", ".", |wd: WorkDir, mut cmd: Command| {
wd.create_dir("foo");
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册