search_buffer.rs 12.8 KB
Newer Older
1
/*!
The `search_buffer` module is responsible for searching a single file all in a
single buffer. Typically, the source of the buffer is a memory map. This can
be useful for when memory maps are faster than streaming search.

Note that this module doesn't quite support everything that `search_stream`
does. Notably, showing contexts.
*/
A
Andrew Gallant 已提交
9 10 11 12
use std::cmp;
use std::path::Path;

use grep::Grep;
13
use termcolor::WriteColor;
A
Andrew Gallant 已提交
14 15

use printer::Printer;
16
use search_stream::{IterLines, Options, count_lines, is_binary};
A
Andrew Gallant 已提交
17 18 19 20 21 22 23

/// Searches a single file whose contents are held in one contiguous byte
/// buffer.
///
/// A searcher is built with `new`, configured through the builder-style
/// methods, and finally consumed by `run`.
pub struct BufferSearcher<'a, W: 'a> {
    /// Search settings, adjusted by the builder methods.
    opts: Options,
    /// Destination for matched lines, counts and paths.
    printer: &'a mut Printer<W>,
    /// Compiled matcher that is run over `buf`.
    grep: &'a Grep,
    /// Path handed to the printer together with every result.
    path: &'a Path,
    /// The complete contents being searched.
    buf: &'a [u8],
    /// Number of lines reported as matches so far.
    match_line_count: u64,
    /// Number of individual matches; `run` makes this `Some` only when
    /// `opts.count_matches` is set.
    match_count: Option<u64>,
    /// Running line number; `run` makes this `Some` only when
    /// `opts.line_number` is set.
    line_count: Option<u64>,
    /// `Some(0)` when `opts.byte_offset` is set, `None` otherwise. The value
    /// is forwarded unchanged to the printer.
    byte_offset: Option<u64>,
    /// Offset into `buf` up to which lines have already been counted.
    last_line: usize,
}

31
impl<'a, W: WriteColor> BufferSearcher<'a, W> {
A
Andrew Gallant 已提交
32 33 34 35 36 37 38 39 40 41 42 43
    /// Creates a searcher over `buf` using default options.
    ///
    /// `printer` receives all output, `grep` supplies the matcher and `path`
    /// is the path passed to the printer alongside results. Every counter
    /// starts cleared; the optional ones stay `None` until `run` enables
    /// them according to the configured options.
    pub fn new(
        printer: &'a mut Printer<W>,
        grep: &'a Grep,
        path: &'a Path,
        buf: &'a [u8],
    ) -> BufferSearcher<'a, W> {
        BufferSearcher {
            opts: Options::default(),
            printer,
            grep,
            path,
            buf,
            match_line_count: 0,
            match_count: None,
            line_count: None,
            byte_offset: None,
            last_line: 0,
        }
    }

52 53 54 55 56 57 58 59 60 61
    /// Toggles printing of a 0-based byte offset in front of each matching
    /// line (or in front of the match itself when -o is specified).
    ///
    /// Off by default.
    pub fn byte_offset(self, yes: bool) -> Self {
        let mut searcher = self;
        searcher.opts.byte_offset = yes;
        searcher
    }

A
Andrew Gallant 已提交
62 63 64 65 66 67 68 69
    /// Toggles count mode: a count is printed in place of the individual
    /// matches.
    ///
    /// Off by default.
    pub fn count(self, yes: bool) -> Self {
        let mut searcher = self;
        searcher.opts.count = yes;
        searcher
    }

70 71 72 73 74 75 76 77 78
    /// Toggles match-count mode: the number of individual matches is
    /// printed in place of the matches themselves.
    ///
    /// Off by default.
    pub fn count_matches(self, yes: bool) -> Self {
        let mut searcher = self;
        searcher.opts.count_matches = yes;
        searcher
    }

79 80 81 82 83 84 85 86
    /// Toggles path-only output: the path is printed in place of the
    /// individual matches.
    ///
    /// Off by default.
    pub fn files_with_matches(self, yes: bool) -> Self {
        let mut searcher = self;
        searcher.opts.files_with_matches = yes;
        searcher
    }

D
Daniel Luz 已提交
87 88 89 90 91 92 93 94 95
    /// Toggles reporting of files that do *not* match the given pattern:
    /// when on, the path of such a file is printed.
    ///
    /// Off by default.
    pub fn files_without_matches(self, yes: bool) -> Self {
        let mut searcher = self;
        searcher.opts.files_without_matches = yes;
        searcher
    }

A
Andrew Gallant 已提交
96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
    /// Chooses the byte this searcher treats as the end-of-line marker.
    pub fn eol(self, eol: u8) -> Self {
        let mut searcher = self;
        searcher.opts.eol = eol;
        searcher
    }

    /// Toggles inverted matching, in which lines that do *not* match the
    /// given pattern are the ones treated as matches.
    pub fn invert_match(self, yes: bool) -> Self {
        let mut searcher = self;
        searcher.opts.invert_match = yes;
        searcher
    }

    /// Toggles line numbering: when on, line numbers are computed and
    /// prefixed to every line of output.
    pub fn line_number(self, yes: bool) -> Self {
        let mut searcher = self;
        searcher.opts.line_number = yes;
        searcher
    }

A
Andrew Gallant 已提交
116 117 118 119 120 121 122 123
    /// Caps the number of matches at `count`.
    ///
    /// `None`, the default, means there is no cap.
    pub fn max_count(self, count: Option<u64>) -> Self {
        let mut searcher = self;
        searcher.opts.max_count = count;
        searcher
    }

124 125 126 127 128 129 130
    /// Toggles quiet mode: no output is shown and the search stops as soon
    /// as the first match has been found.
    pub fn quiet(self, yes: bool) -> Self {
        let mut searcher = self;
        searcher.opts.quiet = yes;
        searcher
    }

A
Andrew Gallant 已提交
131 132 133 134 135 136 137 138
    /// Toggles text mode, in which binary files are searched as though they
    /// were text.
    pub fn text(self, yes: bool) -> Self {
        let mut searcher = self;
        searcher.opts.text = yes;
        searcher
    }

    /// Runs the search over the whole buffer, consuming the searcher, and
    /// returns the number of lines that were reported as matches.
    ///
    /// Unless text mode is on, the first 10,240 bytes of the buffer (or all
    /// of it, if shorter) are handed to `is_binary`; when that reports a
    /// binary file nothing is searched or printed and 0 is returned.
    ///
    /// After the per-line pass, summary output is emitted depending on the
    /// options: a line count (`count`), otherwise an individual match count
    /// (`count_matches`), and the bare path for `files_with_matches` /
    /// `files_without_matches`.
    #[inline(never)]
    pub fn run(mut self) -> u64 {
        let binary_upto = cmp::min(10_240, self.buf.len());
        if !self.opts.text && is_binary(&self.buf[..binary_upto], true) {
            return 0;
        }

        // Reset the counters; the optional ones are only tracked when the
        // corresponding option asks for them.
        self.match_line_count = 0;
        self.line_count = if self.opts.line_number { Some(0) } else { None };
        // The memory map searcher uses one contiguous block of bytes, so the
        // offsets given the printer are sufficient to compute the byte offset.
        self.byte_offset = if self.opts.byte_offset { Some(0) } else { None };
        self.match_count = if self.opts.count_matches { Some(0) } else { None };

        let mut last_end = 0;
        for m in self.grep.iter(self.buf) {
            if self.opts.invert_match {
                // Everything between the previous match and this one is
                // what gets reported when matching is inverted.
                self.print_inverted_matches(last_end, m.start());
            } else {
                self.print_match(m.start(), m.end());
            }
            last_end = m.end();
            if self.opts.terminate(self.match_line_count) {
                break;
            }
        }
        // Inverted matching still has to cover the tail of the buffer that
        // follows the final match.
        if self.opts.invert_match && !self.opts.terminate(self.match_line_count) {
            let upto = self.buf.len();
            self.print_inverted_matches(last_end, upto);
        }

        if self.opts.count && self.match_line_count > 0 {
            self.printer.path_count(self.path, self.match_line_count);
        } else if self.opts.count_matches {
            // `match_count` is `None` when match counting is off, which is
            // treated the same as "no matches".
            let total = self.match_count.unwrap_or(0);
            if total > 0 {
                self.printer.path_count(self.path, total);
            }
        }
        if self.opts.files_with_matches && self.match_line_count > 0 {
            self.printer.path(self.path);
        }
        if self.opts.files_without_matches && self.match_line_count == 0 {
            self.printer.path(self.path);
        }
        self.match_line_count
    }

182 183 184 185 186 187 188 189 190
    /// Adds the number of regex matches found inside `buf[start..end]` to
    /// the individual match counter. Does nothing when that counter is not
    /// being tracked.
    #[inline(always)]
    fn count_individual_matches(&mut self, start: usize, end: usize) {
        if let Some(ref mut total) = self.match_count {
            let region = &self.buf[start..end];
            let found = self.grep.regex().find_iter(region).count();
            *total += found as u64;
        }
    }

A
Andrew Gallant 已提交
191 192
    #[inline(always)]
    pub fn print_match(&mut self, start: usize, end: usize) {
193
        self.match_line_count += 1;
194
        self.count_individual_matches(start, end);
195
        if self.opts.skip_matches() {
A
Andrew Gallant 已提交
196 197 198 199 200 201
            return;
        }
        self.count_lines(start);
        self.add_line(end);
        self.printer.matched(
            self.grep.regex(), self.path, self.buf,
202
            start, end, self.line_count, self.byte_offset);
A
Andrew Gallant 已提交
203 204 205 206 207 208 209
    }

    /// Reports every line that `IterLines` yields from offset `start` within
    /// `buf[..end]` as a match.
    ///
    /// Only meaningful while inverted matching is enabled (checked with a
    /// debug assertion). Stops early once the options say the search should
    /// terminate.
    #[inline(always)]
    fn print_inverted_matches(&mut self, start: usize, end: usize) {
        debug_assert!(self.opts.invert_match);
        let mut it = IterLines::new(self.opts.eol, start);
        while let Some((s, e)) = it.next(&self.buf[..end]) {
            if self.opts.terminate(self.match_line_count) {
                return;
            }
            self.print_match(s, e);
        }
    }

    /// Advances the running line number by the number of lines between the
    /// last counted position and `upto`, then remembers `upto` as the new
    /// position. Does nothing when line numbers are not being tracked.
    #[inline(always)]
    fn count_lines(&mut self, upto: usize) {
        let seen = match self.line_count {
            Some(n) => n,
            None => return,
        };
        let skipped = count_lines(&self.buf[self.last_line..upto], self.opts.eol);
        self.line_count = Some(seen + skipped);
        self.last_line = upto;
    }

    /// Bumps the running line number by one and records `line_end` as the
    /// position counted so far. Does nothing when line numbers are not
    /// being tracked.
    #[inline(always)]
    fn add_line(&mut self, line_end: usize) {
        match self.line_count {
            None => {}
            Some(n) => {
                self.line_count = Some(n + 1);
                self.last_line = line_end;
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use std::path::Path;

    use grep::GrepBuilder;

    use printer::Printer;
    use termcolor;

    use super::BufferSearcher;

    // Six-line haystack shared by most tests. The final line has no
    // trailing newline.
    const SHERLOCK: &'static str = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
Holmeses, success in the province of detective work must always
be, to a very large extent, the result of luck. Sherlock Holmes
can extract a clew from a wisp of straw or a flake of cigar ash;
but Doctor Watson has to have it taken out for him and dusted,
and exhibited clearly, with a label attached.\
";

    // Path reported for every search in these tests.
    fn test_path() -> &'static Path {
        Path::new("/baz.rs")
    }

    type TestSearcher<'a> = BufferSearcher<'a, termcolor::NoColor<Vec<u8>>>;

    // Searches `haystack` for `pat`, letting `map` configure the searcher
    // first. Returns the value of `run` and everything that was printed.
    fn search<F: FnMut(TestSearcher) -> TestSearcher>(
        pat: &str,
        haystack: &str,
        mut map: F,
    ) -> (u64, String) {
        let outbuf = termcolor::NoColor::new(vec![]);
        let mut pp = Printer::new(outbuf).with_filename(true);
        let grep = GrepBuilder::new(pat).build().unwrap();
        let count = {
            let searcher = BufferSearcher::new(
                &mut pp, &grep, test_path(), haystack.as_bytes());
            map(searcher).run()
        };
        (count, String::from_utf8(pp.into_inner().into_inner()).unwrap())
    }

    #[test]
    fn basic_search() {
        let (count, out) = search("Sherlock", SHERLOCK, |s| s);
        assert_eq!(2, count);
        assert_eq!(out, "\
/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock
/baz.rs:be, to a very large extent, the result of luck. Sherlock Holmes
");
    }

    #[test]
    fn binary() {
        let text = "Sherlock\n\x00Holmes\n";
        let (count, out) = search("Sherlock|Holmes", text, |s| s);
        assert_eq!(0, count);
        assert_eq!(out, "");
    }

    #[test]
    fn binary_text() {
        let text = "Sherlock\n\x00Holmes\n";
        let (count, out) = search("Sherlock|Holmes", text, |s| s.text(true));
        assert_eq!(2, count);
        assert_eq!(out, "/baz.rs:Sherlock\n/baz.rs:\x00Holmes\n");
    }

    #[test]
    fn line_numbers() {
        let (count, out) = search(
            "Sherlock", SHERLOCK, |s| s.line_number(true));
        assert_eq!(2, count);
        assert_eq!(out, "\
/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock
/baz.rs:3:be, to a very large extent, the result of luck. Sherlock Holmes
");
    }

    #[test]
    fn byte_offset() {
        let (_, out) = search(
            "Sherlock", SHERLOCK, |s| s.byte_offset(true));
        assert_eq!(out, "\
/baz.rs:0:For the Doctor Watsons of this world, as opposed to the Sherlock
/baz.rs:129:be, to a very large extent, the result of luck. Sherlock Holmes
");
    }

    #[test]
    fn byte_offset_inverted() {
        let (_, out) = search("Sherlock", SHERLOCK, |s| {
            s.invert_match(true).byte_offset(true)
        });
        assert_eq!(out, "\
/baz.rs:65:Holmeses, success in the province of detective work must always
/baz.rs:193:can extract a clew from a wisp of straw or a flake of cigar ash;
/baz.rs:258:but Doctor Watson has to have it taken out for him and dusted,
/baz.rs:321:and exhibited clearly, with a label attached.
");
    }

    #[test]
    fn count() {
        let (count, out) = search(
            "Sherlock", SHERLOCK, |s| s.count(true));
        assert_eq!(2, count);
        assert_eq!(out, "/baz.rs:2\n");
    }

    #[test]
    fn count_matches() {
        let (_, out) = search(
            "the", SHERLOCK, |s| s.count_matches(true));
        assert_eq!(out, "/baz.rs:4\n");
    }

    #[test]
    fn files_with_matches() {
        let (count, out) = search(
            "Sherlock", SHERLOCK, |s| s.files_with_matches(true));
        assert_eq!(1, count);
        assert_eq!(out, "/baz.rs\n");
    }

    #[test]
    fn files_without_matches() {
        let (count, out) = search(
            "zzzz", SHERLOCK, |s| s.files_without_matches(true));
        assert_eq!(0, count);
        assert_eq!(out, "/baz.rs\n");
    }

    #[test]
    fn max_count() {
        let (count, out) = search(
            "Sherlock", SHERLOCK, |s| s.max_count(Some(1)));
        assert_eq!(1, count);
        assert_eq!(out, "\
/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock
");
    }

    #[test]
    fn invert_match_max_count() {
        let (count, out) = search(
            "zzzz", SHERLOCK, |s| s.invert_match(true).max_count(Some(1)));
        assert_eq!(1, count);
        assert_eq!(out, "\
/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock
");
    }

    #[test]
    fn invert_match() {
        let (count, out) = search(
            "Sherlock", SHERLOCK, |s| s.invert_match(true));
        assert_eq!(4, count);
        assert_eq!(out, "\
/baz.rs:Holmeses, success in the province of detective work must always
/baz.rs:can extract a clew from a wisp of straw or a flake of cigar ash;
/baz.rs:but Doctor Watson has to have it taken out for him and dusted,
/baz.rs:and exhibited clearly, with a label attached.
");
    }

    #[test]
    fn invert_match_line_numbers() {
        let (count, out) = search("Sherlock", SHERLOCK, |s| {
            s.invert_match(true).line_number(true)
        });
        assert_eq!(4, count);
        assert_eq!(out, "\
/baz.rs:2:Holmeses, success in the province of detective work must always
/baz.rs:4:can extract a clew from a wisp of straw or a flake of cigar ash;
/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted,
/baz.rs:6:and exhibited clearly, with a label attached.
");
    }

    #[test]
    fn invert_match_count() {
        let (count, out) = search("Sherlock", SHERLOCK, |s| {
            s.invert_match(true).count(true)
        });
        assert_eq!(4, count);
        assert_eq!(out, "/baz.rs:4\n");
    }
}