comments.rs 13.9 KB
Newer Older
C
Chris Wong 已提交
1
// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 3 4 5 6 7 8 9 10
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

S
Steven Fackler 已提交
11 12
pub use self::CommentStyle::*;

13
use ast;
14
use codemap::CodeMap;
15
use syntax_pos::{BytePos, CharPos, Pos, FileName};
16 17
use parse::lexer::{is_block_doc_comment, is_pattern_whitespace};
use parse::lexer::{self, ParseSess, StringReader, TokenAndSpan};
18
use print::pprust;
19
use str::char_at;
20

A
Alex Crichton 已提交
21
use std::io::Read;
P
Paul Collier 已提交
22
use std::usize;
23

24
/// Where a gathered comment sits relative to the code around it.
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum CommentStyle {
    /// Every line of the comment stands alone: no code on either side.
    Isolated,
    /// The comment follows code on the same line.
    Trailing,
    /// The comment is embedded in code: code before `/* foo */` and after it.
    Mixed,
    /// Not a real comment, just a manual blank line ("\n\n") kept for layout.
    BlankLine,
}

36
#[derive(Clone)]
37
pub struct Comment {
38
    pub style: CommentStyle,
39
    pub lines: Vec<String>,
40
    pub pos: BytePos,
41
}
42

43
pub fn is_doc_comment(s: &str) -> bool {
44 45
    (s.starts_with("///") && super::is_doc_comment(s)) || s.starts_with("//!") ||
    (s.starts_with("/**") && is_block_doc_comment(s)) || s.starts_with("/*!")
46 47
}

48
pub fn doc_comment_style(comment: &str) -> ast::AttrStyle {
P
Patrick Walton 已提交
49
    assert!(is_doc_comment(comment));
50
    if comment.starts_with("//!") || comment.starts_with("/*!") {
51
        ast::AttrStyle::Inner
52
    } else {
53
        ast::AttrStyle::Outer
54 55 56
    }
}

57
/// Strips the comment markers and decorative framing from a doc comment,
/// returning just its text.
///
/// * Line comments (`///!`, `///`, `//!`, `//`) lose their prefix; the rest
///   of the text is returned unchanged.
/// * Block comments lose their three-byte opener (`/**` or `/*!`) and the
///   closing `*/`. The body is then trimmed vertically (decorative and blank
///   lines at the top and bottom are dropped) and horizontally (a shared
///   leading `[ \t]*\*` column is removed when possible), and the remaining
///   lines are joined with `\n`.
///
/// # Panics
///
/// Panics with "not a doc-comment" if `comment` starts with neither `//` nor
/// `/*`, or if it starts with `/*` but the opener and closer cannot be
/// sliced off (for example `/**/`, which is too short).
pub fn strip_doc_comment_decoration(comment: &str) -> String {
    /// Returns `true` if `line` is pure closing decoration: nothing but `*`,
    /// optionally preceded by a single whitespace character.
    ///
    /// The first character must itself be whitespace or `*`; ignoring it
    /// unconditionally would discard real text such as a final line `b`.
    fn is_closing_decoration(line: &str) -> bool {
        line.chars()
            .enumerate()
            .all(|(k, c)| c == '*' || (k == 0 && c.is_whitespace()))
    }

    /// remove decorative and whitespace-only lines from the start/end of lines
    fn vertical_trim(lines: Vec<String>) -> Vec<String> {
        let mut i = 0;
        let mut j = lines.len();
        // first line of all-stars should be omitted
        if !lines.is_empty() && lines[0].chars().all(|c| c == '*') {
            i += 1;
        }
        while i < j && lines[i].trim().is_empty() {
            i += 1;
        }
        // like the first, a last line of all stars should be omitted
        if j > i && is_closing_decoration(&lines[j - 1]) {
            j -= 1;
        }
        while j > i && lines[j - 1].trim().is_empty() {
            j -= 1;
        }
        lines[i..j].to_vec()
    }

    /// remove a "[ \t]*\*" block from each line, if possible
    fn horizontal_trim(lines: Vec<String>) -> Vec<String> {
        // Column of the shared `*`; stays at `usize::MAX` until the first
        // star has been seen.
        let mut i = usize::MAX;
        let mut can_trim = true;
        let mut first = true;
        for line in &lines {
            for (j, c) in line.chars().enumerate() {
                // Anything past the star column, or any character other than
                // `*`, space or tab before it, means there is no common block.
                if j > i || !"* \t".contains(c) {
                    can_trim = false;
                    break;
                }
                if c == '*' {
                    if first {
                        i = j;
                        first = false;
                    } else if i != j {
                        // This line's star is in a different column.
                        can_trim = false;
                    }
                    break;
                }
            }
            // The line must be long enough to reach the star column (this
            // also rejects the case where no star has been found at all).
            if i >= line.len() {
                can_trim = false;
            }
            if !can_trim {
                break;
            }
        }

        if can_trim {
            // Everything up to and including column `i` is ASCII (`*`, space
            // or tab), so `i + 1` is a valid byte offset in every line.
            lines.iter()
                 .map(|line| line[i + 1..].to_string())
                 .collect()
        } else {
            lines
        }
    }

    // One-line comments lose their prefix. Longer prefixes are listed first
    // so that `///` is not treated as `//` followed by a `/`.
    const ONELINERS: &'static [&'static str] = &["///!", "///", "//!", "//"];
    for prefix in ONELINERS {
        if comment.starts_with(*prefix) {
            return comment[prefix.len()..].to_string();
        }
    }

    if comment.starts_with("/*") {
        // `len() - 2` cannot underflow: the "/*" prefix guarantees two bytes.
        // `get` yields `None` instead of panicking when the comment is too
        // short to contain both the opener and the closer.
        if let Some(body) = comment.get(3..comment.len() - 2) {
            let lines = body.lines()
                            .map(|s| s.to_string())
                            .collect::<Vec<String>>();

            let lines = vertical_trim(lines);
            let lines = horizontal_trim(lines);

            return lines.join("\n");
        }
    }

    panic!("not a doc-comment: {}", comment);
}

144
fn push_blank_line_comment(rdr: &StringReader, comments: &mut Vec<Comment>) {
145
    debug!(">>> blank-line comment");
146 147
    comments.push(Comment {
        style: BlankLine,
148
        lines: Vec::new(),
149
        pos: rdr.pos,
150
    });
151 152
}

153
fn consume_whitespace_counting_blank_lines(rdr: &mut StringReader, comments: &mut Vec<Comment>) {
154
    while is_pattern_whitespace(rdr.ch) && !rdr.is_eof() {
155
        if rdr.ch_is('\n') {
156
            push_blank_line_comment(rdr, &mut *comments);
157
        }
C
Corey Richardson 已提交
158
        rdr.bump();
159 160 161
    }
}

162 163
fn read_shebang_comment(rdr: &mut StringReader,
                        code_to_the_left: bool,
164
                        comments: &mut Vec<Comment>) {
165
    debug!(">>> shebang comment");
166
    let p = rdr.pos;
167
    debug!("<<< shebang comment");
168
    comments.push(Comment {
T
Tshepang Lekhonkhobe 已提交
169
        style: if code_to_the_left { Trailing } else { Isolated },
170 171
        lines: vec![rdr.read_one_line_comment()],
        pos: p,
172
    });
173 174
}

175 176
fn read_line_comments(rdr: &mut StringReader,
                      code_to_the_left: bool,
177
                      comments: &mut Vec<Comment>) {
178
    debug!(">>> line comments");
179
    let p = rdr.pos;
180
    let mut lines: Vec<String> = Vec::new();
181
    while rdr.ch_is('/') && rdr.nextch_is('/') {
C
Corey Richardson 已提交
182
        let line = rdr.read_one_line_comment();
183
        debug!("{}", line);
184
        // Doc comments are not put in comments.
185
        if is_doc_comment(&line[..]) {
186 187
            break;
        }
188
        lines.push(line);
C
Corey Richardson 已提交
189
        rdr.consume_non_eol_whitespace();
190
    }
191
    debug!("<<< line comments");
192
    if !lines.is_empty() {
193
        comments.push(Comment {
T
Tshepang Lekhonkhobe 已提交
194
            style: if code_to_the_left { Trailing } else { Isolated },
195
            lines,
196
            pos: p,
197 198
        });
    }
199 200
}

201 202 203
/// Returns None if the first col chars of s contain a non-whitespace char.
/// Otherwise returns Some(k) where k is first char offset after that leading
/// whitespace.  Note k may be outside bounds of s.
P
Paul Collier 已提交
204
fn all_whitespace(s: &str, col: CharPos) -> Option<usize> {
205
    let len = s.len();
206
    let mut col = col.to_usize();
P
Paul Collier 已提交
207
    let mut cursor: usize = 0;
208
    while col > 0 && cursor < len {
209
        let ch = char_at(s, cursor);
210
        if !ch.is_whitespace() {
211 212
            return None;
        }
213
        cursor += ch.len_utf8();
214
        col -= 1;
B
Brian Anderson 已提交
215
    }
216
    return Some(cursor);
217 218
}

219
fn trim_whitespace_prefix_and_push_line(lines: &mut Vec<String>, s: String, col: CharPos) {
T
Tim Chevalier 已提交
220
    let len = s.len();
221
    let s1 = match all_whitespace(&s[..], col) {
222 223
        Some(col) => {
            if col < len {
J
Jorge Aparicio 已提交
224
                (&s[col..len]).to_string()
225
            } else {
226
                "".to_string()
227
            }
228 229 230
        }
        None => s,
    };
231
    debug!("pushing line: {}", s1);
232
    lines.push(s1);
233 234
}

235
fn read_block_comment(rdr: &mut StringReader,
236
                      code_to_the_left: bool,
237
                      comments: &mut Vec<Comment>) {
238
    debug!(">>> block comment");
239
    let p = rdr.pos;
240
    let mut lines: Vec<String> = Vec::new();
241
    let col = rdr.col;
C
Corey Richardson 已提交
242 243
    rdr.bump();
    rdr.bump();
244

245
    let mut curr_line = String::from("/*");
246

247
    // doc-comments are not really comments, they are attributes
248 249 250
    if (rdr.ch_is('*') && !rdr.nextch_is('*')) || rdr.ch_is('!') {
        while !(rdr.ch_is('*') && rdr.nextch_is('/')) && !rdr.is_eof() {
            curr_line.push(rdr.ch.unwrap());
C
Corey Richardson 已提交
251
            rdr.bump();
252
        }
C
Corey Richardson 已提交
253
        if !rdr.is_eof() {
254
            curr_line.push_str("*/");
C
Corey Richardson 已提交
255 256
            rdr.bump();
            rdr.bump();
257
        }
258
        if is_block_doc_comment(&curr_line[..]) {
259
            return;
260
        }
M
Marvin Löbel 已提交
261
        assert!(!curr_line.contains('\n'));
262
        lines.push(curr_line);
263
    } else {
P
Paul Collier 已提交
264
        let mut level: isize = 1;
265
        while level > 0 {
266
            debug!("=== block comment level {}", level);
C
Corey Richardson 已提交
267
            if rdr.is_eof() {
268
                panic!(rdr.fatal("unterminated block comment"));
269
            }
270
            if rdr.ch_is('\n') {
271
                trim_whitespace_prefix_and_push_line(&mut lines, curr_line, col);
272
                curr_line = String::new();
C
Corey Richardson 已提交
273
                rdr.bump();
274
            } else {
275 276
                curr_line.push(rdr.ch.unwrap());
                if rdr.ch_is('/') && rdr.nextch_is('*') {
C
Corey Richardson 已提交
277 278
                    rdr.bump();
                    rdr.bump();
279
                    curr_line.push('*');
280 281
                    level += 1;
                } else {
282
                    if rdr.ch_is('*') && rdr.nextch_is('/') {
C
Corey Richardson 已提交
283 284
                        rdr.bump();
                        rdr.bump();
285
                        curr_line.push('/');
286
                        level -= 1;
287 288 289
                    } else {
                        rdr.bump();
                    }
290
                }
291 292
            }
        }
293
        if !curr_line.is_empty() {
294
            trim_whitespace_prefix_and_push_line(&mut lines, curr_line, col);
295
        }
296
    }
297

298 299 300 301 302
    let mut style = if code_to_the_left {
        Trailing
    } else {
        Isolated
    };
C
Corey Richardson 已提交
303
    rdr.consume_non_eol_whitespace();
304
    if !rdr.is_eof() && !rdr.ch_is('\n') && lines.len() == 1 {
305
        style = Mixed;
306
    }
307
    debug!("<<< block comment");
308
    comments.push(Comment {
309 310
        style,
        lines,
311 312
        pos: p,
    });
313 314 315
}


316 317 318 319
fn consume_comment(rdr: &mut StringReader,
                   comments: &mut Vec<Comment>,
                   code_to_the_left: &mut bool,
                   anything_to_the_left: &mut bool) {
320
    debug!(">>> consume comment");
321
    if rdr.ch_is('/') && rdr.nextch_is('/') {
322 323 324
        read_line_comments(rdr, *code_to_the_left, comments);
        *code_to_the_left = false;
        *anything_to_the_left = false;
325
    } else if rdr.ch_is('/') && rdr.nextch_is('*') {
326 327
        read_block_comment(rdr, *code_to_the_left, comments);
        *anything_to_the_left = true;
328
    } else if rdr.ch_is('#') && rdr.nextch_is('!') {
329 330 331
        read_shebang_comment(rdr, *code_to_the_left, comments);
        *code_to_the_left = false;
        *anything_to_the_left = false;
332 333 334
    } else {
        panic!();
    }
335
    debug!("<<< consume comment");
336 337
}

338
#[derive(Clone)]
339
pub struct Literal {
340
    pub lit: String,
341
    pub pos: BytePos,
342
}
343

J
John Clements 已提交
344 345
// it appears this function is called only from pprust... that's
// probably not a good thing.
346
pub fn gather_comments_and_literals(sess: &ParseSess, path: FileName, srdr: &mut Read)
347
                                    -> (Vec<Comment>, Vec<Literal>) {
A
Alex Crichton 已提交
348 349
    let mut src = Vec::new();
    srdr.read_to_end(&mut src).unwrap();
350
    let src = String::from_utf8(src).unwrap();
351 352
    let cm = CodeMap::new(sess.codemap().path_mapping().clone());
    let filemap = cm.new_filemap(path, src);
353
    let mut rdr = lexer::StringReader::new_raw(sess, filemap);
354

355 356
    let mut comments: Vec<Comment> = Vec::new();
    let mut literals: Vec<Literal> = Vec::new();
357 358
    let mut code_to_the_left = false; // Only code
    let mut anything_to_the_left = false; // Code or comments
C
Corey Richardson 已提交
359
    while !rdr.is_eof() {
360
        loop {
361
            // Eat all the whitespace and count blank lines.
C
Corey Richardson 已提交
362
            rdr.consume_non_eol_whitespace();
363
            if rdr.ch_is('\n') {
364 365 366
                if anything_to_the_left {
                    rdr.bump(); // The line is not blank, do not count.
                }
367
                consume_whitespace_counting_blank_lines(&mut rdr, &mut comments);
368 369
                code_to_the_left = false;
                anything_to_the_left = false;
370
            }
371 372 373 374 375 376
            // Eat one comment group
            if rdr.peeking_at_comment() {
                consume_comment(&mut rdr, &mut comments,
                                &mut code_to_the_left, &mut anything_to_the_left);
            } else {
                break
377 378
            }
        }
379

380
        let bstart = rdr.pos;
P
Paul Stansifer 已提交
381
        rdr.next_token();
382
        // discard, and look ahead; we're working with internal state
383
        let TokenAndSpan { tok, sp } = rdr.peek();
384
        if tok.is_lit() {
C
Corey Richardson 已提交
385
            rdr.with_str_from(bstart, |s| {
386
                debug!("tok lit: {}", s);
387 388
                literals.push(Literal {
                    lit: s.to_string(),
389
                    pos: sp.lo(),
390
                });
391
            })
392
        } else {
393
            debug!("tok: {}", pprust::token_to_string(&tok));
394
        }
395 396
        code_to_the_left = true;
        anything_to_the_left = true;
397
    }
398 399

    (comments, literals)
400
}
401 402

#[cfg(test)]
mod tests {
    use super::*;

    // A shared ` *` column is removed; a second star on a line is kept.
    #[test]
    fn test_block_doc_comment_1() {
        let comment = "/**\n * Test \n **  Test\n *   Test\n*/";
        let stripped = strip_doc_comment_decoration(comment);
        assert_eq!(stripped, " Test \n*  Test\n   Test");
    }

    // Indentation after the shared star column is preserved.
    #[test]
    fn test_block_doc_comment_2() {
        let comment = "/**\n * Test\n *  Test\n*/";
        let stripped = strip_doc_comment_decoration(comment);
        assert_eq!(stripped, " Test\n  Test");
    }

    // Stars that are part of the text must not be treated as decoration.
    #[test]
    fn test_block_doc_comment_3() {
        let comment = "/**\n let a: *i32;\n *a = 5;\n*/";
        let stripped = strip_doc_comment_decoration(comment);
        assert_eq!(stripped, " let a: *i32;\n *a = 5;");
    }

    // All-star first and last lines are dropped.
    #[test]
    fn test_block_doc_comment_4() {
        let comment = "/*******************\n test\n *********************/";
        let stripped = strip_doc_comment_decoration(comment);
        assert_eq!(stripped, " test");
    }

    // Each one-line prefix is exercised once with and once without a space
    // after it.
    #[test]
    fn test_line_doc_comment() {
        let stripped = strip_doc_comment_decoration("/// test");
        assert_eq!(stripped, " test");
        let stripped = strip_doc_comment_decoration("///! test");
        assert_eq!(stripped, " test");
        let stripped = strip_doc_comment_decoration("//! test");
        assert_eq!(stripped, " test");
        let stripped = strip_doc_comment_decoration("// test");
        assert_eq!(stripped, " test");
        let stripped = strip_doc_comment_decoration("///test");
        assert_eq!(stripped, "test");
        let stripped = strip_doc_comment_decoration("///!test");
        assert_eq!(stripped, "test");
        let stripped = strip_doc_comment_decoration("//!test");
        assert_eq!(stripped, "test");
        let stripped = strip_doc_comment_decoration("//test");
        assert_eq!(stripped, "test");
    }
}