#!/usr/bin/env python

import os, sys, re, string, fnmatch
# Modules that are processed when the pseudo-module name "all" is requested.
allmodules = ["core", "flann", "imgproc", "ml", "highgui", "video", "features2d", "calib3d", "objdetect", "legacy", "contrib", "gpu", "androidcamera", "java", "python", "stitching", "ts", "photo", "nonfree", "videostab", "ocl", "superres"]

# Output switches; "verbose" is turned on from the command line.
verbose = False
show_warnings = True
show_errors = True
show_critical_errors = True

# Parameter names (per documented function) that must not trigger
# "redefinition" or "unexisting parameter" diagnostics.
# Every value is a real tuple: a bare parenthesised string such as ("ddepth")
# is just a string, and "x in <string>" is a substring test, not a membership test.
params_blacklist = {
    "fromarray" : ("object", "allowND"), # python only function
    "reprojectImageTo3D" : ("ddepth",),  # python only argument
    "composeRT" : ("d*d*",),             # wildcards in parameter names are not supported by this parser
    "CvSVM::train_auto" : ("\\*Grid",),  # wildcards in parameter names are not supported by this parser
    "error" : ("args",), # parameter of supporting macro
    "getConvertElem" : ("from", "cn", "to", "beta", "alpha"), # arguments of returned functions
    "gpu::swapChannels" : ("dstOrder",) # parameter is not parsed correctly by the hdr_parser
}

# Numeric codes embedded in the E%03d / W%03d diagnostics.
ERROR_001_SECTIONFAILURE      = 1
WARNING_002_HDRWHITESPACE     = 2
ERROR_003_PARENTHESES         = 3
WARNING_004_TABS              = 4
ERROR_005_REDEFENITIONPARAM   = 5
ERROR_006_REDEFENITIONFUNC    = 6
WARNING_007_UNDOCUMENTEDPARAM = 7
WARNING_008_MISSINGPARAM      = 8
WARNING_009_HDRMISMATCH       = 9
ERROR_010_NOMODULE            = 10
ERROR_011_EOLEXPECTED         = 11

# For functions documented with a wildcard parameter name: maps each real
# parameter name to the wildcard name whose description it shares.
params_mapping = {
    "composeRT" : {
        "dr3dr1" : "d*d*",
        "dr3dr2" : "d*d*",
        "dr3dt1" : "d*d*",
        "dr3dt2" : "d*d*",
        "dt3dr1" : "d*d*",
        "dt3dr2" : "d*d*",
        "dt3dt1" : "d*d*",
        "dt3dt2" : "d*d*"
        },
    "CvSVM::train_auto" : {
        "coeffGrid" : "\\*Grid",
        "degreeGrid" : "\\*Grid",
        "gammaGrid" : "\\*Grid",
        "nuGrid" : "\\*Grid",
        "pGrid" : "\\*Grid"
    }
}

# Section titles that are plain text; when such a section fails validation it
# is counted as skipped instead of being dumped as an invalid definition.
known_text_sections_names = ["Appendix", "Results", "Prerequisites", "Introduction", "Description"]

class DeclarationParser(object):
    """Collects the text of one ``.. ocv:*function::`` declaration.

    A declaration may span several lines; ``balance`` tracks the number of
    still-open parentheses and ``isready()`` reports when they are all closed.

    Attributes:
        lang:    language tag derived from the directive ("C++", "C", ...),
                 or "" for an idle parser.
        fdecl:   declaration text collected so far (directive prefix removed).
        balance: count of "(" minus count of ")" in ``fdecl``.
    """

    # directive prefix -> language tag
    _DIRECTIVES = (
        (".. ocv:function::", "C++"),
        (".. ocv:cfunction::", "C"),
        (".. ocv:pyfunction::", "Python2"),
        (".. ocv:pyoldfunction::", "Python1"),
        (".. ocv:jfunction::", "Java"),
    )

    def __init__(self, line=None):
        """Start parsing from the directive line, or create an idle parser.

        Raises:
            AssertionError: if ``line`` is not a known function directive or
                closes more parentheses than it opens.
        """
        if line is None:
            # idle parser: nothing collected, trivially balanced
            self.fdecl = ""
            self.lang = ""
            self.balance = 0
            return
        self.lang = self.getLang(line)
        assert self.lang is not None
        # everything after the first "::" is the declaration itself
        self.fdecl = line[line.find("::")+2:].strip()
        self._count_parentheses()
        assert self.balance >= 0

    def _count_parentheses(self):
        """Recompute ``balance`` from the current declaration text."""
        self.balance = self.fdecl.count("(") - self.fdecl.count(")")

    def append(self, line):
        """Add a continuation line to the declaration and update the balance."""
        self.fdecl += line
        self._count_parentheses()

    def isready(self):
        """Return True when every opened parenthesis has been closed."""
        return self.balance == 0

    @classmethod
    def getLang(cls, line):
        """Return the language tag for a directive line, or None if it is not one."""
        for prefix, lang in cls._DIRECTIVES:
            if line.startswith(prefix):
                return lang
        return None

    @classmethod
    def hasDeclaration(cls, line):
        """Return True if ``line`` starts a function declaration directive."""
        return cls.getLang(line) is not None

class ParamParser(object):
    """Collects one ``:param name: description`` entry, including continuation lines.

    Attributes:
        prefix:  the run of spaces that precedes ``:param`` on the first line.
        name:    parameter name.
        comment: description text collected so far.
        active:  True while following lines may still belong to this entry.
    """

    def __init__(self, line=None):
        """Parse the first line of a parameter entry, or create an inactive parser.

        Raises:
            AssertionError: if ``:param`` is missing or at column 0, if it is
                preceded by anything but spaces, or if the name is empty or
                not terminated by ":".
        """
        if line is None:
            self.prefix = ""
            self.name = ""
            self.comment = ""
            self.active = False
            return
        offset = line.find(":param")
        assert offset > 0
        self.prefix = line[:offset]
        assert self.prefix == " "*len(self.prefix), ":param definition should be prefixed with spaces"
        # skip the ":param" keyword itself (6 characters)
        rest = line[offset + 6:].lstrip()
        name_end = rest.find(":")
        assert name_end > 0
        self.name = rest[:name_end]
        self.comment = rest[name_end+1:].lstrip()
        self.active = True

    def append(self, line):
        """Feed the next source line; deactivates the parser when the entry ends.

        A line continues the entry when it is empty or starts with the same
        prefix, unless it is itself a new ``:param`` line.
        """
        assert self.active
        continues = (not self.hasDeclaration(line)) and (line.startswith(self.prefix) or not line)
        if continues:
            self.comment += "\n" + line.lstrip()
        else:
            self.active = False

    @classmethod
    def hasDeclaration(cls, line):
        """Return True if ``line`` (ignoring leading whitespace) starts a ``:param`` entry."""
        return line.lstrip().startswith(":param")

class RstParser(object):
    def __init__(self, cpp_parser):
        self.cpp_parser = cpp_parser
        self.definitions = {}
127 128 129
        self.sections_parsed = 0
        self.sections_total = 0
        self.sections_skipped = 0
130

131 132 133
    def parse(self, module_name, module_path=None):
        if module_path is None:
            module_path = "../" + module_name
134 135 136 137 138

        doclist = []
        for root, dirs, files in os.walk(os.path.join(module_path,"doc")):
            for filename in fnmatch.filter(files, "*.rst"):
                doclist.append(os.path.join(root, filename))
139

140
        for doc in doclist:
141
            self.parse_rst_file(module_name, doc)
142

143 144 145 146
    def parse_section_safe(self, module_name, section_name, file_name, lineno, lines):
        """Run ``parse_section`` and report an assertion failure instead of propagating it.

        Only ``AssertionError`` is intercepted; it is reported on stderr as
        error E001 when ``show_errors`` is set, and silently dropped otherwise.
        """
        try:
            self.parse_section(module_name, section_name, file_name, lineno, lines)
        except AssertionError as args:
            if show_errors:
                sys.stderr.write("RST parser error E%03d: assertion in \"%s\" at %s:%s" % (ERROR_001_SECTIONFAILURE, section_name, file_name, lineno) + "\n")
                sys.stderr.write("    Details: %s" % args + "\n")

    def parse_section(self, module_name, section_name, file_name, lineno, lines):
        """Parse one titled section and register it in ``self.definitions`` if valid.

        Args:
            module_name:  module the section belongs to.
            section_name: section title (expected to be an entity name).
            file_name:    source file, used in diagnostics.
            lineno:       line of the section title, used in diagnostics.
            lines:        section body, one right-stripped line per item.

        Raises:
            AssertionError: from the declaration/parameter parsers on malformed input.
        """
        self.sections_total += 1
        # skip sections having whitespace in name (operator names are the exception)
        is_operator = bool(re.match(r"(\w+::)*operator\s*(\w+|>>|<<|\(\)|->|\+\+|--|=|==|\+=|-=)", section_name))
        if (section_name.find(" ") >= 0 and not is_operator) or section_name.endswith(":"):
            if show_errors:
                sys.stderr.write("RST parser warning W%03d:  SKIPPED: \"%s\" File: %s:%s" % (WARNING_002_HDRWHITESPACE, section_name, file_name, lineno) + "\n")
            self.sections_skipped += 1
            return

        func = {}
        func["name"] = section_name
        func["file"] = file_name
        func["line"] = lineno
        func["module"] = module_name

        # parse section name
        section_name = self.parse_namespace(func, section_name)
        class_separator_idx = section_name.find("::")
        if class_separator_idx > 0:
            func["class"] = section_name[:class_separator_idx]
            func["method"] = section_name[class_separator_idx+2:]
        else:
            func["method"] = section_name

        capturing_seealso = False
        skip_code_lines = False
        expected_brief = True
        was_code_line = False
        fdecl = DeclarationParser()
        pdecl = ParamParser()

        for l in lines:
            # read tail of function/method declaration if needed
            if not fdecl.isready():
                # append the CURRENT line; the separating space keeps tokens
                # from the two lines from running together
                fdecl.append(" " + l.strip())
                if fdecl.isready():
                    self.add_new_fdecl(func, fdecl)
                continue

            # continue capture seealso
            if capturing_seealso:
                if not l or l.startswith(" "):
                    seealso = func.get("seealso", [])
                    seealso.extend(l.split(","))
                    func["seealso"] = seealso
                    continue
                else:
                    capturing_seealso = False

            ll = l.strip()
            if ll == "..":
                expected_brief = False
                skip_code_lines = False
                continue

            # skip lines if line-skipping mode is activated
            if skip_code_lines:
                if not l:
                    continue
                if not l.startswith(" "):
                    # first non-indented line ends the skipped block
                    skip_code_lines = False

            if ll.startswith(".. code-block::") or ll.startswith(".. image::"):
                skip_code_lines = True
                continue

            # todo: parse structure members; skip them for now
            if ll.startswith(".. ocv:member::"):
                skip_code_lines = True
                continue

            #ignore references (todo: collect them)
            if l.startswith(".. ["):
                continue

            if ll.startswith(".. "):
                expected_brief = False
            elif ll.endswith("::"):
                # turn on line-skipping mode for code fragments
                skip_code_lines = True
                ll = ll[:len(ll)-2]

            # continue param parsing (process params after processing .. at the beginning of the line and :: at the end)
            if pdecl.active:
                pdecl.append(l)
                if pdecl.active:
                    continue
                else:
                    self.add_new_pdecl(func, pdecl)
                    # do not continue - current line can contain next parameter definition

            # parse ".. seealso::" blocks
            if ll.startswith(".. seealso::"):
                if ll.endswith(".. seealso::"):
                    # bare directive: the items follow on indented lines
                    capturing_seealso = True
                else:
                    seealso = func.get("seealso", [])
                    seealso.extend(ll[ll.find("::")+2:].split(","))
                    func["seealso"] = seealso
                continue

            # skip ".. index::"
            if ll.startswith(".. index::"):
                continue

            # parse class & struct definitions
            if ll.startswith(".. ocv:class::"):
                func["class"] = ll[ll.find("::")+2:].strip()
                if "method" in func:
                    del func["method"]
                func["isclass"] = True
                expected_brief = True
                continue

            if ll.startswith(".. ocv:struct::"):
                func["class"] = ll[ll.find("::")+2:].strip()
                if "method" in func:
                    del func["method"]
                func["isstruct"] = True
                expected_brief = True
                continue

            # parse function/method definitions
            if fdecl.hasDeclaration(ll):
                fdecl = DeclarationParser(ll)
                if fdecl.isready():
                    self.add_new_fdecl(func, fdecl)
                continue

            # parse parameters
            if pdecl.hasDeclaration(l):
                pdecl = ParamParser(l)
                continue

            # record brief description
            if expected_brief:
                func["brief"] = func.get("brief", "") + "\n" + ll
                if skip_code_lines:
                    expected_brief = False # force end brief if code block begins
                continue

            # record other lines as long description
            if (skip_code_lines):
                # break up comment delimiters inside code fragments
                ll = ll.replace("/*", "/ *")
                ll = ll.replace("*/", "* /")
                if (was_code_line):
                    func["long"] = func.get("long", "") + "\n" + ll + "\n"
                else:
                    was_code_line = True
                    func["long"] = func.get("long", "") + ll +"\n<code>\n\n // C++ code:\n\n"
            else:
                if (was_code_line):
                    func["long"] = func.get("long", "") + "\n" + ll + "\n</code>\n"
                    was_code_line = False
                else:
                    func["long"] = func.get("long", "") + "\n" + ll
        # endfor l in lines

        if fdecl.balance != 0:
            if show_critical_errors:
                sys.stderr.write("RST parser error E%03d: invalid parentheses balance in \"%s\" at %s:%s" % (ERROR_003_PARENTHESES, section_name, file_name, lineno) + "\n")
            return

        # save last parameter if needed
        if pdecl.active:
            self.add_new_pdecl(func, pdecl)

        # add definition to list
        func = self.normalize(func)
        if self.validate(func):
            self.definitions[func["name"]] = func
            self.sections_parsed += 1
            if verbose:
                self.print_info(func)
        elif func:
            if func["name"] in known_text_sections_names:
                if show_errors:
                    sys.stderr.write("RST parser warning W%03d:  SKIPPED: \"%s\" File: %s:%s" % (WARNING_002_HDRWHITESPACE, section_name, file_name, lineno) + "\n")
                self.sections_skipped += 1
            elif show_errors:
                self.print_info(func, True, sys.stderr)

    def parse_rst_file(self, module_name, doc):
339 340
        doc = os.path.abspath(doc)
        lineno = 0
341 342
        whitespace_warnings = 0
        max_whitespace_warnings = 10
343

344 345 346 347 348 349 350 351
        lines = []
        flineno = 0
        fname = ""
        prev_line = None

        df = open(doc, "rt")
        for l in df.readlines():
            lineno += 1
352 353 354
            # handle tabs
            if l.find("\t") >= 0:
                whitespace_warnings += 1
355
                if whitespace_warnings <= max_whitespace_warnings and show_warnings:
356
                    print >> sys.stderr, "RST parser warning W%03d: tab symbol instead of space is used at %s:%s" % (WARNING_004_TABS, doc, lineno)
357
                l = l.replace("\t", "    ")
358

359
            # handle first line
360 361 362
            if prev_line == None:
                prev_line = l.rstrip()
                continue
363

364
            ll = l.rstrip()
365
            if len(prev_line) > 0 and len(ll) >= len(prev_line) and (ll == "-" * len(ll) or ll == "+" * len(ll) or ll == "=" * len(ll)):
366
                # new function candidate
367
                if len(lines) > 1:
368
                    self.parse_section_safe(module_name, fname, doc, flineno, lines[:len(lines)-1])
369 370 371 372
                lines = []
                flineno = lineno-1
                fname = prev_line.strip()
            elif flineno > 0:
373
                lines.append(ll)
374 375 376
            prev_line = ll
        df.close()

377
        # don't forget about the last function section in file!!!
378
        if len(lines) > 1:
379
            self.parse_section_safe(module_name, fname, doc, flineno, lines)
380

381 382
    @classmethod
    def parse_namespace(cls, func, section_name):
V
Vladislav Vinogradov 已提交
383
        known_namespaces = ["cv", "gpu", "flann", "superres"]
384 385 386 387 388 389
        l = section_name.strip()
        for namespace in known_namespaces:
            if l.startswith(namespace + "::"):
                func["namespace"] = namespace
                return l[len(namespace)+2:]
        return section_name
390 391

    def add_new_fdecl(self, func, decl):
392 393 394
        if decl.fdecl.endswith(";"):
            print >> sys.stderr, "RST parser error E%03d: unexpected semicolon at the end of declaration in \"%s\" at %s:%s" \
                        % (ERROR_011_EOLEXPECTED, func["name"], func["file"], func["line"])
395
        decls =  func.get("decls", [])
396 397
        if (decl.lang == "C++" or decl.lang == "C"):
            rst_decl = self.cpp_parser.parse_func_decl_no_wrap(decl.fdecl)
398
            decls.append( [decl.lang, decl.fdecl, rst_decl] )
399
        else:
400
            decls.append( [decl.lang, decl.fdecl] )
401 402
        func["decls"] = decls

403 404 405
    @classmethod
    def add_new_pdecl(cls, func, decl):
        params =  func.get("params", {})
406
        if decl.name in params:
407
            if show_errors:
408 409
                #check black_list
                if decl.name not in params_blacklist.get(func["name"], []):
410 411
                    print >> sys.stderr, "RST parser error E%03d: redefinition of parameter \"%s\" in \"%s\" at %s:%s" \
                        % (ERROR_005_REDEFENITIONPARAM, decl.name, func["name"], func["file"], func["line"])
412 413 414 415
        else:
            params[decl.name] = decl.comment
            func["params"] = params

416 417
    def print_info(self, func, skipped=False, out = sys.stdout):
        print >> out
418
        if skipped:
419 420
            print >> out, "SKIPPED DEFINITION:"
        print >> out, "name:      %s" % (func.get("name","~empty~"))
421
        print >> out, "file:      %s:%s" % (func.get("file","~empty~"), func.get("line","~empty~"))
422 423
        print >> out, "is class:  %s" % func.get("isclass", False)
        print >> out, "is struct: %s" % func.get("isstruct", False)
424 425 426 427 428
        print >> out, "module:    %s" % func.get("module","~unknown~")
        print >> out, "namespace: %s" % func.get("namespace", "~empty~")
        print >> out, "class:     %s" % (func.get("class","~empty~"))
        print >> out, "method:    %s" % (func.get("method","~empty~"))
        print >> out, "brief:     %s" % (func.get("brief","~empty~"))
429
        if "decls" in func:
430
            print >> out, "declarations:"
431
            for d in func["decls"]:
432
                print >> out, "     %7s: %s" % (d[0], re.sub(r"[ ]+", " ", d[1]))
433
        if "seealso" in func:
434
            print >> out, "seealso:  ", func["seealso"]
435
        if "params" in func:
436
            print >> out, "parameters:"
437
            for name, comment in func["params"].items():
438 439 440
                print >> out, "%23s:   %s" % (name, comment)
        print >> out, "long:      %s" % (func.get("long","~empty~"))
        print >> out
441 442

    def validate(self, func):
443 444
        if func.get("decls", None) is None:
            if not func.get("isclass", False) and not func.get("isstruct", False):
445
                return False
446
        if func["name"] in self.definitions:
447
            if show_errors:
448 449
                print >> sys.stderr, "RST parser error E%03d: \"%s\" from: %s:%s is already documented at %s:%s" \
                    % (ERROR_006_REDEFENITIONFUNC, func["name"], func["file"], func["line"], self.definitions[func["name"]]["file"], self.definitions[func["name"]]["line"])
450
            return False
451 452 453
        return self.validateParams(func)

    def validateParams(self, func):
454
        documentedParams = func.get("params", {}).keys()
455
        params = []
456

457 458 459 460 461 462 463 464 465 466 467
        for decl in func.get("decls", []):
            if len(decl) > 2:
                args = decl[2][3] # decl[2] -> [ funcname, return_ctype, [modifiers], [args] ]
                for arg in args:
                    # arg -> [ ctype, name, def val, [mod], argno ]
                    if arg[0] != "...":
                        params.append(arg[1])
        params = list(set(params))#unique

        # 1. all params are documented
        for p in params:
468
            if p not in documentedParams and show_warnings:
469
                print >> sys.stderr, "RST parser warning W%03d: parameter \"%s\" of \"%s\" is undocumented. %s:%s" % (WARNING_007_UNDOCUMENTEDPARAM, p, func["name"], func["file"], func["line"])
470 471 472

        # 2. only real params are documented
        for p in documentedParams:
473
            if p not in params and show_warnings:
474
                if p not in params_blacklist.get(func["name"], []):
475
                    print >> sys.stderr, "RST parser warning W%03d: unexisting parameter \"%s\" of \"%s\" is documented at %s:%s" % (WARNING_008_MISSINGPARAM, p, func["name"], func["file"], func["line"])
476 477 478 479 480
        return True

    def normalize(self, func):
        """Clean up the texts collected for one definition, in place.

        Normalizes name, method, class, brief, long description, parameter
        comments and "see also" items with ``normalizeText``; drops entries
        that become empty; sorts declarations; expands wildcard parameters
        listed in ``params_mapping``; finally adjusts names of old C functions.

        Returns:
            The same ``func`` object (returned untouched when it is empty).
        """
        if not func:
            return func
        fnname = func["name"]
        fnname = self.normalizeText(fnname)
        fnname = re.sub(r'_\?D$', "_nD", fnname)  # tailing _?D can be mapped to _nD
        fnname = re.sub(r'\?D$', "ND", fnname)  # tailing ?D can be mapped to ND
        fnname = re.sub(r'\(s\)$', "s", fnname) # tailing (s) can be mapped to s
        func["name"] = fnname
        if "method" in func:
            func["method"] = self.normalizeText(func["method"])
        if "class" in func:
            func["class"] = self.normalizeText(func["class"])
        if "brief" in func:
            func["brief"] = self.normalizeText(func.get("brief", None))
            if not func["brief"]:
                del func["brief"]
        if "long" in func:
            func["long"] = self.normalizeText(func.get("long", None))
            if not func["long"]:
                del func["long"]
        if "decls" in func:
            func["decls"].sort()
        if "params" in func:
            params = {}
            for name, comment in func["params"].items():
                cmt = self.normalizeText(comment)
                if cmt:
                    params[name] = cmt
            # expand some wellknown params
            pmap = params_mapping.get(fnname)
            if pmap:
                for name, alias in pmap.items():
                    # the wildcard entry may be absent (undocumented or with an
                    # empty description); skip it instead of raising KeyError
                    if alias in params:
                        params[name] = params[alias]
            func["params"] = params
        if "seealso" in func:
            seealso = []
            for see in func["seealso"]:
                item = self.normalizeText(see.rstrip(".")).strip("\"")
                # keep single-word references and operator names only
                if item and (item.find(" ") < 0 or item.find("::operator") > 0):
                    seealso.append(item)
            func["seealso"] = list(set(seealso))
            if not func["seealso"]:
                del func["seealso"]

        # special case for old C functions - section name should omit "cv" prefix
        if not func.get("isclass", False) and not func.get("isstruct", False):
            self.fixOldCFunctionName(func)
        return func

    def fixOldCFunctionName(self, func):
529
        if not "decls" in func:
530 531 532 533 534 535 536 537 538 539 540
            return
        fname = None
        for decl in func["decls"]:
            if decl[0] != "C" and decl[0] != "Python1":
                return
            if decl[0] == "C":
                fname = decl[2][0]
        if fname is None:
            return

        fname = fname.replace(".", "::")
541
        if fname.startswith("cv::cv"):
542
            if fname[6:] == func.get("name", "").replace("*", "_n"):
543 544
                func["name"] = fname[4:]
                func["method"] = fname[4:]
545
            elif show_warnings:
546
                print >> sys.stderr, "RST parser warning W%03d:  \"%s\" - section name is \"%s\" instead of \"%s\" at %s:%s" % (WARNING_009_HDRMISMATCH, fname, func["name"], fname[6:], func["file"], func["line"])
547
                #self.print_info(func)
548

549 550 551
    def normalizeText(self, s):
        """Convert a fragment of reStructuredText into plain text with light HTML.

        Math blocks and inline math are rewritten by ``mathReplace2`` /
        ``mathReplace``, Sphinx roles and directives are stripped, line breaks
        and whitespace are compacted, and back-quoted text is turned into
        ``<code>`` elements or plain double quotes. The substitutions are
        order-dependent; keep their sequence when editing.

        Returns:
            The normalized, stripped string; ``None`` is passed through.
        """
        if s is None:
            return s

        # math first: the replacement functions need the raw markup
        s = re.sub(r"\.\. math::[ \r]*\n+((.|\n)*?)(\n[ \r]*\n|$)", mathReplace2, s)
        s = re.sub(r":math:`([^`]+?)`", mathReplace, s)
        s = re.sub(r" *:sup:", "^", s)

        # drop role prefixes, keeping the referenced text
        s = s.replace(":ocv:class:", "")
        s = s.replace(":ocv:struct:", "")
        s = s.replace(":ocv:func:", "")
        s = s.replace(":ocv:cfunc:","")
        s = s.replace(":c:type:", "")
        s = s.replace(":c:func:", "")
        s = s.replace(":ref:", "")
        s = s.replace(":math:", "")
        s = s.replace(":func:", "")

        s = s.replace("]_", "]")
        s = s.replace(".. note::", "Note:")
        s = s.replace(".. table::", "")
        s = s.replace(".. ocv:function::", "")
        s = s.replace(".. ocv:cfunction::", "")

        # remove ".. identifier:" lines
        s = re.sub(r"(^|\n)\.\. [a-zA-Z_0-9]+(::[a-zA-Z_0-9]+)?:(\n|$)", "\n ", s)
        # unwrap urls
        s = re.sub(r"`([^`<]+ )<(https?://[^>]+)>`_", "\\1(\\2)", s)
        # remove tailing ::
        s = re.sub(r"::(\n|$)", "\\1", s)

        # normalize line endings
        s = re.sub(r"\r\n", "\n", s)
        # remove extra line breaks before/after _ or ,
        s = re.sub(r"\n[ ]*([_,])\n", r"\1 ", s)
        # remove extra line breaks after ` (disabled)
        #s = re.sub(r"`\n", "` ", s)
        # remove extra space after ( and before .,)
        s = re.sub(r"\([\n ]+", "(", s)
        s = re.sub(r"[\n ]+(\.|,|\))", "\\1", s)
        # remove extra line breaks after ".. note::"
        s = re.sub(r"\.\. note::\n+", ".. note:: ", s)
        # remove extra line breaks before *
        s = re.sub(r"\n+\*", "\n*", s)
        # remove extra line breaks after *
        s = re.sub(r"\n\*\n+", "\n* ", s)
        # remove extra line breaks before #.
        s = re.sub(r"\n+#\.", "\n#.", s)
        # remove extra line breaks after #.
        s = re.sub(r"\n#\.\n+", "\n#. ", s)
        # remove extra line breaks before ` (disabled)
        #s = re.sub(r"\n[ ]*`", " `", s)
        # remove trailing whitespaces
        s = re.sub(r"[ ]+$", "", s)
        # remove .. for references (disabled)
        #s = re.sub(r"\.\. \[", "[", s)
        # unescape
        s = re.sub(r"\\(.)", "\\1", s)

        # remove whitespace before .
        s = re.sub(r"[ ]+\.", ".", s)
        # remove tailing whitespace
        s = re.sub(r" +(\n|$)", "\\1", s)
        # remove leading whitespace
        s = re.sub(r"(^|\n) +", "\\1", s)
        # compress line breaks
        s = re.sub(r"\n\n+", "\n\n", s)
        # remove other newlines
        s = re.sub(r"([^.\n\\=])\n([^*#\n]|\*[^ ])", "\\1 \\2", s)
        # compress whitespace
        s = re.sub(r" +", " ", s)

        # restore math (line breaks inside formulas were protected as <BR>)
        s = re.sub(r" *<BR> *", "\n", s)

        # remove extra space before .
        s = re.sub(r"[\n ]+\.", ".", s)

        s = s.replace("**", "")
        s = re.sub(r"``([^\n]+?)``", "<code>\\1</code>", s)
        s = s.replace("``", "\"")
        s = s.replace("`", "\"")
        s = s.replace("\"\"", "\"")

        s = s.strip()
        return s

    def printSummary(self):
        print "RST Parser Summary:"
        print "  Total sections:   %s" % self.sections_total
        print "  Skipped sections: %s" % self.sections_skipped
        print "  Parsed  sections: %s" % self.sections_parsed
        print "  Invalid sections: %s" % (self.sections_total - self.sections_parsed - self.sections_skipped)

        # statistic by language
        stat = {}
        classes = 0
        structs = 0
        for name, d in self.definitions.items():
648 649 650 651 652 653 654
            if d.get("isclass", False):
                classes += 1
            elif d.get("isstruct", False):
                structs += 1
            else:
                for decl in d.get("decls", []):
                    stat[decl[0]] = stat.get(decl[0], 0) + 1
655 656 657 658 659 660

        print
        print "  classes documented:           %s" % classes
        print "  structs documented:           %s" % structs
        for lang in sorted(stat.items()):
            print "  %7s functions documented: %s" % lang
661
        print
K
Kirill Kornyakov 已提交
662

663 664 665 666 667 668 669 670 671 672 673 674
def mathReplace2(match):
    """Convert a ``.. math::`` block match and set it off as its own paragraph.

    The formula is converted by ``mathReplace``, then preceded by an empty
    line and followed by two ``<BR>`` markers.
    """
    converted = mathReplace(match)
    return "".join(("\n\n", converted, "<BR><BR>"))

def hdotsforReplace(match):
    """Expand ``\\hdotsfor{N}`` into N copies of an ellipsis followed by two spaces."""
    repeat = int(match.group(1))
    return repeat * '...  '

def matrixReplace(match):
    """Return the matrix body (group 2) with each ``&`` column separator,
    together with the spaces around it, replaced by three spaces."""
    return re.sub(r" *& *", "   ", match.group(2))
675

676 677 678 679
def mathReplace(match):
    """Render the LaTeX formula in ``match.group(1)`` as approximate plain text.

    Line breaks are protected as ``<BR>`` markers, known LaTeX macros are
    replaced by textual equivalents, remaining backslash escapes are
    unwrapped, and braces are turned into parentheses. The substitutions are
    order-dependent; keep their sequence when editing.

    Returns:
        The converted formula wrapped in ``<em>...</em>``.
    """
    m = match.group(1)

    m = m.replace("\n", "<BR>")
    m = m.replace("<", "&lt")
    m = m.replace(">", "&gt")
    # macros with arguments
    m = re.sub(r"\\text(tt|rm)?{(.*?)}", "\\2", m)
    m = re.sub(r"\\mbox{(.*?)}", "\\1", m)
    m = re.sub(r"\\mathrm{(.*?)}", "\\1", m)
    m = re.sub(r"\\vecthree{(.*?)}{(.*?)}{(.*?)}", "[\\1 \\2 \\3]", m)
    m = re.sub(r"\\bar{(.*?)}", "\\1`", m)
    m = re.sub(r"\\sqrt\[(\d)*\]{(.*?)}", "sqrt\\1(\\2)", m)
    m = re.sub(r"\\sqrt{(.*?)}", "sqrt(\\1)", m)
    m = re.sub(r"\\frac{(.*?)}{(.*?)}", "(\\1)/(\\2)", m)
    m = re.sub(r"\\fork{(.*?)}{(.*?)}{(.*?)}{(.*?)}", "\\1 \\2; \\3 \\4", m)
    m = re.sub(r"\\forkthree{(.*?)}{(.*?)}{(.*?)}{(.*?)}{(.*?)}{(.*?)}", "\\1 \\2; \\3 \\4; \\5 \\6", m)
    m = re.sub(r"\\stackrel{(.*?)}{(.*?)}", "\\1 \\2", m)
    m = re.sub(r"\\sum _{(.*?)}", "sum{by: \\1}", m)

    # matrices and arrays
    m = re.sub(r" +", " ", m)
    m = re.sub(r"\\begin{(?P<gtype>array|bmatrix)}(?:{[\|lcr\. ]+})? *(.*?)\\end{(?P=gtype)}", matrixReplace, m)
    m = re.sub(r"\\hdotsfor{(\d+)}", hdotsforReplace, m)
    m = re.sub(r"\\vecthreethree{(.*?)}{(.*?)}{(.*?)}{(.*?)}{(.*?)}{(.*?)}{(.*?)}{(.*?)}{(.*?)}", "<BR>|\\1 \\2 \\3|<BR>|\\4 \\5 \\6|<BR>|\\7 \\8 \\9|<BR>", m)

    # delimiters
    m = re.sub(r"\\left[ ]*\\lfloor[ ]*", "[", m)
    m = re.sub(r"[ ]*\\right[ ]*\\rfloor", "]", m)
    m = re.sub(r"\\left[ ]*\([ ]*", "(", m)
    m = re.sub(r"[ ]*\\right[ ]*\)", ")", m)
    m = re.sub(r"([^\\])\$", "\\1", m)

    # operators and relations
    m = m.replace("\\times", "x")
    m = m.replace("\\pm", "+-")
    m = m.replace("\\cdot", "*")
    m = m.replace("\\sim", "~")
    m = m.replace("\\leftarrow", "<-")
    m = m.replace("\\rightarrow", "->")
    m = m.replace("\\leftrightarrow", "<->")
    m = re.sub(r" *\\neg *", " !", m)
    m = re.sub(r" *\\neq? *", " != ", m)
    m = re.sub(r" *\\geq? *", " >= ", m)
    m = re.sub(r" *\\leq? *", " <= ", m)
    m = re.sub(r" *\\vee *", " V ", m)
    m = re.sub(r" *\\oplus *", " (+) ", m)
    m = re.sub(r" *\\mod *", " mod ", m)
    m = re.sub(r"( *)\\partial *", "\\1d", m)

    # spacing commands
    m = re.sub(r"( *)\\quad *", "\\1 ", m)
    m = m.replace("\\,", " ")
    m = m.replace("\\:", "  ")
    m = m.replace("\\;", "   ")
    m = m.replace("\\!", "")

    # row separator, then unwrap every remaining backslash escape
    m = m.replace("\\\\", "<BR>")
    m = m.replace("\\wedge", "/\\\\")
    m = re.sub(r"\\(.)", "\\1", m)

    # tidy whitespace around punctuation, parentheses and bars
    m = re.sub(r"\([ ]+", "(", m)
    m = re.sub(r"[ ]+(\.|,|\))(<BR>| |$)", "\\1\\2", m)
    m = re.sub(r" +\|[ ]+([a-zA-Z0-9_(])", " |\\1", m)
    m = re.sub(r"([a-zA-Z0-9_)}])[ ]+(\(|\|)", "\\1\\2", m)

    # braces become parentheses; parentheses around plain numbers are dropped
    m = re.sub(r"{\((-?[a-zA-Z0-9_]+)\)}", "\\1", m)
    m = re.sub(r"{(-?[a-zA-Z0-9_]+)}", "(\\1)", m)
    m = re.sub(r"\(([0-9]+)\)", "\\1", m)
    m = m.replace("{", "(")
    m = m.replace("}", ")")

    return "<em>" + m + "</em>"

# Command line: <module name | "all"> [verbose]
if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.stdout.write("Usage:\n" + os.path.basename(sys.argv[0]) + " " + " <module path>" + "\n")
        sys.exit(0)

    if len(sys.argv) >= 3:
        if sys.argv[2].lower() == "verbose":
            verbose = True

    # the header parser is located relative to this script
    rst_parser_dir  = os.path.dirname(os.path.abspath(sys.argv[0]))
    hdr_parser_path = os.path.join(rst_parser_dir, "../../python/src2")

    sys.path.append(hdr_parser_path)
    import hdr_parser

    module = sys.argv[1]

    if module != "all" and not os.path.isdir(os.path.join(rst_parser_dir, "../../" + module)):
        sys.stdout.write("RST parser error E%03d: module \"%s\" could not be found." % (ERROR_010_NOMODULE, module) + "\n")
        sys.exit(1)

    parser = RstParser(hdr_parser.CppHeaderParser())

    if module == "all":
        for m in allmodules:
            parser.parse(m, os.path.join(rst_parser_dir, "../../" + m))
    else:
        parser.parse(module, os.path.join(rst_parser_dir, "../../" + module))

    # summary
    parser.printSummary()