#!/usr/bin/env python
#
# This is the API builder: it parses the C sources and builds the
# formal API description in XML.
#
# See Copyright for the status of this software.
#
# daniel@veillard.com
#

from __future__ import print_function

13 14
import os
import sys
15
import glob
16
import re
17

18 19 20 21
# When True, the optional trace/statistics prints guarded by "not quiet"
# are suppressed.
quiet = True
# Running count of warnings; incremented by the warning() helpers.
warnings = 0
# NOTE(review): not referenced in the part of the file reviewed here.
debug = False
# Name of a symbol to trace: when a registered name equals it (and quiet
# is off) the index/identifier code prints what it is doing.
debugsym = None
22 23 24 25

#
# C parser analysis code
#
26
# File name -> one-line description for the public headers and the C
# modules of the main library.
# NOTE(review): the consumer of this table is outside the reviewed span.
included_files = {
  "libvirt-common.h": "header with general libvirt API definitions",
  "libvirt-domain.h": "header with general libvirt API definitions",
  "libvirt-domain-checkpoint.h": "header with general libvirt API definitions",
  "libvirt-domain-snapshot.h": "header with general libvirt API definitions",
  "libvirt-event.h": "header with general libvirt API definitions",
  "libvirt-host.h": "header with general libvirt API definitions",
  "libvirt-interface.h": "header with general libvirt API definitions",
  "libvirt-network.h": "header with general libvirt API definitions",
  "libvirt-nodedev.h": "header with general libvirt API definitions",
  "libvirt-nwfilter.h": "header with general libvirt API definitions",
  "libvirt-secret.h": "header with general libvirt API definitions",
  "libvirt-storage.h": "header with general libvirt API definitions",
  "libvirt-stream.h": "header with general libvirt API definitions",
  "virterror.h": "header with error specific API definitions",
  "libvirt.c": "Main interfaces for the libvirt library",
  "libvirt-domain.c": "Domain interfaces for the libvirt library",
  "libvirt-domain-checkpoint.c": "Domain checkpoint interfaces for the libvirt library",
  "libvirt-domain-snapshot.c": "Domain snapshot interfaces for the libvirt library",
  "libvirt-host.c": "Host interfaces for the libvirt library",
  "libvirt-interface.c": "Interface interfaces for the libvirt library",
  "libvirt-network.c": "Network interfaces for the libvirt library",
  "libvirt-nodedev.c": "Node device interfaces for the libvirt library",
  "libvirt-nwfilter.c": "NWFilter interfaces for the libvirt library",
  "libvirt-secret.c": "Secret interfaces for the libvirt library",
  "libvirt-storage.c": "Storage interfaces for the libvirt library",
  "libvirt-stream.c": "Stream interfaces for the libvirt library",
  "virerror.c": "implements error handling and reporting code for libvirt",
  "virevent.c": "event loop for monitoring file handles",
  "virtypedparam-public.c": "virTypedParameters APIs",
}

58 59 60 61 62
# File name -> description tables for the hypervisor-specific and admin
# APIs, in the same layout as included_files.
qemu_included_files = {
  "libvirt-qemu.h": "header with QEMU specific API definitions",
  "libvirt-qemu.c": "Implementations for the QEMU specific APIs",
}

lxc_included_files = {
  "libvirt-lxc.h": "header with LXC specific API definitions",
  "libvirt-lxc.c": "Implementations for the LXC specific APIs",
}

admin_included_files = {
  "libvirt-admin.h": "header with admin specific API definitions",
  "libvirt-admin.c": "Implementations for the admin specific APIs",
}

73 74
# Word -> (count, description) for keywords the parser should ignore.
# NOTE(review): the integer presumably is the number of following tokens
# to drop together with the word -- confirm against the code that reads
# this table (outside the reviewed span).
ignored_words = {
  "ATTRIBUTE_UNUSED": (0, "macro keyword"),
  "ATTRIBUTE_SENTINEL": (0, "macro keyword"),
  "VIR_DEPRECATED": (0, "macro keyword"),
  "VIR_EXPORT_VAR": (0, "macro keyword"),
  "WINAPI": (0, "Windows keyword"),
  "__declspec": (3, "Windows keyword"),
  "__stdcall": (0, "Windows keyword"),
}

D
Daniel Veillard 已提交
83
# Function name -> reason the function is left out of the public API
# description.
ignored_functions = {
  "virConnectSupportsFeature": "private function for remote access",
  "virDomainMigrateFinish": "private function for migration",
  "virDomainMigrateFinish2": "private function for migration",
  "virDomainMigratePerform": "private function for migration",
  "virDomainMigratePrepare": "private function for migration",
  "virDomainMigratePrepare2": "private function for migration",
  "virDomainMigratePrepareTunnel": "private function for tunnelled migration",
  "virDomainMigrateBegin3": "private function for migration",
  "virDomainMigrateFinish3": "private function for migration",
  "virDomainMigratePerform3": "private function for migration",
  "virDomainMigratePrepare3": "private function for migration",
  "virDomainMigrateConfirm3": "private function for migration",
  "virDomainMigratePrepareTunnel3": "private function for tunnelled migration",
  "DllMain": "specific function for Win32",
  "virTypedParamsValidate": "internal function in virtypedparam.c",
  "virTypedParameterValidateSet": "internal function in virtypedparam.c",
  "virTypedParameterAssign": "internal function in virtypedparam.c",
  "virTypedParameterAssignFromStr": "internal function in virtypedparam.c",
  "virTypedParameterToString": "internal function in virtypedparam.c",
  "virTypedParamsCheck": "internal function in virtypedparam.c",
  "virTypedParamsCopy": "internal function in virtypedparam.c",
  "virDomainMigrateBegin3Params": "private function for migration",
  "virDomainMigrateFinish3Params": "private function for migration",
  "virDomainMigratePerform3Params": "private function for migration",
  "virDomainMigratePrepare3Params": "private function for migration",
  "virDomainMigrateConfirm3Params": "private function for migration",
  "virDomainMigratePrepareTunnel3Params": "private function for tunnelled migration",
  "virErrorCopyNew": "private",
}

114 115 116 117 118 119
# Macro name -> reason; macros listed here do not trigger comment
# warnings in parseMacroComment().
ignored_macros = {
  "_virSchedParameter": "backward compatibility macro for virTypedParameter",
  "_virBlkioParameter": "backward compatibility macro for virTypedParameter",
  "_virMemoryParameter": "backward compatibility macro for virTypedParameter",
}

# macros that should be completely skipped
hidden_macros = {
  "VIR_DEPRECATED": "internal macro to mark deprecated apis",
  "VIR_EXPORT_VAR": "internal macro to mark exported vars",
}

126
def escape(raw):
    """Return *raw* with the five XML special characters escaped.

    The ampersand must be replaced first, otherwise the '&' introduced
    by the other entities would be escaped a second time.
    """
    raw = raw.replace('&', '&amp;')
    raw = raw.replace('<', '&lt;')
    raw = raw.replace('>', '&gt;')
    raw = raw.replace("'", '&apos;')
    raw = raw.replace('"', '&quot;')
    return raw

def uniq(items):
    """Return the distinct elements of *items* as a sorted list."""
    return sorted(set(items))
136 137

class identifier:
    """One named symbol (function, macro, typedef, ...) seen by the parser.

    Attributes:
        name: symbol name.
        header: header file that declares the symbol, or None.
        module: file the symbol was found in, or None.
        type: kind of symbol as a string (e.g. "function", "macro").
        info, extra: kind-specific payloads stored verbatim.
        lineno: line number recorded for the symbol.
        static: 1 when the symbol was flagged static, else 0.
        conditionals: copy of the enclosing conditionals list, or None
            when there are none.
    """

    def __init__(self, name, header=None, module=None, type=None, lineno=0,
                 info=None, extra=None, conditionals=None):
        self.name = name
        self.header = header
        self.module = module
        self.type = type
        self.info = info
        self.extra = extra
        self.lineno = lineno
        self.static = 0
        if conditionals is None or len(conditionals) == 0:
            self.conditionals = None
        else:
            # keep a private copy, the caller's list keeps changing
            self.conditionals = conditionals[:]
        if self.name == debugsym and not quiet:
            print("=> define %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))

    def __repr__(self):
        r = "%s %s:" % (self.type, self.name)
        if self.static:
            r = r + " static"
        if self.module is not None:
            r = r + " from %s" % self.module
        if self.info is not None:
            r = r + " " + repr(self.info)
        if self.extra is not None:
            r = r + " " + repr(self.extra)
        if self.conditionals is not None:
            r = r + " " + repr(self.conditionals)
        return r

    def set_header(self, header):
        self.header = header

    def set_module(self, module):
        self.module = module

    def set_type(self, type):
        self.type = type

    def set_info(self, info):
        self.info = info

    def set_extra(self, extra):
        self.extra = extra

    def set_lineno(self, lineno):
        self.lineno = lineno

    def set_static(self, static):
        self.static = static

    def set_conditionals(self, conditionals):
        """Store a copy of *conditionals*; an empty list is stored as None."""
        if conditionals is None or len(conditionals) == 0:
            self.conditionals = None
        else:
            self.conditionals = conditionals[:]

    def get_name(self):
        return self.name

    def get_header(self):
        # Previously returned self.module by mistake.
        return self.header

    def get_module(self):
        return self.module

    def get_type(self):
        return self.type

    def get_info(self):
        return self.info

    def get_lineno(self):
        return self.lineno

    def get_extra(self):
        return self.extra

    def get_static(self):
        return self.static

    def get_conditionals(self):
        return self.conditionals

    def update(self, header, module, type=None, info=None, extra=None,
               conditionals=None):
        """Merge newly discovered data into this identifier.

        header and type are only filled in when still unset; module is
        replaced when unset or when it merely mirrors the header; info,
        extra and conditionals are overwritten whenever a non-None value
        is supplied.
        """
        if self.name == debugsym and not quiet:
            print("=> update %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))
        if header is not None and self.header is None:
            # Previously stored 'module' here instead of 'header'.
            self.set_header(header)
        if module is not None and (self.module is None or self.header == self.module):
            self.set_module(module)
        if type is not None and self.type is None:
            self.set_type(type)
        if info is not None:
            self.set_info(info)
        if extra is not None:
            self.set_extra(extra)
        if conditionals is not None:
            self.set_conditionals(conditionals)
227 228

class index:
    """Symbol tables gathered from one or more parsed C files.

    ``identifiers`` maps every registered name to its identifier object;
    the per-kind dictionaries (functions, variables, includes, structs,
    unions, enums, typedefs, macros) hold the same objects grouped by the
    type they were registered with.  ``references`` is filled by
    add_ref() and ``info`` by CParser.parseTopComment().
    """

    def __init__(self, name="noname"):
        self.name = name
        self.identifiers = {}
        self.functions = {}
        self.variables = {}
        self.includes = {}
        self.structs = {}
        self.unions = {}
        self.enums = {}
        self.typedefs = {}
        self.macros = {}
        self.references = {}
        self.info = {}

    def warning(self, msg):
        """Print *msg* and bump the module-wide warning counter."""
        global warnings
        warnings = warnings + 1
        print(msg)

    def _declare(self, name, header, module, static, type, lineno, info,
                 extra, conditionals):
        """Return the identifier for *name*, creating or updating it.

        The old code passed lineno as an extra positional argument to
        identifier.update(); the resulting TypeError was swallowed by a
        bare except and the entry was silently recreated every time.
        Now an existing entry really is updated, and only a missing name
        creates a new identifier.
        """
        try:
            d = self.identifiers[name]
        except KeyError:
            d = identifier(name, header, module, type, lineno, info, extra,
                           conditionals)
            self.identifiers[name] = d
        else:
            d.update(header, module, type, info, extra, conditionals)
            # update() has no lineno parameter; keep recording the most
            # recent location as the recreated entries used to.
            d.set_lineno(lineno)

        if static == 1:
            d.set_static(1)
        return d

    def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals=None):
        """Record a reference to *name*; names starting with '__' are ignored.

        Returns the identifier, or None for an ignored name.
        """
        if name[0:2] == '__':
            return None
        d = self._declare(name, header, module, static, type, lineno, info,
                          extra, conditionals)

        if type is not None:
            self.references[name] = d

        if name == debugsym and not quiet:
            print("New ref: %s" % (d))

        return d

    def add(self, name, header, module, static, type, lineno, info=None,
            extra=None, conditionals=None):
        """Register a definition of *name* in the table matching *type*.

        Names starting with '__' are ignored (None is returned).  An
        unknown *type* only produces a warning; the identifier is still
        kept in ``identifiers`` and returned.
        """
        if name[0:2] == '__':
            return None
        d = self._declare(name, header, module, static, type, lineno, info,
                          extra, conditionals)

        if type is not None:
            type_map = {
                "function": self.functions,
                "functype": self.functions,
                "variable": self.variables,
                "include": self.includes,
                "struct": self.structs,
                "union": self.unions,
                "enum": self.enums,
                "typedef": self.typedefs,
                "macro": self.macros
            }
            if type in type_map:
                type_map[type][name] = d
            else:
                # warning() takes a single message; the old call passed
                # the type as a second positional argument.
                self.warning("Unable to register type %s" % type)

        if name == debugsym and not quiet:
            print("New symbol: %s" % (d))

        return d

    def _adopt(self, label, mine, theirs, name, report=None):
        """Copy theirs[name] into *mine* unless it is already there.

        A name already present is reported through *report* (default:
        self.warning) and left untouched.
        """
        if name in mine:
            if report is None:
                report = self.warning
            report("%s %s from %s redeclared in %s" % (
                label, name, mine[name].header, theirs[name].header))
        else:
            mine[name] = theirs[name]
            self.identifiers[name] = theirs[name]

    def merge(self, idx):
        """Merge the tables of *idx* into this index.

        Order matters: functions and variables are merged first and evict
        any macro of the same name, and macros clashing with an already
        known variable, function or enum are skipped.
        """
        for name in idx.functions.keys():
            # macro might be used to override functions or variables
            # definitions
            self.macros.pop(name, None)
            self._adopt("function", self.functions, idx.functions, name)
        for name in idx.variables.keys():
            self.macros.pop(name, None)
            self._adopt("variable", self.variables, idx.variables, name)
        for name in idx.structs.keys():
            self._adopt("struct", self.structs, idx.structs, name)
        for name in idx.unions.keys():
            # unions are reported with a plain print, i.e. without
            # touching the warning counter, as before
            self._adopt("union", self.unions, idx.unions, name, report=print)
        for name in idx.typedefs.keys():
            self._adopt("typedef", self.typedefs, idx.typedefs, name)
        for name in idx.macros.keys():
            if (name in self.variables or name in self.functions or
                    name in self.enums):
                continue
            self._adopt("macro", self.macros, idx.macros, name)
        for name in idx.enums.keys():
            self._adopt("enum", self.enums, idx.enums, name)

    def merge_public(self, idx):
        """Fold function data from *idx* into functions already known here.

        Functions only present in *idx* are skipped silently.
        """
        for name in idx.functions.keys():
            if name in self.functions:
                up = idx.functions[name]
                # check that function condition agrees with header
                if up.conditionals != self.functions[name].conditionals:
                    self.warning("Header condition differs from Function"
                                 " for %s:" % name)
                    self.warning("  H: %s" % self.functions[name].conditionals)
                    self.warning("  C: %s" % up.conditionals)
                self.functions[name].update(None, up.module, up.type, up.info,
                                            up.extra)
        # TODO: do the same for variables.

    def analyze_dict(self, type, dict):
        """Print how many entries *dict* holds and how many are not static."""
        count = len(dict)
        public = sum(1 for ident in dict.values() if ident.static == 0)
        if count != public:
            print("  %d %s , %d public" % (count, type, public))
        elif count != 0:
            print("  %d public %s" % (count, type))

    def analyze(self):
        """Print per-kind statistics unless running quietly."""
        if not quiet:
            self.analyze_dict("functions", self.functions)
            self.analyze_dict("variables", self.variables)
            self.analyze_dict("structs", self.structs)
            self.analyze_dict("unions", self.unions)
            self.analyze_dict("typedefs", self.typedefs)
            self.analyze_dict("macros", self.macros)
421

422 423 424 425 426
class CLexer:
    """A lexer for the C language, tokenize the input by reading and
       analyzing it line by line"""

    # characters emitted as ('sep', c) tokens
    _SEPARATORS = "(){}:;,[]"
    # characters that start an ('op', ...) token
    _OPERATORS = "+-*><=/%&!|."
    # characters that may follow an operator to form a two-char operator
    _OPERATOR_TAIL = "+-*><=/%&!|"
    # characters that terminate a name
    _NAME_END = " \t(){}:;,+-*/%&!|[]=><"

    def __init__(self, input):
        self.input = input      # object providing readline()
        self.tokens = []        # tokens produced but not yet returned
        self.line = ""          # unprocessed remainder of the current line
        self.lineno = 0
        # last token returned; initialised so that debug() can be called
        # before the first token() without an AttributeError
        self.last = None

    def getline(self):
        """Return the next non-blank line, stripped, or None at EOF.

        Lines ending in a backslash are joined with their continuation.
        """
        line = ''
        while line == '':
            line = self.input.readline()
            if not line:
                return None
            self.lineno += 1
            line = line.strip()
            if line == '':
                continue
            while line[-1] == '\\':
                line = line[:-1]
                n = self.input.readline().strip()
                self.lineno += 1
                if not n:
                    break
                line += n
        return line

    def getlineno(self):
        return self.lineno

    def push(self, token):
        """Put *token* back so that it is the next one returned."""
        self.tokens.insert(0, token)

    def debug(self):
        print("Last token: ", self.last)
        print("Token queue: ", self.tokens)
        print("Line %d end: " % self.lineno, self.line)

    def token(self):
        """Return the next (kind, text) token, or None at end of input.

        kind is one of 'preproc', 'string', 'comment', 'name', 'sep', 'op'.
        """
        while self.tokens == []:
            if self.line == "":
                line = self.getline()
            else:
                line = self.line
                self.line = ""
            if line is None:
                return None

            if line[0] == '#':
                self.tokens = [('preproc', word) for word in line.split()]

                # We might have whitespace between the '#' and preproc
                # macro name, so instead of having a single token element
                # of '#define' we might end up with '#' and 'define'. This
                # merges them back together (a lone '#' is left alone).
                if self.tokens[0][1] == "#" and len(self.tokens) > 1:
                    self.tokens[0] = ('preproc', "#" + self.tokens[1][1])
                    del self.tokens[1]
                break
            l = len(line)
            if line[0] == '"' or line[0] == "'":
                quote = line[0]
                i = 1
                # pull in more lines until the closing quote shows up;
                # escaped quotes are not recognised
                while quote not in line[i:]:
                    i = len(line)
                    nextline = self.getline()
                    if nextline is None:
                        return None
                    line += nextline

                tok, self.line = line[1:].split(quote, 1)
                self.last = ('string', tok)
                return self.last

            if line.startswith("/*"):
                line = line[2:]
                found = 0
                tok = ""
                while found == 0:
                    i = 0
                    l = len(line)
                    while i < l:
                        if line[i] == '*' and i+1 < l and line[i+1] == '/':
                            self.line = line[i+2:]
                            # Drops the character before the '*/' as well.
                            # NOTE(review): with '*/' at column 0 this is
                            # line[:-1]; the comment parsers filter the
                            # resulting lone '*' lines, so it is kept as is.
                            line = line[:i-1]
                            found = 1
                            break
                        i = i + 1
                    if tok != "":
                        tok = tok + "\n"
                    tok = tok + line
                    if found == 0:
                        line = self.getline()
                        if line is None:
                            return None
                self.last = ('comment', tok)
                return self.last
            if line.startswith("//"):
                line = line[2:]
                self.last = ('comment', line)
                return self.last

            # Cut the line at the first comment or string start; the rest
            # is kept in self.line for the next round.
            i = 0
            while i < l:
                if line[i] == '/' and i+1 < l and line[i+1] == '/':
                    self.line = line[i:]
                    line = line[:i]
                    break
                if line[i] == '/' and i+1 < l and line[i+1] == '*':
                    self.line = line[i:]
                    line = line[:i]
                    break
                if line[i] == '"' or line[i] == "'":
                    self.line = line[i:]
                    line = line[:i]
                    break
                i = i + 1

            l = len(line)
            i = 0
            while i < l:
                if line[i] == ' ' or line[i] == '\t':
                    i = i + 1
                    continue
                if line[i].isalnum():
                    s = i
                    while i < l:
                        if line[i] not in self._NAME_END:
                            i = i + 1
                        else:
                            break
                    self.tokens.append(('name', line[s:i]))
                    continue
                if line[i] in self._SEPARATORS:
                    self.tokens.append(('sep', line[i]))
                    i = i + 1
                    continue
                if line[i] in self._OPERATORS:
                    if line[i] == '.' and i + 2 < l and \
                       line[i+1] == '.' and line[i+2] == '.':
                        self.tokens.append(('name', '...'))
                        i = i + 3
                        continue

                    j = i + 1
                    if j < l and line[j] in self._OPERATOR_TAIL:
                        self.tokens.append(('op', line[i:j+1]))
                        i = j + 1
                    else:
                        self.tokens.append(('op', line[i]))
                        i = i + 1
                    continue
                # anything else (e.g. a leading '_') starts a name too
                s = i
                while i < l:
                    if line[i] not in self._NAME_END:
                        i = i + 1
                    else:
                        break
                self.tokens.append(('name', line[s:i]))

        tok = self.tokens[0]
        self.tokens = self.tokens[1:]
        self.last = tok
        return tok
609

610 611
class CParser:
    """The C module parser"""
612
    def __init__(self, filename, idx=None):
        """Open *filename* for parsing.

        Symbols are recorded into *idx* when given, otherwise into a
        fresh index.  A file whose name ends in '.h' is treated as a
        header.
        """
        self.filename = filename
        if len(filename) > 2 and filename[-2:] == '.h':
            self.is_header = 1
        else:
            self.is_header = 0
        # NOTE(review): no close() for this file is visible in the
        # reviewed span.
        self.input = open(filename)
        self.lexer = CLexer(self.input)
        if idx is None:
            self.index = index()
        else:
            self.index = idx
        self.top_comment = ""
        self.last_comment = ""
        self.comment = None
        self.collect_ref = 0
        self.no_error = 0
        self.conditionals = []
        self.defines = []
631 632 633 634 635 636 637 638 639 640 641 642 643

    def collect_references(self):
        self.collect_ref = 1

    def stop_error(self):
        self.no_error = 1

    def start_error(self):
        self.no_error = 0

    def lineno(self):
        return self.lexer.getlineno()

644
    def index_add(self, name, module, static, type, info=None, extra=None):
645 646 647 648 649 650
        if self.is_header == 1:
            self.index.add(name, module, module, static, type, self.lineno(),
                           info, extra, self.conditionals)
        else:
            self.index.add(name, None, module, static, type, self.lineno(),
                           info, extra, self.conditionals)
651 652

    def index_add_ref(self, name, module, static, type, info=None,
653
                      extra=None):
654 655 656 657 658 659
        if self.is_header == 1:
            self.index.add_ref(name, module, module, static, type,
                               self.lineno(), info, extra, self.conditionals)
        else:
            self.index.add_ref(name, None, module, static, type, self.lineno(),
                               info, extra, self.conditionals)
660 661

    def warning(self, msg):
        """Count a warning and print *msg* unless errors are silenced.

        The module-wide counter is incremented even when output is
        suppressed through stop_error().
        """
        global warnings
        warnings = warnings + 1
        if not self.no_error:
            print(msg)
667 668 669

    def error(self, msg, token=-1):
        if self.no_error:
670
            return
671

672
        print("Parse Error: " + msg)
673
        if token != -1:
674
            print("Got token ", token)
675 676
        self.lexer.debug()
        sys.exit(1)
677 678

    def debug(self, msg, token=-1):
        """Print *msg*, the optional *token* and the lexer state."""
        print("Debug: " + msg)
        if token != -1:
            print("Got token ", token)
        self.lexer.debug()
683 684

    def parseTopComment(self, comment):
685
        res = {}
686
        lines = comment.split("\n")
687 688
        item = None
        for line in lines:
C
Claudio Bley 已提交
689
            line = line.lstrip().lstrip('*').lstrip()
690 691 692 693 694 695 696

            m = re.match('([_.a-zA-Z0-9]+):(.*)', line)
            if m:
                item = m.group(1)
                line = m.group(2).lstrip()

            if item:
A
Andrea Bolognani 已提交
697
                if item in res:
698 699 700 701
                    res[item] = res[item] + " " + line
                else:
                    res[item] = line
        self.index.info = res
702

703
    def strip_lead_star(self, line):
        """Remove the first '*' of *line* when it is the first non-blank char.

        The whitespace around the star is kept; other lines are returned
        unchanged.
        """
        if not line.lstrip().startswith('*'):
            return line
        return line.replace('*', '', 1)

    def cleanupComment(self):
709
        if not isinstance(self.comment, str):
710 711 712 713 714 715 716 717
            return
        # remove the leading * on multi-line comments
        lines = self.comment.splitlines(True)
        com = ""
        for line in lines:
            com = com + self.strip_lead_star(line)
        self.comment = com.strip()

718
    def parseComment(self, token):
719
        com = token[1]
720
        if self.top_comment == "":
721
            self.top_comment = com
722
        if self.comment is None or com[0] == '*':
723
            self.comment = com
724
        else:
725
            self.comment = self.comment + com
726
        token = self.lexer.token()
727

728
        if self.comment.find("DOC_DISABLE") != -1:
729
            self.stop_error()
730

731
        if self.comment.find("DOC_ENABLE") != -1:
732
            self.start_error()
733

734
        return token
735 736 737 738

    #
    # Parse a comment block associated with a typedef
    #
739
    def parseTypeComment(self, name, quiet=False):
740
        if name[0:2] == '__':
741
            quiet = True
742

743
        if self.comment is None:
744
            if not quiet:
745
                self.warning("Missing comment for type %s" % name)
746 747
            return None
        if not self.comment.startswith('*'):
748
            if not quiet:
749
                self.warning("Missing * in type comment for %s" % name)
750 751
            return None

752
        lines = self.comment.split('\n')
753 754 755
        # Remove lines that contain only single asterisk
        lines[:] = [line for line in lines if line.strip() != '*']

756
        if lines[0] != "* %s:" % name:
757
            if not quiet:
758
                self.warning("Misformatted type comment for %s" % name)
759
                self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
760
            return None
761
        del lines[0]
762 763 764 765 766 767

        # Concatenate all remaining lines by striping leading asterisks
        desc = " ".join([line.lstrip("*").strip() for line in lines]).strip()

        if not (quiet or desc):
            self.warning("Type comment for %s lack description of the macro"
768
                         % name)
769 770

        return desc
771 772 773
    #
    # Parse a comment block associated with a macro
    #
774
    def parseMacroComment(self, name, quiet=0):
        """Parse the pending comment as the documentation of macro *name*.

        self.comment is expected to look like::

            * NAME:
            * @arg: description of the argument, possibly
            *       continued on the following lines
            *
            * Free text description of the macro.

        Returns a tuple (args, desc): args is the list of
        (argument name, argument description) pairs in order of
        appearance and desc is the free text joined into one line.
        ([], "") is returned when the comment is missing or does not
        start with the expected '* NAME:' header.

        Warnings are suppressed when *quiet* is true, when *name* starts
        with a double underscore or when it is listed in ignored_macros.
        """
        global ignored_macros

        if name[0:2] == '__':
            quiet = 1
        if name in ignored_macros:
            quiet = 1

        args = []
        desc = ""

        if self.comment is None:
            if not quiet:
                self.warning("Missing comment for macro %s" % name)
            return args, desc
        # startswith() also copes with an empty comment
        if not self.comment.startswith('*'):
            if not quiet:
                self.warning("Missing * in macro comment for %s" % name)
            return args, desc

        lines = self.comment.split('\n')
        if lines[0] == '*':
            del lines[0]
        header = lines[0] if lines else ""
        if header != "* %s:" % name:
            if not quiet:
                self.warning("Misformatted macro comment for %s" % name)
                self.warning("  Expecting '* %s:' got '%s'" % (name, header))
            return args, desc
        del lines[0]

        # The comment may end right after the header, so every access to
        # lines[0] below is guarded by a check that lines is not empty
        while lines and lines[0] == '*':
            del lines[0]

        while lines and lines[0][0:3] == '* @':
            l = lines[0][3:]
            try:
                arg, desc = l.split(':', 1)
            except ValueError:
                # no ':' separating the argument name from its description
                if not quiet:
                    self.warning("Misformatted macro comment for %s" % name)
                    self.warning("  problem with '%s'" % lines[0])
                del lines[0]
                continue
            desc = desc.strip()
            arg = arg.strip()
            del lines[0]

            # Fold the continuation lines into the argument description;
            # a bare '*' line or the next '* @' line ends the argument
            l = lines[0].strip() if lines else ""
            while len(l) > 2 and l[0:3] != '* @':
                desc = desc + ' ' + l.lstrip('*').strip()
                del lines[0]
                if not lines:
                    break
                l = lines[0]
            args.append((arg, desc))

        while lines and lines[0] == '*':
            del lines[0]

        # Whatever is left is the description of the macro itself
        desc = " ".join([l.lstrip('*').strip() for l in lines]).strip()

        if not quiet and desc == "":
            self.warning("Macro comment for %s lack description of the macro" % name)

        return args, desc
845

846 847 848 849 850
    #
    # Parse a comment block and merge the information found in the
    # parameters descriptions, finally returns a block as complete
    # as possible
    #
851
    def mergeFunctionComment(self, name, description, quiet=0):
        """Merge the pending comment into the signature of function *name*.

        *description* is a (ret, args) pair: ret[0] is the return type
        and args is a list of (type, name, description) tuples.  The
        comment in self.comment is expected to look like::

            * name:
            * @arg: description of the argument
            *
            * Free text description, indentation is preserved.
            *
            * Returns description of the return value

        Each documented argument found in *args* has its tuple replaced
        in place with one carrying the description.  Returns the tuple
        ((return type, return description), args, description).

        Warnings are suppressed when *quiet* is true, for 'main', for
        names starting with a double underscore and for names listed in
        ignored_functions.
        """
        global ignored_functions

        if name == 'main':
            quiet = 1
        if name[0:2] == '__':
            quiet = 1
        if name in ignored_functions:
            quiet = 1

        ret, args = description
        desc = ""
        retdesc = ""

        if self.comment is None:
            if not quiet:
                self.warning("Missing comment for function %s" % name)
            return (ret[0], retdesc), args, desc
        # startswith() also copes with an empty comment
        if not self.comment.startswith('*'):
            if not quiet:
                self.warning("Missing * in function comment for %s" % name)
            return (ret[0], retdesc), args, desc

        lines = self.comment.split('\n')
        if lines[0] == '*':
            del lines[0]
        header = lines[0] if lines else ""
        if header != "* %s:" % name:
            if not quiet:
                self.warning("Misformatted function comment for %s" % name)
                self.warning("  Expecting '* %s:' got '%s'" % (name, header))
            return (ret[0], retdesc), args, desc
        del lines[0]

        # The comment may end right after the header, so every access to
        # lines[0] below is guarded by a check that lines is not empty
        while lines and lines[0] == '*':
            del lines[0]

        while lines and lines[0][0:3] == '* @':
            l = lines[0][3:]
            try:
                arg, desc = l.split(':', 1)
            except ValueError:
                # no ':' separating the argument name from its description
                if not quiet:
                    self.warning("Misformatted function comment for %s" % name)
                    self.warning("  problem with '%s'" % lines[0])
                del lines[0]
                continue
            desc = desc.strip()
            arg = arg.strip()
            del lines[0]

            # Fold the continuation lines into the argument description;
            # a bare '*' line or the next '* @' line ends the argument
            l = lines[0].strip() if lines else ""
            while len(l) > 2 and l[0:3] != '* @':
                desc = desc + ' ' + l.lstrip('*').strip()
                del lines[0]
                if not lines:
                    break
                l = lines[0]

            # Attach the description to the matching parameter
            for i, param in enumerate(args):
                if param[1] == arg:
                    args[i] = (param[0], arg, desc)
                    break
            else:
                if not quiet:
                    self.warning("Unable to find arg %s from function comment for %s" % (
                       arg, name))

        while lines and lines[0] == '*':
            del lines[0]

        desc = None
        while lines:
            l = lines[0]
            # Remove all leading '*', followed by at most one ' ' character
            # since we need to preserve correct indentation of code examples
            stripped = l.lstrip('*')
            if len(stripped) != len(l) and stripped.startswith(' '):
                stripped = stripped[1:]
            l = stripped
            del lines[0]

            if l.startswith("Returns"):
                # Everything from here to the end of the comment
                # describes the return value
                retdesc = l.partition(' ')[2].strip()
                while lines:
                    retdesc = retdesc + " " + lines[0].lstrip('*').strip()
                    del lines[0]
            elif desc is not None:
                desc = desc + "\n" + l
            else:
                desc = l

        if desc is None:
            desc = ""
        retdesc = retdesc.strip()
        desc = desc.strip()

        if not quiet:
            #
            # report missing comments
            #
            for param in args:
                if param[2] is None and param[0] != "void" and param[1] is not None:
                    self.warning("Function comment for %s lacks description of arg %s" % (name, param[1]))
            if retdesc == "" and ret[0] != "void":
                self.warning("Function comment for %s lacks description of return value" % name)
            if desc == "":
                self.warning("Function comment for %s lacks description of the function" % name)

        return (ret[0], retdesc), args, desc
973 974

    def parsePreproc(self, token):
        """Process one preprocessor directive and return the token after it.

        *token* is the ('preproc', directive) tuple starting the line.

        - #include: the included name is recorded with index_add().
        - #define: the macro is recorded with index_add() together with
          the arguments and description that parseMacroComment()
          extracts from the pending comment and, when the body is a
          single string literal, its value.  Macros listed in
          hidden_macros and names ending in '_H' or '_H_ALLOW' are
          skipped.
        - #ifdef, #ifndef, #if, #else, #endif: self.defines is kept as
          the stack of currently open conditions and self.conditionals
          as the stack of those mentioning 'ENABLED'.

        Returns None at end of input.
        """
        if debug:
            print("=> preproc ", token, self.lexer.tokens)
        name = token[1]
        if name == "#include":
            token = self.lexer.token()
            if token is None:
                return None
            if token[0] == 'preproc':
                self.index_add(token[1], self.filename, not self.is_header,
                               "include")
                return self.lexer.token()
            return token
        if name == "#define":
            token = self.lexer.token()
            if token is None:
                return None
            if token[0] == 'preproc':
                # TODO macros with arguments
                name = token[1]
                lst = []
                token = self.lexer.token()
                while token is not None and token[0] == 'preproc' and \
                      not token[1].startswith('#'):
                    lst.append(token[1])
                    token = self.lexer.token()
                # drop the argument list of function-like macros
                name = name.split('(')[0]

                # skip hidden macros
                if name in hidden_macros:
                    return token
                if name[-2:] == "_H" or name[-8:] == "_H_ALLOW":
                    return token

                strValue = None
                if len(lst) == 1 and lst[0][0:1] == '"' and lst[0][-1:] == '"':
                    strValue = lst[0][1:-1]
                (args, desc) = self.parseMacroComment(name, not self.is_header)
                self.index_add(name, self.filename, not self.is_header,
                               "macro", (args, desc, strValue))
                return token

        #
        # Processing of conditionals modified by Bill 1/1/05
        #
        # We process conditionals (i.e. tokens from #ifdef, #ifndef,
        # #if, #else and #endif) for headers and mainline code,
        # store the ones from the header in libxml2-api.xml, and later
        # (in the routine merge_public) verify that the two (header and
        # mainline code) agree.
        #
        # There is a small problem with processing the headers. Some of
        # the variables are not concerned with enabling / disabling of
        # library functions (e.g. '__XML_PARSER_H__'), and we don't want
        # them to be included in libxml2-api.xml, or involved in
        # the check between the header and the mainline code.  To
        # accomplish this, we ignore any conditional which doesn't include
        # the string 'ENABLED'
        #
        if name == "#ifdef" or name == "#ifndef":
            # A directive without operand still pushes an (empty) entry
            # so that the matching #endif pops the right one
            pending = self.lexer.tokens
            apstr = pending[0][1] if pending else ""
            self.defines.append(apstr)
            if apstr.find('ENABLED') != -1:
                if name == "#ifdef":
                    self.conditionals.append("defined(%s)" % apstr)
                else:
                    self.conditionals.append("!defined(%s)" % apstr)
        elif name == "#if":
            apstr = ""
            for tok in self.lexer.tokens:
                if apstr != "":
                    apstr = apstr + " "
                apstr = apstr + tok[1]
            self.defines.append(apstr)
            if apstr.find('ENABLED') != -1:
                self.conditionals.append(apstr)
        elif name == "#else":
            if self.conditionals != [] and \
               self.defines[-1].find('ENABLED') != -1:
                self.conditionals[-1] = "!(%s)" % self.conditionals[-1]
        elif name == "#endif":
            if self.conditionals != [] and \
               self.defines[-1].find('ENABLED') != -1:
                self.conditionals = self.conditionals[:-1]
            self.defines = self.defines[:-1]

        # Discard the remaining tokens of the directive
        token = self.lexer.token()
        while token is not None and token[0] == 'preproc' and \
              not token[1].startswith('#'):
            token = self.lexer.token()
        return token
1078

1079 1080 1081 1082 1083
    #
    # Token acquisition on top of the lexer: it internally handles
    # the preprocessor and comments since they are logically not part
    # of the program structure.
    #
1084 1085 1086
    def push(self, tok):
        """Forward *tok* to the push() method of the underlying lexer."""
        lexer = self.lexer
        lexer.push(tok)

1087 1088 1089 1090
    def token(self):
        """Return the next token of interest to the parser.

        Comment tokens are handed to parseComment() and preprocessor
        tokens to parsePreproc(); parsing resumes with whatever those
        return.  For 'name' tokens:

        - '__const' is returned as ('name', 'const');
        - '__attribute' skips ahead and returns the next token whose
          text is ';';
        - a word listed in ignored_words is dropped together with the
          number of following tokens recorded for it.

        Returns None at end of input.
        """
        global ignored_words

        tok = self.lexer.token()
        while tok is not None:
            kind = tok[0]
            if kind == 'comment':
                tok = self.parseComment(tok)
                continue
            if kind == 'preproc':
                tok = self.parsePreproc(tok)
                continue

            if kind == "name":
                word = tok[1]
                if word == "__const":
                    return ("name", "const")
                if word == "__attribute":
                    tok = self.lexer.token()
                    while tok is not None and tok[1] != ";":
                        tok = self.lexer.token()
                    return tok
                if word in ignored_words:
                    (skip, info) = ignored_words[word]
                    # drop the tokens belonging to the ignored word ...
                    for _ in range(skip):
                        tok = self.lexer.token()
                    # ... and carry on with the one following them
                    tok = self.lexer.token()
                    continue

            if debug:
                print("=> ", tok)
            return tok
        return None
1119

1120 1121 1122
    #
    # Parse a typedef, it records the type and its name.
    #
1123
    def parseTypedef(self, token):
        """Parse a typedef and record every name it declares.

        The type is parsed with parseType(); each declared name is then
        registered with index_add() as a "functype" (when parseType()
        left a signature), a "struct" (when the base type is "struct")
        or a plain "typedef".  Returns the token following the closing
        ';', or None when the input ends early.
        """
        if token is None:
            return None
        token = self.parseType(token)
        if token is None:
            self.error("parsing typedef")
            return None

        base_type = self.type
        cur_type = base_type
        # self.debug("end typedef type", token)
        while token is not None:
            if token[0] != "name":
                self.error("parsing typedef: expecting a name")
                return token

            name = token[1]
            signature = self.signature
            if signature is not None:
                cur_type = cur_type.split('(')[0]
                d = self.mergeFunctionComment(name,
                        ((cur_type, None), signature), 1)
                self.index_add(name, self.filename, not self.is_header,
                               "functype", d)
            elif base_type == "struct":
                self.index_add(name, self.filename, not self.is_header,
                               "struct", cur_type)
                base_type = "struct " + name
            else:
                # TODO report missing or misformatted comments
                info = self.parseTypeComment(name, 1)
                self.index_add(name, self.filename, not self.is_header,
                               "typedef", cur_type, info)
            token = self.token()

            # self.debug("end typedef", token)
            if token is None:
                self.error("parsing typedef: expecting ';'", token)
                return token

            kind, text = token[0], token[1]
            if kind == 'sep' and text == ',':
                # another declarator: restart from the base type and
                # collect the operators preceding the next name
                cur_type = base_type
                token = self.token()
                while token is not None and token[0] == "op":
                    cur_type = cur_type + token[1]
                    token = self.token()
            elif kind == 'sep' and text == ';':
                break
            elif kind == 'name':
                cur_type = base_type
            else:
                self.error("parsing typedef: expecting ';'", token)
                return token

        token = self.token()
        return token
1174

1175 1176 1177 1178
    #
    # Parse a C code block, used for functions: it parses up to and
    # including the balancing }
    #
1179
    def parseBlock(self, token):
        """Skip a C code block up to and including its balancing '}'.

        Nested blocks are handled recursively.  When self.collect_ref
        is 1 the names met on the way are recorded with index_add_ref():
        'vir*' names followed by '(' as "function", 'vir*' names
        followed by a name and then ';', ',' or '=' as "type", and
        'XEN_*' / 'LIBXEN_*' names as "typedef".

        Returns the token following the closing '}', or None when the
        input ends first.
        """
        while token is not None:
            if token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseBlock(token)
            elif token[0] == "sep" and token[1] == "}":
                self.comment = None
                token = self.token()
                return token
            elif self.collect_ref == 1:
                oldtok = token
                # token may become None here if the input ends inside
                # the block, hence the explicit checks below
                token = self.token()
                if oldtok[0] != "name":
                    continue
                ident = oldtok[1]
                if ident[0:3] == "vir":
                    if token is None:
                        continue
                    if token[0] == "sep" and token[1] == "(":
                        self.index_add_ref(ident, self.filename,
                                           0, "function")
                        token = self.token()
                    elif token[0] == "name":
                        token = self.token()
                        if token is not None and token[0] == "sep" and \
                           token[1] in (";", ",", "="):
                            self.index_add_ref(ident, self.filename,
                                               0, "type")
                elif ident[0:4] == "XEN_":
                    self.index_add_ref(ident, self.filename,
                                       0, "typedef")
                elif ident[0:7] == "LIBXEN_":
                    self.index_add_ref(ident, self.filename,
                                       0, "typedef")
            else:
                token = self.token()
        return token
1213

1214 1215 1216
    #
    # Parse a C struct definition till the balancing }
    #
1217 1218
    def parseStruct(self, token):
        """Parse the members of a C struct up to the balancing '}'.

        Each member is parsed with parseType(); the result is stored in
        self.struct_fields as a list of (type, name, comment) tuples,
        members of type "union" carrying self.union_fields as a fourth
        element.  self.type is restored after every member.

        Returns the token following the closing '}', or None when the
        input ends first.
        """
        fields = []
        # self.debug("start parseStruct", token)
        while token is not None:
            if token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseTypeBlock(token)
            elif token[0] == "sep" and token[1] == "}":
                self.struct_fields = fields
                # self.debug("end parseStruct", token)
                # print(fields)
                token = self.token()
                return token
            else:
                base_type = self.type
                # self.debug("before parseType", token)
                token = self.parseType(token)
                # self.debug("after parseType", token)
                if token is not None and token[0] == "name":
                    fname = token[1]
                    token = self.token()
                    # token is None if the input ends after the name
                    if token is not None and token[0] == "sep" and \
                       token[1] == ";":
                        # Reset the comment before reading on so that
                        # only a comment met after the ';' is attached
                        # to this field (presumably set by self.token())
                        self.comment = None
                        token = self.token()
                        self.cleanupComment()
                        if self.type == "union":
                            fields.append((self.type, fname, self.comment,
                                           self.union_fields))
                            self.union_fields = []
                        else:
                            fields.append((self.type, fname, self.comment))
                        self.comment = None
                    else:
                        self.error("parseStruct: expecting ;", token)
                elif token is not None and token[0] == "sep" and token[1] == "{":
                    token = self.token()
                    token = self.parseTypeBlock(token)
                    if token is not None and token[0] == "name":
                        token = self.token()
                    if token is not None and token[0] == "sep" and token[1] == ";":
                        token = self.token()
                    else:
                        self.error("parseStruct: expecting ;", token)
                else:
                    self.error("parseStruct: name", token)
                    token = self.token()
                self.type = base_type
        self.struct_fields = fields
        # self.debug("end parseStruct", token)
        # print(fields)
        return token
1268

1269 1270 1271
    #
    # Parse a C union definition till the balancing }
    #
1272 1273 1274
    def parseUnion(self, token):
        """Parse the members of a C union up to the balancing '}'.

        Each member is parsed with parseType(); the result is stored in
        self.union_fields as a list of (type, name, comment) tuples.
        self.type is restored after every member.

        Returns the token following the closing '}', or None when the
        input ends first.
        """
        fields = []
        # self.debug("start parseUnion", token)
        while token is not None:
            if token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseTypeBlock(token)
            elif token[0] == "sep" and token[1] == "}":
                self.union_fields = fields
                # self.debug("end parseUnion", token)
                # print(fields)
                token = self.token()
                return token
            else:
                base_type = self.type
                # self.debug("before parseType", token)
                token = self.parseType(token)
                # self.debug("after parseType", token)
                if token is not None and token[0] == "name":
                    fname = token[1]
                    token = self.token()
                    # token is None if the input ends after the name
                    if token is not None and token[0] == "sep" and \
                       token[1] == ";":
                        # Reset the comment before reading on so that
                        # only a comment met after the ';' is attached
                        # to this field (presumably set by self.token())
                        self.comment = None
                        token = self.token()
                        self.cleanupComment()
                        fields.append((self.type, fname, self.comment))
                        self.comment = None
                    else:
                        self.error("parseUnion: expecting ;", token)
                elif token is not None and token[0] == "sep" and token[1] == "{":
                    token = self.token()
                    token = self.parseTypeBlock(token)
                    if token is not None and token[0] == "name":
                        token = self.token()
                    if token is not None and token[0] == "sep" and token[1] == ";":
                        token = self.token()
                    else:
                        self.error("parseUnion: expecting ;", token)
                else:
                    self.error("parseUnion: name", token)
                    token = self.token()
                self.type = base_type
        self.union_fields = fields
        # self.debug("end parseUnion", token)
        # print(fields)
        return token

1319 1320 1321
    #
    # Parse a C enum block, parse till the balancing }
    #
1322 1323
    def parseEnumBlock(self, token):
        """Parse the body of a C enum up to the balancing '}'.

        The result is stored in self.enums as a list of
        (name, value, comment) tuples.  An explicit '= expr' value is
        kept as text with the 'U' suffix removed from plain integers;
        otherwise the value is the previous one plus one, or "" (with a
        warning) when the previous value is not a plain integer.

        When a comment is already pending on entry, every value is
        recorded as soon as its ',' is reached, with the comment pending
        at that point; otherwise it is recorded when the next name or
        the closing '}' is met.

        Returns the token following the closing '}', or None when the
        input ends first.
        """
        self.enums = []
        name = None
        comment = ""
        value = "-1"
        commentsBeforeVal = self.comment is not None
        while token is not None:
            if token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseTypeBlock(token)
            elif token[0] == "sep" and token[1] == "}":
                if name is not None:
                    self.cleanupComment()
                    if self.comment is not None:
                        comment = self.comment
                        self.comment = None
                    self.enums.append((name, value, comment))
                token = self.token()
                return token
            elif token[0] == "name":
                self.cleanupComment()
                if name is not None:
                    if self.comment is not None:
                        comment = self.comment.strip()
                        self.comment = None
                    self.enums.append((name, value, comment))
                name = token[1]
                comment = ""
                # token may be None from here on if the input ends
                # inside the enum, hence the explicit checks
                token = self.token()
                if token is not None and token[0] == "op" and \
                   token[1][0] == "=":
                    value = ""
                    if len(token[1]) > 1:
                        value = token[1][1:]
                    token = self.token()
                    while token is not None and \
                          (token[0] != "sep" or token[1] not in (',', '}')):
                        # We might be dealing with '1U << 12' here
                        value = value + re.sub(r"^(\d+)U$", "\\1", token[1])
                        token = self.token()
                else:
                    try:
                        value = "%d" % (int(value) + 1)
                    except ValueError:
                        self.warning("Failed to compute value of enum %s" % name)
                        value = ""
                if token is not None and token[0] == "sep" and \
                   token[1] == ",":
                    if commentsBeforeVal:
                        self.cleanupComment()
                        self.enums.append((name, value, self.comment))
                        name = comment = self.comment = None
                    token = self.token()
            else:
                token = self.token()
        return token
1376

1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465
    def parseVirEnumDecl(self, token):
        """Skip the argument of a VIR_ENUM_DECL(name) invocation.

        *token* is expected to be the name inside the parentheses; it
        must be followed by ')' and optionally ';'.  Anything else,
        including the end of input, is reported with self.error().

        Returns the token following the declaration, or None at end of
        input.
        """
        if token is None or token[0] != "name":
            self.error("parsing VIR_ENUM_DECL: expecting name", token)

        token = self.token()

        if token is None or token[0] != "sep" or token[1] != ')':
            self.error("parsing VIR_ENUM_DECL: expecting ')'", token)

        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == ';':
            token = self.token()

        return token

    def parseVirEnumImpl(self, token):
        """Skip the arguments of a VIR_ENUM_IMPL(...) invocation.

        The expected layout is: type name, ',', sentinel name, ',' and
        then a list of strings, each optionally wrapped in N_(...),
        closed by ')' and optionally ';'.  Unexpected tokens, including
        the end of input in the leading part, are reported with
        self.error().

        Returns the token following the invocation, or None at end of
        input.
        """
        # First the type name
        if token is None or token[0] != "name":
            self.error("parsing VIR_ENUM_IMPL: expecting name", token)

        token = self.token()

        if token is None or token[0] != "sep" or token[1] != ',':
            self.error("parsing VIR_ENUM_IMPL: expecting ','", token)
        token = self.token()

        # Now the sentinel name
        if token is None or token[0] != "name":
            self.error("parsing VIR_ENUM_IMPL: expecting name", token)

        token = self.token()

        if token is None or token[0] != "sep" or token[1] != ',':
            self.error("parsing VIR_ENUM_IMPL: expecting ','", token)

        token = self.token()

        # Now a list of strings (optional comments)
        while token is not None:
            isGettext = False
            # First a string, optionally with N_(...)
            if token[0] == 'name':
                if token[1] != 'N_':
                    self.error("parsing VIR_ENUM_IMPL: expecting 'N_'", token)
                token = self.token()
                if token is None or token[0] != "sep" or token[1] != '(':
                    self.error("parsing VIR_ENUM_IMPL: expecting '('", token)
                token = self.token()
                isGettext = True

                if token is None or token[0] != "string":
                    self.error("parsing VIR_ENUM_IMPL: expecting a string", token)
                token = self.token()
            elif token[0] == "string":
                token = self.token()
            else:
                self.error("parsing VIR_ENUM_IMPL: expecting a string", token)

            # Then a separator; token is None if the input ended
            if token is not None and token[0] == "sep":
                if isGettext and token[1] == ')':
                    token = self.token()

                if token is not None and token[1] == ',':
                    token = self.token()

                if token is not None and token[1] == ')':
                    token = self.token()
                    break

            # Then an optional comment
            if token is not None and token[0] == "comment":
                token = self.token()

        if token is not None and token[0] == "sep" and token[1] == ';':
            token = self.token()

        return token

1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483
    def parseVirLogInit(self, token):
        """Skip the argument of a VIR_LOG_INIT("...") invocation.

        *token* is expected to be the string inside the parentheses; it
        must be followed by ')' and optionally ';'.  Anything else,
        including the end of input, is reported with self.error().

        Returns the token following the invocation, or None at end of
        input.
        """
        if token is None or token[0] != "string":
            self.error("parsing VIR_LOG_INIT: expecting string", token)

        token = self.token()

        if token is None or token[0] != "sep" or token[1] != ')':
            self.error("parsing VIR_LOG_INIT: expecting ')'", token)

        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == ';':
            token = self.token()

        return token

1484 1485 1486 1487
    #
    # Parse a C definition block, used for structs or unions: it parses
    # up to and including the balancing }
    #
1488
    def parseTypeBlock(self, token):
        """Skip tokens up to and including the balancing '}'.

        Nested '{ ... }' groups are skipped recursively.  Returns the
        token following the closing '}', or None when the input ends
        first.
        """
        while token is not None:
            is_sep = token[0] == "sep"
            if is_sep and token[1] == "}":
                return self.token()
            opens_nested = is_sep and token[1] == "{"
            token = self.token()
            if opens_nested:
                # consume the nested group before carrying on
                token = self.parseTypeBlock(token)
        return token
1499

1500 1501 1502 1503 1504
    #
    # Parse a type: the fact that the type name can either occur after
    #    the definition or within the definition makes it a little harder
    #    if inside, the name token is pushed back before returning
    #
1505 1506
    def parseType(self, token):
        """Parse a C type starting at `token`, storing its text in self.type.

        Resets self.type, self.struct_fields, self.union_fields and
        self.signature, then consumes leading qualifiers, the base type
        (delegating struct/union/enum bodies and the VIR_ENUM_DECL,
        VIR_ENUM_IMPL and VIR_LOG_INIT macros to their own parsers),
        trailing operators and 'const', a function-pointer signature, an
        array suffix or a bit-field width.

        Returns the token the caller should continue with. Where a name was
        consumed during lookahead, the token after it is pushed back on the
        lexer and the name token is returned. Returns None if `token` is
        None or the stream ends.
        """
        self.type = ""
        self.struct_fields = []
        self.union_fields = []
        self.signature = None
        if token is None:
            return token

        def add_word(word):
            # Append one space-separated word to the type text.
            if self.type == "":
                self.type = word
            else:
                self.type = self.type + " " + word

        def is_tok(tok, kind, value):
            # True when tok exists and is exactly the (kind, value) token.
            return tok is not None and tok[0] == kind and tok[1] == value

        def parse_macro(macro, parse_args, placeholder):
            # Shared handling of MACRO(...): parse the arguments, then push
            # back the following token and hand out a placeholder name.
            tok = self.token()
            if is_tok(tok, "sep", "("):
                tok = self.token()
                tok = parse_args(tok)
            else:
                self.error("parsing %s: expecting '('" % macro, tok)
            if tok is not None:
                self.lexer.push(tok)
                tok = ("name", placeholder)
            return tok

        while (token is not None and token[0] == "name" and
               token[1] in ["const", "unsigned", "signed"]):
            add_word(token[1])
            token = self.token()
        if token is None:
            # The stream ended right after the qualifiers.
            self.error("parsing type %s: expecting a name" % (self.type),
                       token)
            return token

        if is_tok(token, "name", "long"):
            add_word(token[1])

            # some read ahead for long long
            oldtmp = token
            token = self.token()
            if is_tok(token, "name", "long"):
                self.type = self.type + " " + token[1]
            else:
                self.push(token)
                token = oldtmp

            # ... and for an optional trailing "int"
            oldtmp = token
            token = self.token()
            if is_tok(token, "name", "int"):
                self.type = self.type + " " + token[1]
            else:
                self.push(token)
                token = oldtmp

        elif is_tok(token, "name", "short"):
            add_word(token[1])

        elif is_tok(token, "name", "struct"):
            add_word(token[1])
            token = self.token()
            nametok = None
            if token is not None and token[0] == "name":
                nametok = token
                token = self.token()
            if is_tok(token, "sep", "{"):
                token = self.token()
                token = self.parseStruct(token)
            elif is_tok(token, "op", "*"):
                if nametok is None:
                    # "struct *" without a tag: there is no name to record.
                    self.error("struct : expecting name", token)
                    return token
                self.type = self.type + " " + nametok[1] + " *"
                token = self.token()
                while is_tok(token, "op", "*"):
                    self.type = self.type + " *"
                    token = self.token()
                if token is not None and token[0] == "name":
                    nametok = token
                    token = self.token()
                else:
                    self.error("struct : expecting name", token)
                    return token
            elif (token is not None and token[0] == "name" and
                  nametok is not None):
                self.type = self.type + " " + nametok[1]
                return token

            if nametok is not None:
                self.lexer.push(token)
                token = nametok
            return token

        elif is_tok(token, "name", "union"):
            add_word(token[1])
            token = self.token()
            nametok = None
            if token is not None and token[0] == "name":
                nametok = token
                token = self.token()
            if is_tok(token, "sep", "{"):
                token = self.token()
                token = self.parseUnion(token)
            elif (token is not None and token[0] == "name" and
                  nametok is not None):
                self.type = self.type + " " + nametok[1]
                return token

            if nametok is not None:
                self.lexer.push(token)
                token = nametok
            return token

        elif is_tok(token, "name", "enum"):
            add_word(token[1])
            self.enums = []
            token = self.token()
            if is_tok(token, "sep", "{"):
                # drop comments before the enum block
                self.comment = None
                token = self.token()
                token = self.parseEnumBlock(token)
            else:
                self.error("parsing enum: expecting '{'", token)
            enum_type = None
            if token is not None and token[0] != "name":
                self.lexer.push(token)
                token = ("name", "enum")
            elif token is not None:
                enum_type = token[1]
            for enum in self.enums:
                self.index_add(enum[0], self.filename,
                               not self.is_header, "enum",
                               (enum[1], enum[2], enum_type))
            return token

        elif is_tok(token, "name", "VIR_ENUM_DECL"):
            return parse_macro(token[1], self.parseVirEnumDecl, "virenumdecl")

        elif is_tok(token, "name", "VIR_ENUM_IMPL"):
            return parse_macro(token[1], self.parseVirEnumImpl, "virenumimpl")

        elif is_tok(token, "name", "VIR_LOG_INIT"):
            return parse_macro(token[1], self.parseVirLogInit, "virloginit")

        elif token[0] == "name":
            add_word(token[1])
        else:
            self.error("parsing type %s: expecting a name" % (self.type),
                       token)
            return token

        token = self.token()
        while token is not None and (token[0] == "op" or
              token[0] == "name" and token[1] == "const"):
            self.type = self.type + " " + token[1]
            token = self.token()

        #
        # if there is a parenthesis here, this means a function type
        #
        if is_tok(token, "sep", "("):
            self.type = self.type + token[1]
            token = self.token()
            while is_tok(token, "op", "*"):
                self.type = self.type + token[1]
                token = self.token()
            if token is None or token[0] != "name":
                self.error("parsing function type, name expected", token)
                return token
            self.type = self.type + token[1]
            nametok = token
            token = self.token()
            if not is_tok(token, "sep", ")"):
                self.error("parsing function type, ')' expected", token)
                return token
            self.type = self.type + token[1]
            token = self.token()
            if not is_tok(token, "sep", "("):
                self.error("parsing function type, '(' expected", token)
                return token
            token = self.token()
            # parseSignature() calls parseType() again, which resets
            # self.type; keep the function type and restore it afterwards.
            saved_type = self.type
            token = self.parseSignature(token)
            self.type = saved_type
            self.lexer.push(token)
            token = nametok
            return token

        #
        # do some lookahead for arrays
        #
        if token is not None and token[0] == "name":
            nametok = token
            token = self.token()
            if is_tok(token, "sep", "["):
                self.type = self.type + " " + nametok[1]
                while is_tok(token, "sep", "["):
                    self.type = self.type + token[1]
                    token = self.token()
                    while token is not None and token[0] != 'sep' and \
                          token[1] != ']' and token[1] != ';':
                        self.type = self.type + token[1]
                        token = self.token()
                if is_tok(token, "sep", "]"):
                    self.type = self.type + token[1]
                    token = self.token()
                else:
                    self.error("parsing array type, ']' expected", token)
                    return token
            elif is_tok(token, "sep", ":"):
                # remove :12 in case it's a limited int size
                token = self.token()
                token = self.token()
            self.lexer.push(token)
            token = nametok

        return token
1744

1745 1746 1747
    #
    # Parse a signature: '(' has been parsed and we scan the type definition
    #    up to the ')' included
1748 1749
    def parseSignature(self, token):
        """Parse a parameter list whose opening '(' was already consumed.

        Each parameter is parsed with parseType() and collected as a
        (type, name, None) tuple; the list is stored in self.signature.
        Returns the token following the closing ')', or None when the
        stream ends first.
        """
        signature = []
        if token is not None and token[0] == "sep" and token[1] == ')':
            # empty parameter list
            self.signature = []
            token = self.token()
            return token
        while token is not None:
            token = self.parseType(token)
            if token is None:
                break
            if token[0] == "name":
                signature.append((self.type, token[1], None))
                token = self.token()
            elif token[0] == "sep" and token[1] == ')':
                # only the type was provided
                if self.type == "...":
                    signature.append((self.type, "...", None))
                else:
                    signature.append((self.type, None, None))
            # NOTE(review): a type-only parameter followed by ',' is not
            # recorded; only a type-only parameter before ')' is. Confirm
            # whether that is intended.
            if token is not None and token[0] == "sep":
                if token[1] == ',':
                    token = self.token()
                    continue
                elif token[1] == ')':
                    token = self.token()
                    break
        self.signature = signature
        return token
1777

1778 1779 1780
    # this dict contains the functions that are allowed to use [unsigned]
    # long for legacy reasons in their signature and return type. this list is
    # fixed. new procedures and public APIs have to use [unsigned] long long
1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816
    # Maps function name -> (may return long, names of parameters that may
    # be long). Single parameter names need the trailing comma: ("flags") is
    # just the string "flags", which would turn the membership test into a
    # substring match.
    long_legacy_functions = {
        "virGetVersion": (False, ("libVer", "typeVer")),
        "virConnectGetLibVersion": (False, ("libVer",)),
        "virConnectGetVersion": (False, ("hvVer",)),
        "virDomainGetMaxMemory": (True, ()),
        "virDomainMigrate": (False, ("flags", "bandwidth")),
        "virDomainMigrate2": (False, ("flags", "bandwidth")),
        "virDomainMigrateBegin3": (False, ("flags", "bandwidth")),
        "virDomainMigrateConfirm3": (False, ("flags", "bandwidth")),
        "virDomainMigrateDirect": (False, ("flags", "bandwidth")),
        "virDomainMigrateFinish": (False, ("flags",)),
        "virDomainMigrateFinish2": (False, ("flags",)),
        "virDomainMigrateFinish3": (False, ("flags",)),
        "virDomainMigratePeer2Peer": (False, ("flags", "bandwidth")),
        "virDomainMigratePerform": (False, ("flags", "bandwidth")),
        "virDomainMigratePerform3": (False, ("flags", "bandwidth")),
        "virDomainMigratePrepare": (False, ("flags", "bandwidth")),
        "virDomainMigratePrepare2": (False, ("flags", "bandwidth")),
        "virDomainMigratePrepare3": (False, ("flags", "bandwidth")),
        "virDomainMigratePrepareTunnel": (False, ("flags", "bandwidth")),
        "virDomainMigratePrepareTunnel3": (False, ("flags", "bandwidth")),
        "virDomainMigrateToURI": (False, ("flags", "bandwidth")),
        "virDomainMigrateToURI2": (False, ("flags", "bandwidth")),
        "virDomainMigrateVersion1": (False, ("flags", "bandwidth")),
        "virDomainMigrateVersion2": (False, ("flags", "bandwidth")),
        "virDomainMigrateVersion3": (False, ("flags", "bandwidth")),
        "virDomainMigrateSetMaxSpeed": (False, ("bandwidth",)),
        "virDomainSetMaxMemory": (False, ("memory",)),
        "virDomainSetMemory": (False, ("memory",)),
        "virDomainSetMemoryFlags": (False, ("memory",)),
        "virDomainBlockCommit": (False, ("bandwidth",)),
        "virDomainBlockJobSetSpeed": (False, ("bandwidth",)),
        "virDomainBlockPull": (False, ("bandwidth",)),
        "virDomainBlockRebase": (False, ("bandwidth",)),
        "virDomainMigrateGetMaxSpeed": (False, ("bandwidth",)),
    }
1817 1818 1819 1820 1821 1822 1823 1824

    def checkLongLegacyFunction(self, name, return_type, signature):
        """Report uses of plain [unsigned] long in a function's API.

        `return_type` is the return type text and `signature` a sequence of
        parameter tuples whose first two items are (type, name). A function
        may only return or take long when long_legacy_functions lists it;
        every other use is reported through self.error().
        """
        def is_plain_long(ctype):
            # "long" but not "long long"
            return "long" in ctype and "long long" not in ctype

        may_return_long, legacy_params = self.long_legacy_functions.get(
            name, (False, ()))
        if isinstance(legacy_params, str):
            # A bare string entry means one parameter name; wrap it so the
            # membership test below is not a substring match.
            legacy_params = (legacy_params,)

        if is_plain_long(return_type) and not may_return_long:
            self.error(("function '%s' is not allowed to return long, "
                        "use long long instead") % name)

        for param in signature:
            if is_plain_long(param[0]) and param[1] not in legacy_params:
                self.error(("function '%s' is not allowed to take long "
                            "parameter '%s', use long long instead")
                           % (name, param[1]))

    # this dict contains the structs that are allowed to use [unsigned]
    # long for legacy reasons. this list is fixed. new structs have to use
    # [unsigned] long long
1840 1841 1842 1843 1844
    # Maps struct name -> names of the fields that may be long. Single
    # names need the trailing comma to be tuples rather than plain strings.
    long_legacy_struct_fields = {
        "_virDomainInfo": ("maxMem", "memory"),
        "_virNodeInfo": ("memory",),
        "_virDomainBlockJobInfo": ("bandwidth",),
    }
1845 1846 1847 1848 1849 1850 1851 1852 1853

    def checkLongLegacyStruct(self, name, fields):
        """Report struct fields that use plain [unsigned] long.

        `fields` is a sequence of tuples whose first two items are
        (type, name). A field may only be long when
        long_legacy_struct_fields lists it for struct `name`; every other
        one is reported through self.error().
        """
        allowed = self.long_legacy_struct_fields.get(name, ())
        if isinstance(allowed, str):
            # A bare string entry means one field name; wrap it so the
            # membership test below is not a substring match.
            allowed = (allowed,)

        for field in fields:
            if ("long" in field[0] and "long long" not in field[0] and
                    field[1] not in allowed):
                self.error(("struct '%s' is not allowed to contain long "
                            "field '%s', use long long instead")
                           % (name, field[1]))

1857 1858 1859 1860
    #
    # Parse a global definition, be it a type, variable or function
    # the extern "C" blocks are a bit nasty and require it to recurse.
    #
1861 1862 1863
    def parseGlobal(self, token):
        """Parse one global definition starting at `token`.

        Handles 'extern "C" { ... }' blocks (by recursing on their
        content), 'static' declarations, typedefs (delegated to
        parseTypedef), variables and functions. Variables, structs and
        functions found are registered through self.index_add().

        `token` must not be None. Returns the token following the
        definition, or None at the end of the stream.
        """
        static = 0
        if token[1] == 'extern':
            token = self.token()
            if token is None:
                return token
            if token[0] == 'string':
                if token[1] != 'C':
                    return token
                token = self.token()
                if token is None:
                    return token
                if token[0] == 'sep' and token[1] == "{":
                    token = self.token()
                    while token is not None and (token[0] != 'sep' or
                          token[1] != "}"):
                        if token[0] != 'name':
                            self.error(
                             "token %s %s unexpected at the top level" % (
                                    token[0], token[1]))
                        token = self.parseGlobal(token)
                    token = self.token()
                    return token
        elif token[1] == 'static':
            static = 1
            token = self.token()
            if token is None or token[0] != 'name':
                return token

        if token[1] == 'typedef':
            token = self.token()
            return self.parseTypedef(token)

        token = self.parseType(token)
        type_orig = self.type
        if token is None or token[0] != "name":
            return token
        decl_type = type_orig
        self.name = token[1]
        token = self.token()
        while token is not None and (token[0] == "sep" or token[0] == "op"):
            if token[0] == "sep" and token[1] == "[":
                # array declarator: fold everything up to ';' into the type
                decl_type = decl_type + token[1]
                token = self.token()
                while token is not None and (token[0] != "sep" or
                                             token[1] != ";"):
                    decl_type = decl_type + token[1]
                    token = self.token()

            if token is not None and token[0] == "op" and token[1] == "=":
                #
                # Skip the initialization of the variable
                #
                token = self.token()
                if (token is not None and token[0] == 'sep' and
                        token[1] == '{'):
                    token = self.token()
                    token = self.parseBlock(token)
                else:
                    self.comment = None
                    while token is not None and (token[0] != "sep" or
                                                 token[1] not in ',;'):
                        token = self.token()
                self.comment = None
                if token is None or token[0] != "sep" or (token[1] != ';' and
                   token[1] != ','):
                    self.error("missing ';' or ',' after value")

            if token is not None and token[0] == "sep":
                if token[1] == ";":
                    self.comment = None
                    token = self.token()
                    if decl_type == "struct":
                        self.checkLongLegacyStruct(self.name,
                                                   self.struct_fields)
                        self.index_add(self.name, self.filename,
                             not self.is_header, "struct", self.struct_fields)
                    else:
                        self.index_add(self.name, self.filename,
                             not self.is_header, "variable", decl_type)
                    break
                elif token[1] == "(":
                    token = self.token()
                    token = self.parseSignature(token)
                    if token is None:
                        return None
                    if token[0] == "sep" and token[1] in (";", "{"):
                        # ';' ends a prototype, '{' starts a definition
                        has_body = token[1] == "{"
                        self.checkLongLegacyFunction(self.name, decl_type,
                                                     self.signature)
                        d = self.mergeFunctionComment(self.name,
                                ((decl_type, None), self.signature),
                                static if has_body else 1)
                        self.index_add(self.name, self.filename, static,
                                        "function", d)
                        token = self.token()
                        if has_body:
                            token = self.parseBlock(token)
                elif token[1] == ',':
                    # several variables declared in one statement
                    self.comment = None
                    self.index_add(self.name, self.filename, static,
                                    "variable", decl_type)
                    decl_type = type_orig
                    token = self.token()
                    while token is not None and token[0] == "sep":
                        decl_type = decl_type + token[1]
                        token = self.token()
                    if token is not None and token[0] == "name":
                        self.name = token[1]
                        token = self.token()
                else:
                    break
            elif (token is not None and token[0] == "op" and
                  token[1] != "="):
                # Nothing above consumes an operator other than '='; report
                # it and skip it so the loop cannot spin on the same token.
                self.error("unexpected operator '%s' after '%s'"
                           % (token[1], self.name), token)
                token = self.token()

        return token
1982 1983

    def parse(self):
        """Parse the whole file and return the resulting index.

        Every top-level token is handed to parseGlobal(). A token that is
        not a name is reported through self.error(); parsing then stops
        after one more parseGlobal() call and None is returned. On success
        the top comment is processed and self.index is returned.
        """
        if not quiet:
            print("Parsing %s" % (self.filename))
        token = self.token()
        while token is not None:
            if token[0] != 'name':
                self.error("token %s %s unexpected at the top level" % (
                       token[0], token[1]))
                self.parseGlobal(token)
                return None
            token = self.parseGlobal(token)
        self.parseTopComment(self.top_comment)
        return self.index
1997

1998 1999 2000

class docBuilder:
    """A documentation builder"""
J
Jiri Denemark 已提交
2001
    def __init__(self, name, path='.', directories=None, includes=None):
        """Set up a documentation builder for project `name`.

        `directories` lists the directories scan() searches and defaults to
        ['.']. `includes` lists extra file names to pick up and defaults to
        an empty list; the known file names of the libvirt, libvirt-qemu,
        libvirt-lxc and libvirt-admin projects are appended to it.
        """
        # None sentinels instead of list literals, so instances never share
        # one mutable default list.
        if directories is None:
            directories = ['.']
        if includes is None:
            includes = []
        self.name = name
        self.path = path
        self.directories = directories
        if name == "libvirt":
            self.includes = includes + list(included_files.keys())
        elif name == "libvirt-qemu":
            self.includes = includes + list(qemu_included_files.keys())
        elif name == "libvirt-lxc":
            self.includes = includes + list(lxc_included_files.keys())
        elif name == "libvirt-admin":
            self.includes = includes + list(admin_included_files.keys())
        else:
            # Unknown project: keep only the caller's list, so scan() does
            # not fail on a missing attribute.
            self.includes = list(includes)
        self.modules = {}
        self.headers = {}
        self.idx = index()
        self.xref = {}
        self.index = {}
        self.basename = name
        self.errors = 0
2020

2021 2022 2023
    def warning(self, msg):
        """Print `msg` and bump the module-level warning counter."""
        global warnings
        warnings += 1
        print(msg)
2025

2026 2027
    def error(self, msg):
        """Count one more error and print `msg` on standard error."""
        self.errors += 1
        print("Error:", msg, file=sys.stderr)
2029

2030
    def indexString(self, id, str):
        """Record in self.xref which identifier each word of `str` belongs to.

        Punctuation is replaced by spaces and the text split into words.
        Words that do not start with an ASCII letter, are shorter than three
        characters, or are 'and'/'the' (any case) are ignored. For every
        other word, `id` is appended to self.xref[word]. Does nothing when
        `str` is None.
        """
        if str is None:
            return
        # One pass over the text instead of one replace() per character.
        text = re.sub(r"['\"/*\[\]()<>&#,.;]", " ", str)
        for word in text.split():
            if not re.match(r"[a-zA-Z]", word[0]):
                continue
            if len(word) < 3:
                continue
            # TODO: generalize this a bit
            if word.lower() in ('and', 'the'):
                continue
            self.xref.setdefault(word, []).append(id)
2064 2065

    def analyze(self):
        """Print a project summary unless quiet, then analyze the index."""
        if not quiet:
            print("Project %s : %d headers, %d modules" % (
                  self.name, len(self.headers), len(self.modules)))
        self.idx.analyze()
2069 2070

    def scanHeaders(self):
        """Parse every known header and merge its index into self.idx."""
        # Iterate over a snapshot of the keys: the loop assigns into
        # self.headers while it runs.
        for header in list(self.headers):
            idx = CParser(header).parse()
            self.headers[header] = idx
            self.idx.merge(idx)
2076 2077

    def scanModules(self):
        """Parse every known module and merge its public index into self.idx."""
        # Iterate over a snapshot of the keys: the loop assigns into
        # self.modules while it runs.
        for module in list(self.modules):
            idx = CParser(module).parse()
            # idx.analyze()
            self.modules[module] = idx
            self.idx.merge_public(idx)
2084 2085 2086

    def scan(self):
        """Collect the .c and .h files to document, then parse them.

        For each directory in self.directories, every *.c file whose path
        contains one of self.includes is registered in self.modules and
        every such *.h file in self.headers, with a None value. The headers
        are then scanned, followed by the modules.
        """
        for directory in self.directories:
            for pattern, registry in (("/*.c", self.modules),
                                      ("/*.h", self.headers)):
                for path in glob.glob(directory + pattern):
                    if any(path.find(incl) != -1 for incl in self.includes):
                        registry[path] = None
        self.scanHeaders()
        self.scanModules()
2107

2108 2109
    def modulename_file(self, file):
        """Return the base name of `file` without a trailing '.h' or '.c'."""
        module = os.path.basename(file)
        if module.endswith(('.h', '.c')):
            module = module[:-2]
        return module
2115 2116 2117 2118

    def serialize_enum(self, output, name):
        """Write the <enum/> element for enum value `name` to `output`.

        The entry's info is indexed as (value expression, description,
        enum type). When the expression can be evaluated, the computed
        value and its hexadecimal form are written; otherwise the raw text
        is written as the value. An expression of the form 1<<N also gets
        a value_bitshift attribute.
        """
        entry = self.idx.enums[name]
        output.write("    <enum name='%s' file='%s'" % (name,
                     self.modulename_file(entry.header)))
        if entry.info is not None:
            info = entry.info
            valhex = ""
            if info[0] is not None and info[0] != '':
                # NOTE(review): eval() runs text taken from the parsed
                # sources; only use this on trusted input.
                try:
                    val = eval(info[0])
                    valhex = hex(val)
                except Exception:
                    # not a Python-evaluable integer expression
                    val = info[0]
                output.write(" value='%s'" % (val))

                if valhex != "":
                    output.write(" value_hex='%s'" % (valhex))

                # raw string: "\(" and "\d" are regex escapes, not string
                # escapes
                m = re.match(r"\(?1<<(\d+)\)?", info[0])
                if m:
                    output.write(" value_bitshift='%s'" % (m.group(1)))

            if info[2] is not None and info[2] != '':
                output.write(" type='%s'" % info[2])
            if info[1] is not None and info[1] != '':
                output.write(" info='%s'" % escape(info[1]))
        output.write("/>\n")

    def serialize_macro(self, output, name):
        """Write the <macro> element for macro `name` to `output`.

        The entry's info, when present, is (args, description, string
        value), where args is a sequence of (name, description) pairs. The
        macro description and every argument description are also fed to
        indexString() under the macro's name.
        """
        entry = self.idx.macros[name]
        output.write("    <macro name='%s' file='%s'" % (name,
                     self.modulename_file(entry.header)))
        if entry.info is None:
            args = []
            desc = None
            strValue = None
        else:
            (args, desc, strValue) = entry.info

        if strValue is not None:
            output.write(" string='%s'" % strValue)
        output.write(">\n")

        if desc is not None and desc != "":
            output.write("      <info><![CDATA[%s]]></info>\n" % (desc))
            self.indexString(name, desc)
        # Use separate names in the loop: rebinding `name` here made the
        # argument text get indexed under the argument instead of the macro.
        for (arg_name, arg_desc) in args:
            if arg_desc is not None and arg_desc != "":
                output.write("      <arg name='%s' info='%s'/>\n" % (
                             arg_name, escape(arg_desc)))
                self.indexString(name, arg_desc)
            else:
                output.write("      <arg name='%s'/>\n" % arg_name)
        output.write("    </macro>\n")

2172
    def serialize_union(self, output, field, desc):
        """Write a struct field of union type, with its members, to `output`.

        `field` is indexed as (type, name, description, members), each
        member being (type, name, description). `desc` is written as-is
        into the info attribute of the outer <field>; member descriptions
        are escaped here, None becoming an empty string.
        """
        output.write("      <field name='%s' type='union' info='%s'>\n" % (
                     field[1], desc))
        output.write("        <union>\n")
        for member in field[3]:
            member_desc = member[2]
            if member_desc is None:
                member_desc = ''
            else:
                member_desc = escape(member_desc)
            output.write("          <field name='%s' type='%s' info='%s'/>\n"
                         % (member[1], member[0], member_desc))

        output.write("        </union>\n")
        output.write("      </field>\n")

2186 2187
    def serialize_typedef(self, output, name):
        """Write the element describing typedef `name` to `output`.

        A typedef whose type text starts with 'struct ' is written as a
        <struct> element, including its fields when the struct is known to
        the index with a list or tuple of fields. Any other typedef is
        written as a <typedef> element, with an <info> child when the entry
        carries a non-empty `extra` description.
        """
        entry = self.idx.typedefs[name]
        if entry.info[0:7] == 'struct ':
            output.write("    <struct name='%s' file='%s' type='%s'" % (
                     name, self.modulename_file(entry.header), entry.info))
            # From here on the struct tag is used, not the typedef name.
            name = entry.info[7:]
            if (name in self.idx.structs and
                    isinstance(self.idx.structs[name].info, (list, tuple))):
                output.write(">\n")
                try:
                    for field in self.idx.structs[name].info:
                        desc = field[2]
                        self.indexString(name, desc)
                        if desc is None:
                            desc = ''
                        else:
                            desc = escape(desc)
                        if field[0] == "union":
                            self.serialize_union(output, field, desc)
                        else:
                            output.write(
                                "      <field name='%s' type='%s' info='%s'/>\n"
                                % (field[1], field[0], desc))
                except Exception:
                    # best effort: still close the element below
                    self.warning("Failed to serialize struct %s" % name)
                output.write("    </struct>\n")
            else:
                output.write("/>\n")
        else:
            output.write("    <typedef name='%s' file='%s' type='%s'" % (
                         name, self.modulename_file(entry.header), entry.info))
            # An entry may lack `extra`; look it up safely rather than
            # catching every exception around the writes, which could emit
            # "/>" after the element had already been opened.
            desc = getattr(entry, "extra", None)
            if desc is not None and desc != "":
                output.write(">\n      <info><![CDATA[%s]]></info>\n" % (desc))
                output.write("    </typedef>\n")
            else:
                output.write("/>\n")
2224 2225 2226

    def serialize_variable(self, output, name):
        """Write the <variable> element for the variable `name` to `output`.

        The `type` attribute is only emitted when the index entry has a
        non-None `info`.
        """
        entry = self.idx.variables[name]
        if entry.info is None:
            output.write("    <variable name='%s' file='%s'/>\n" % (
                    name, self.modulename_file(entry.header)))
            return
        output.write("    <variable name='%s' file='%s' type='%s'/>\n" % (
                name, self.modulename_file(entry.header), entry.info))
2233

2234 2235
    def serialize_function(self, output, name):
        """Write the XML element describing the function `name` to `output`.

        The element is named after the entry's `type` and contains, in
        order: an optional <cond> with the conditionals joined by
        ' &amp;&amp; ', the <info> description, the <return> element and one
        <arg> element per non-void parameter.

        Missing return or argument documentation is reported through
        self.error(), except for functions listed in ignored_functions, for
        which an empty info attribute is written instead.  Any other failure
        while processing the entry's info is reported on stderr and counted
        as a warning; the closing tag is always written.
        """
        func = self.idx.functions[name]
        if name == debugsym and not quiet:
            print("=>", func)

        # NB: this is consumed by a regex in 'getAPIFilenames' in hvsupport.pl
        output.write("    <%s name='%s' file='%s' module='%s'>\n" % (func.type,
                     name, self.modulename_file(func.header),
                     self.modulename_file(func.module)))
        #
        # Processing of conditionals modified by Bill 1/1/05
        #
        if func.conditionals is not None:
            apstr = ""
            for cond in func.conditionals:
                if apstr != "":
                    apstr = apstr + " &amp;&amp; "
                apstr = apstr + cond
            output.write("      <cond>%s</cond>\n" % (apstr))
        try:
            (ret, params, desc) = func.info
            output.write("      <info><![CDATA[%s]]></info>\n" % (desc))
            self.indexString(name, desc)
            if ret[0] is not None:
                if ret[0] == "void":
                    output.write("      <return type='void'/>\n")
                elif ret[1] is None or ret[1] == '':
                    # Same policy as for arguments below: ignored functions
                    # get an empty description rather than passing a missing
                    # (possibly None) value to escape().
                    if name in ignored_functions:
                        output.write("      <return type='%s' info=''/>\n" % (ret[0],))
                    else:
                        self.error("Missing documentation for return of function `%s'" % name)
                else:
                    output.write("      <return type='%s' info='%s'/>\n" % (
                             ret[0], escape(ret[1])))
                    self.indexString(name, ret[1])
            for param in params:
                if param[0] == 'void':
                    continue
                if (param[2] is None or param[2] == ''):
                    if name in ignored_functions:
                        output.write("      <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
                    else:
                        self.error("Missing documentation for arg `%s' of function `%s'" % (param[1], name))
                else:
                    output.write("      <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
                    self.indexString(name, param[2])
        except Exception:
            # Best effort: keep generating the remaining symbols, but let
            # KeyboardInterrupt/SystemExit through.
            print("Exception:", sys.exc_info()[1], file=sys.stderr)
            self.warning("Failed to save function %s info: %s" % (name, repr(func.info)))
        output.write("    </%s>\n" % (func.type))

    def serialize_exports(self, output, file):
        """Write the <file> element listing what the header `file` exports.

        The element starts with the header's summary and description (a
        warning is issued for each one that is missing) and a <deprecated/>
        marker when the description contains "DEPRECATED".  It is followed
        by one <exports> element per macro, enum, typedef, struct, variable
        and function recorded for that header.
        """
        module = self.modulename_file(file)
        output.write("    <file name='%s'>\n" % (module))
        header = self.headers[file]
        if header.info is not None:
            for section in ('Summary', 'Description'):
                try:
                    output.write("     <%s>%s</%s>\n" % (
                                 section.lower(),
                                 escape(header.info[section]),
                                 section.lower()))
                except KeyError:
                    self.warning("Header %s lacks a %s description" % (module, section))
            if ('Description' in header.info and
                    header.info['Description'].find("DEPRECATED") != -1):
                output.write("     <deprecated/>\n")

        # Macros are sometime used to masquerade other types: a macro whose
        # name is also known as another kind of symbol is not exported here.
        masking = ("functions", "variables", "typedefs",
                   "structs", "unions", "enums")
        for symbol in uniq(header.macros.keys()):
            if any(symbol in getattr(header, table) for table in masking):
                continue
            output.write("     <exports symbol='%s' type='macro'/>\n" % (symbol))

        exported = (
            ("enums", "     <exports symbol='%s' type='enum'/>\n"),
            ("typedefs", "     <exports symbol='%s' type='typedef'/>\n"),
            ("structs", "     <exports symbol='%s' type='struct'/>\n"),
            ("variables", "     <exports symbol='%s' type='variable'/>\n"),
            ("functions", "     <exports symbol='%s' type='function'/>\n"),
        )
        for table, line in exported:
            for symbol in uniq(getattr(header, table).keys()):
                output.write(line % (symbol))
        output.write("    </file>\n")
2326 2327

    def serialize_xrefs_files(self, output):
        """Write one <file> element per header, in sorted header order.

        Each element holds a <ref> for every function, variable, macro,
        typedef, struct and enum recorded for that header, after passing
        the combined name list through uniq().
        """
        for file in sorted(self.headers.keys()):
            output.write("    <file name='%s'>\n" % (self.modulename_file(file)))
            header = self.headers[file]
            names = []
            for table in (header.functions, header.variables, header.macros,
                          header.typedefs, header.structs, header.enums):
                names.extend(table.keys())
            for symbol in uniq(names):
                output.write("      <ref name='%s'/>\n" % (symbol))
            output.write("    </file>\n")

    def serialize_xrefs_functions(self, output):
        """Write, per parameter type, the functions taking that type.

        One <type> element is written for each parameter type, in sorted
        order, containing a sorted list of <ref> elements.  The types '',
        'void', 'int', 'char *' and 'const char *' are not listed.
        Functions whose info cannot be unpacked are skipped.
        """
        users = {}
        for name in self.idx.functions.keys():
            func = self.idx.functions[name]
            try:
                (ret, params, desc) = func.info
                for param in params:
                    if param[0] == 'void':
                        continue
                    # A set, because a function with several parameters of
                    # the same type would otherwise be recorded repeatedly.
                    users.setdefault(param[0], set()).add(name)
            except Exception:
                # Best effort: ignore entries without usable info, but do
                # not swallow KeyboardInterrupt/SystemExit.
                continue
        for type in sorted(users.keys()):
            if type in ['', "void", "int", "char *", "const char *"]:
                continue
            output.write("    <type name='%s'>\n" % (type))
            for ref in sorted(users[type]):
                output.write("      <ref name='%s'/>\n" % (ref))
            output.write("    </type>\n")
2372 2373 2374

    def serialize_xrefs_constructors(self, output):
        """Write, per return type, the functions returning that type.

        One <type> element is written for each return type, in sorted
        order, containing a sorted list of <ref> elements.  The types '',
        'void', 'int', 'char *' and 'const char *' are not listed.
        Functions whose info cannot be unpacked are skipped.
        """
        producers = {}
        for name in self.idx.functions.keys():
            func = self.idx.functions[name]
            try:
                (ret, params, desc) = func.info
                if ret[0] == "void":
                    continue
                producers.setdefault(ret[0], []).append(name)
            except Exception:
                # Best effort: ignore entries without usable info, but do
                # not swallow KeyboardInterrupt/SystemExit.
                continue
        for type in sorted(producers.keys()):
            if type in ['', "void", "int", "char *", "const char *"]:
                continue
            output.write("    <type name='%s'>\n" % (type))
            for ref in sorted(producers[type]):
                output.write("      <ref name='%s'/>\n" % (ref))
            output.write("    </type>\n")
2396 2397

    def serialize_xrefs_alpha(self, output):
        """Write the sorted identifiers grouped by their first character.

        Each run of identifiers sharing a first character is wrapped in a
        <letter> element named after that character.
        """
        # Collect the runs first: [first character, [identifiers...]].
        groups = []
        for ident in sorted(self.idx.identifiers.keys()):
            if not groups or ident[0] != groups[-1][0]:
                groups.append([ident[0], []])
            groups[-1][1].append(ident)

        for initial, members in groups:
            output.write("    <letter name='%s'>\n" % (initial))
            for ident in members:
                output.write("      <ref name='%s'/>\n" % (ident))
            output.write("    </letter>\n")
2409 2410

    def serialize_xrefs_references(self, output):
        """Write one <reference> element per identifier, in sorted order.

        The href is built from self.basename, the module name of the
        identifier's header and the identifier itself as the anchor.
        """
        identifiers = self.idx.identifiers
        for symbol in sorted(identifiers.keys()):
            header = identifiers[symbol].header
            href = ('html/' + self.basename + '-' +
                    self.modulename_file(header) + '.html#' +
                    symbol)
            output.write("    <reference name='%s' href='%s'/>\n" % (symbol, href))
2419 2420 2421

    def serialize_xrefs_index(self, output):
        """Write the word index held in self.xref, split into chunks.

        Words are processed in sorted order and grouped in <letter>
        elements by first character; words with more than 30 entries are
        left out.  A new <chunk> is started at a letter boundary once more
        than 200 references were written in the current chunk.  A trailing
        <chunks> element summarises each chunk with its first and last
        letter.  Nothing is written when no word qualifies.

        Note that the entry list of every indexed word is sorted in place.
        """
        xref = self.xref
        current = None          # first character of the open <letter>
        refs_in_chunk = 0       # references written since the chunk opened
        next_chunk = 0          # number given to the next <chunk>
        summary = []
        for word in sorted(xref.keys()):
            if len(xref[word]) > 30:
                continue
            initial = word[0]
            if initial != current:
                open_chunk = current is None
                if current is not None:
                    output.write("      </letter>\n")
                    if refs_in_chunk > 200:
                        # The current chunk is full: close it and record it.
                        output.write("    </chunk>\n")
                        refs_in_chunk = 0
                        summary.append(["chunk%s" % (next_chunk - 1), chunk_start, current])
                        open_chunk = True
                if open_chunk:
                    output.write("    <chunk name='chunk%s'>\n" % (next_chunk))
                    chunk_start = initial
                    next_chunk = next_chunk + 1
                current = initial
                output.write("      <letter name='%s'>\n" % (current))
            output.write("        <word name='%s'>\n" % (word))
            tokens = xref[word]
            tokens.sort()
            previous = None
            for token in tokens:
                # Sorted input: duplicates are adjacent.
                if previous == token:
                    continue
                previous = token
                output.write("          <ref name='%s'/>\n" % (token))
                refs_in_chunk = refs_in_chunk + 1
            output.write("        </word>\n")

        if current is None:
            return
        output.write("      </letter>\n")
        output.write("    </chunk>\n")
        if refs_in_chunk != 0:
            summary.append(["chunk%s" % (next_chunk - 1), chunk_start, current])
        output.write("    <chunks>\n")
        for entry in summary:
            output.write("      <chunk name='%s' start='%s' end='%s'/>\n" % (
                         entry[0], entry[1], entry[2]))
        output.write("    </chunks>\n")
2465 2466

    def serialize_xrefs(self, output):
        """Write every cross-reference section to `output`.

        Each section is produced by the matching serialize_xrefs_* method
        and wrapped in its own element, in this order: references, alpha,
        constructors, functions, files, index.
        """
        sections = (
            ("  <references>\n", self.serialize_xrefs_references, "  </references>\n"),
            ("  <alpha>\n", self.serialize_xrefs_alpha, "  </alpha>\n"),
            ("  <constructors>\n", self.serialize_xrefs_constructors, "  </constructors>\n"),
            ("  <functions>\n", self.serialize_xrefs_functions, "  </functions>\n"),
            ("  <files>\n", self.serialize_xrefs_files, "  </files>\n"),
            ("  <index>\n", self.serialize_xrefs_index, "  </index>\n"),
        )
        for opening, emit, closing in sections:
            output.write(opening)
            emit(output)
            output.write(closing)
2485 2486

    def serialize(self):
        """Write the API description and cross-reference XML files.

        '<path>/<name>-api.xml' receives the exported files followed by all
        macros, enums, typedefs, variables and functions, each group in
        sorted order.  If errors were recorded during generation, a message
        is printed on stderr and the process exits with status 3 before the
        second file is produced.  Otherwise '<path>/<name>-refs.xml'
        receives the cross references.

        Both files are written inside `with` blocks so the handles are
        closed even when a serializer raises.
        """
        filename = "%s/%s-api.xml" % (self.path, self.name)
        if not quiet:
            print("Saving XML description %s" % (filename))
        # NOTE(review): the XML declaration announces ISO-8859-1 while the
        # file is opened with the platform default encoding - confirm the
        # generated content is ASCII-only.
        with open(filename, "w") as output:
            output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
            output.write("<api name='%s'>\n" % self.name)
            output.write("  <files>\n")
            for file in sorted(self.headers.keys()):
                self.serialize_exports(output, file)
            output.write("  </files>\n")
            output.write("  <symbols>\n")
            for macro in sorted(self.idx.macros.keys()):
                self.serialize_macro(output, macro)
            for enum in sorted(self.idx.enums.keys()):
                self.serialize_enum(output, enum)
            for typedef in sorted(self.idx.typedefs.keys()):
                self.serialize_typedef(output, typedef)
            for variable in sorted(self.idx.variables.keys()):
                self.serialize_variable(output, variable)
            for function in sorted(self.idx.functions.keys()):
                self.serialize_function(output, function)
            output.write("  </symbols>\n")
            output.write("</api>\n")

        if self.errors > 0:
            print("apibuild.py: %d error(s) encountered during generation" % self.errors, file=sys.stderr)
            sys.exit(3)

        filename = "%s/%s-refs.xml" % (self.path, self.name)
        if not quiet:
            print("Saving XML Cross References %s" % (filename))
        with open(filename, "w") as output:
            output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
            output.write("<apirefs name='%s'>\n" % self.name)
            self.serialize_xrefs(output)
            output.write("</apirefs>\n")


A
Andrea Bolognani 已提交
2533 2534 2535 2536
class app:
    """Entry points used when the file is run as a script."""

    def warning(self, msg):
        """Print `msg` and bump the module-level warning counter."""
        global warnings
        warnings += 1
        print(msg)

    def rebuild(self, name):
        """Scan, analyze and serialize the API description of module `name`.

        The source tree is located through the `srcdir` and `builddir`
        environment variables, falling back to paths relative to the
        current directory.  Returns the docBuilder used, or None (after a
        warning) when `name` is not a known module or no source tree is
        found.
        """
        if name not in ["libvirt", "libvirt-qemu", "libvirt-lxc", "libvirt-admin"]:
            self.warning("rebuild() failed, unknown module %s" % name)
            return None

        srcdir = os.path.abspath((os.environ["srcdir"]))
        builddir = os.path.abspath((os.environ["builddir"]))
        if builddir == srcdir:
            # In-tree build: there is no separate build directory to search.
            builddir = None

        if glob.glob(srcdir + "/../src/libvirt.c"):
            dirs = [srcdir + "/../src",
                    srcdir + "/../src/util",
                    srcdir + "/../include/libvirt"]
            common_in_src = os.path.exists(
                srcdir + "/../include/libvirt/libvirt-common.h")
            if builddir and not common_in_src:
                dirs.append(builddir + "/../include/libvirt")
        elif glob.glob("src/libvirt.c"):
            dirs = ["src", "src/util", "include/libvirt"]
        else:
            self.warning("rebuild() failed, unable to guess the module")
            return None

        if not quiet:
            print("Rebuilding API description for %s" % name)
        builder = docBuilder(name, srcdir, dirs, [])
        builder.scan()
        builder.analyze()
        builder.serialize()
        return builder

    #
    # for debugging the parser
    #
    def parse(self, filename):
        """Run the C parser on `filename` and return what it produces."""
        return CParser(filename).parse()
2579 2580 2581


if __name__ == "__main__":
    # With an argument: parse that single file with debugging enabled.
    # Without: rebuild the description of every known module.
    # The exit status is 2 when any warning was issued, 0 otherwise.
    app = app()
    if len(sys.argv) > 1:
        debug = 1
        app.parse(sys.argv[1])
    else:
        for module in ("libvirt", "libvirt-qemu", "libvirt-lxc", "libvirt-admin"):
            app.rebuild(module)
    sys.exit(2 if warnings > 0 else 0)