#!/usr/bin/python -u
#
# This is the API builder, it parses the C sources and build the
# API formal description in XML.
#
# See Copyright for the status of this software.
#
# daniel@veillard.com
#
import os, sys
import string
import glob
debug=0
#
# C parser analysis code
#
ignored_files = {
"trio": "too many non standard macros",
"trio.c": "too many non standard macros",
"trionan.c": "too many non standard macros",
"triostr.c": "too many non standard macros",
"acconfig.h": "generated portability layer",
"config.h": "generated portability layer",
"libxml.h": "internal only",
"testOOM.c": "out of memory tester",
"testOOMlib.h": "out of memory tester",
"testOOMlib.c": "out of memory tester",
}
ignored_words = {
"WINAPI": (0, "Windows keyword"),
"LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
"XMLPUBVAR": (0, "Special macro for extern vars for win32"),
"XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
"EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
"XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
"XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
"EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
"XMLCALL": (0, "Special macro for win32 calls"),
"XSLTCALL": (0, "Special macro for win32 calls"),
"EXSLTCALL": (0, "Special macro for win32 calls"),
"__declspec": (3, "Windows keyword"),
"ATTRIBUTE_UNUSED": (0, "macro keyword"),
"LIBEXSLT_PUBLIC": (0, "macro keyword"),
"X_IN_Y": (5, "macro function builder"),
"XSLT_ITEM_COMMON_FIELDS": (0, "Special macro")
}
def escape(raw):
raw = string.replace(raw, '&', '&')
raw = string.replace(raw, '<', '<')
raw = string.replace(raw, '>', '>')
raw = string.replace(raw, "'", ''')
raw = string.replace(raw, '"', '"')
return raw
def uniq(items):
d = {}
for item in items:
d[item]=1
return d.keys()
class identifier:
def __init__(self, name, module=None, type=None, lineno = 0,
info=None, extra=None):
self.name = name
self.module = module
self.type = type
self.info = info
self.extra = extra
self.lineno = lineno
self.static = 0
def __repr__(self):
r = "%s %s:" % (self.type, self.name)
if self.static:
r = r + " static"
if self.module != None:
r = r + " from %s" % (self.module)
if self.info != None:
r = r + " " + `self.info`
if self.extra != None:
r = r + " " + `self.extra`
return r
def set_module(self, module):
self.module = module
def set_type(self, type):
self.type = type
def set_info(self, info):
self.info = info
def set_extra(self, extra):
self.extra = extra
def set_lineno(self, lineno):
self.lineno = lineno
def set_static(self, static):
self.static = static
def get_name(self):
return self.name
def get_module(self):
return self.module
def get_type(self):
return self.type
def get_info(self):
return self.info
def get_lineno(self):
return self.lineno
def get_extra(self):
return self.extra
def get_static(self):
return self.static
def update(self, module, type = None, info = None, extra=None):
if module != None and self.module == None:
self.set_module(module)
if type != None and self.type == None:
self.set_type(type)
if info != None:
self.set_info(info)
if extra != None:
self.set_extra(extra)
class index:
def __init__(self, name = "noname"):
self.name = name
self.identifiers = {}
self.functions = {}
self.variables = {}
self.includes = {}
self.structs = {}
self.enums = {}
self.typedefs = {}
self.macros = {}
self.references = {}
self.info = {}
def add_ref(self, name, module, static, type, lineno, info=None, extra=None):
if name[0:2] == '__':
return None
d = None
try:
d = self.identifiers[name]
d.update(module, type, lineno, info, extra)
except:
d = identifier(name, module, type, lineno, info, extra)
self.identifiers[name] = d
if d != None and static == 1:
d.set_static(1)
if d != None and name != None and type != None:
self.references[name] = d
def add(self, name, module, static, type, lineno, info=None, extra=None):
if name[0:2] == '__':
return None
d = None
try:
d = self.identifiers[name]
d.update(module, type, lineno, info, extra)
except:
d = identifier(name, module, type, lineno, info, extra)
self.identifiers[name] = d
if d != None and static == 1:
d.set_static(1)
if d != None and name != None and type != None:
if type == "function":
self.functions[name] = d
elif type == "functype":
self.functions[name] = d
elif type == "variable":
self.variables[name] = d
elif type == "include":
self.includes[name] = d
elif type == "struct":
self.structs[name] = d
elif type == "enum":
self.enums[name] = d
elif type == "typedef":
self.typedefs[name] = d
elif type == "macro":
self.macros[name] = d
else:
print "Unable to register type ", type
return d
def merge(self, idx):
for id in idx.functions.keys():
#
# macro might be used to override functions or variables
# definitions
#
if self.macros.has_key(id):
del self.macros[id]
if self.functions.has_key(id):
print "function %s from %s redeclared in %s" % (
id, self.functions[id].module, idx.functions[id].module)
else:
self.functions[id] = idx.functions[id]
self.identifiers[id] = idx.functions[id]
for id in idx.variables.keys():
#
# macro might be used to override functions or variables
# definitions
#
if self.macros.has_key(id):
del self.macros[id]
if self.variables.has_key(id):
print "variable %s from %s redeclared in %s" % (
id, self.variables[id].module, idx.variables[id].module)
else:
self.variables[id] = idx.variables[id]
self.identifiers[id] = idx.variables[id]
for id in idx.structs.keys():
if self.structs.has_key(id):
print "struct %s from %s redeclared in %s" % (
id, self.structs[id].module, idx.structs[id].module)
else:
self.structs[id] = idx.structs[id]
self.identifiers[id] = idx.structs[id]
for id in idx.typedefs.keys():
if self.typedefs.has_key(id):
print "typedef %s from %s redeclared in %s" % (
id, self.typedefs[id].module, idx.typedefs[id].module)
else:
self.typedefs[id] = idx.typedefs[id]
self.identifiers[id] = idx.typedefs[id]
for id in idx.macros.keys():
#
# macro might be used to override functions or variables
# definitions
#
if self.variables.has_key(id):
continue
if self.functions.has_key(id):
continue
if self.enums.has_key(id):
continue
if self.macros.has_key(id):
print "macro %s from %s redeclared in %s" % (
id, self.macros[id].module, idx.macros[id].module)
else:
self.macros[id] = idx.macros[id]
self.identifiers[id] = idx.macros[id]
for id in idx.enums.keys():
if self.enums.has_key(id):
print "enum %s from %s redeclared in %s" % (
id, self.enums[id].module, idx.enums[id].module)
else:
self.enums[id] = idx.enums[id]
self.identifiers[id] = idx.enums[id]
def merge_public(self, idx):
for id in idx.functions.keys():
if self.functions.has_key(id):
up = idx.functions[id]
self.functions[id].update(None, up.type, up.info, up.extra)
# else:
# print "Function %s from %s is not declared in headers" % (
# id, idx.functions[id].module)
# TODO: do the same for variables.
def analyze_dict(self, type, dict):
count = 0
public = 0
for name in dict.keys():
id = dict[name]
count = count + 1
if id.static == 0:
public = public + 1
if count != public:
print " %d %s , %d public" % (count, type, public)
elif count != 0:
print " %d public %s" % (count, type)
def analyze(self):
self.analyze_dict("functions", self.functions)
self.analyze_dict("variables", self.variables)
self.analyze_dict("structs", self.structs)
self.analyze_dict("typedefs", self.typedefs)
self.analyze_dict("macros", self.macros)
class CLexer:
"""A lexer for the C language, tokenize the input by reading and
analyzing it line by line"""
def __init__(self, input):
self.input = input
self.tokens = []
self.line = ""
self.lineno = 0
def getline(self):
line = ''
while line == '':
line = self.input.readline()
if not line:
return None
self.lineno = self.lineno + 1
line = string.lstrip(line)
line = string.rstrip(line)
if line == '':
continue
while line[-1] == '\\':
line = line[:-1]
n = self.input.readline()
self.lineno = self.lineno + 1
n = string.lstrip(n)
n = string.rstrip(n)
if not n:
break
else:
line = line + n
return line
def getlineno(self):
return self.lineno
def push(self, token):
self.tokens.insert(0, token);
def debug(self):
print "Last token: ", self.last
print "Token queue: ", self.tokens
print "Line %d end: " % (self.lineno), self.line
def token(self):
while self.tokens == []:
if self.line == "":
line = self.getline()
else:
line = self.line
self.line = ""
if line == None:
return None
if line[0] == '#':
self.tokens = map((lambda x: ('preproc', x)),
string.split(line))
break;
l = len(line)
if line[0] == '"' or line[0] == "'":
end = line[0]
line = line[1:]
found = 0
tok = ""
while found == 0:
i = 0
l = len(line)
while i < l:
if line[i] == end:
self.line = line[i+1:]
line = line[:i]
l = i
found = 1
break
if line[i] == '\\':
i = i + 1
i = i + 1
tok = tok + line
if found == 0:
line = self.getline()
if line == None:
return None
self.last = ('string', tok)
return self.last
if l >= 2 and line[0] == '/' and line[1] == '*':
line = line[2:]
found = 0
tok = ""
while found == 0:
i = 0
l = len(line)
while i < l:
if line[i] == '*' and i+1 < l and line[i+1] == '/':
self.line = line[i+2:]
line = line[:i-1]
l = i
found = 1
break
i = i + 1
if tok != "":
tok = tok + "\n"
tok = tok + line
if found == 0:
line = self.getline()
if line == None:
return None
self.last = ('comment', tok)
return self.last
if l >= 2 and line[0] == '/' and line[1] == '/':
line = line[2:]
self.last = ('comment', line)
return self.last
i = 0
while i < l:
if line[i] == '/' and i+1 < l and line[i+1] == '/':
self.line = line[i:]
line = line[:i]
break
if line[i] == '/' and i+1 < l and line[i+1] == '*':
self.line = line[i:]
line = line[:i]
break
if line[i] == '"' or line[i] == "'":
self.line = line[i:]
line = line[:i]
break
i = i + 1
l = len(line)
i = 0
while i < l:
if line[i] == ' ' or line[i] == '\t':
i = i + 1
continue
o = ord(line[i])
if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
(o >= 48 and o <= 57):
s = i
while i < l:
o = ord(line[i])
if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
(o >= 48 and o <= 57) or string.find(
" \t(){}:;,+-*/%&!|[]=><", line[i]) == -1:
i = i + 1
else:
break
self.tokens.append(('name', line[s:i]))
continue
if string.find("(){}:;,[]", line[i]) != -1:
# if line[i] == '(' or line[i] == ')' or line[i] == '{' or \
# line[i] == '}' or line[i] == ':' or line[i] == ';' or \
# line[i] == ',' or line[i] == '[' or line[i] == ']':
self.tokens.append(('sep', line[i]))
i = i + 1
continue
if string.find("+-*><=/%&!|.", line[i]) != -1:
# if line[i] == '+' or line[i] == '-' or line[i] == '*' or \
# line[i] == '>' or line[i] == '<' or line[i] == '=' or \
# line[i] == '/' or line[i] == '%' or line[i] == '&' or \
# line[i] == '!' or line[i] == '|' or line[i] == '.':
if line[i] == '.' and i + 2 < l and \
line[i+1] == '.' and line[i+2] == '.':
self.tokens.append(('name', '...'))
i = i + 3
continue
j = i + 1
if j < l and (
string.find("+-*><=/%&!|", line[j]) != -1):
# line[j] == '+' or line[j] == '-' or line[j] == '*' or \
# line[j] == '>' or line[j] == '<' or line[j] == '=' or \
# line[j] == '/' or line[j] == '%' or line[j] == '&' or \
# line[j] == '!' or line[j] == '|'):
self.tokens.append(('op', line[i:j+1]))
i = j + 1
else:
self.tokens.append(('op', line[i]))
i = i + 1
continue
s = i
while i < l:
o = ord(line[i])
if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
(o >= 48 and o <= 57) or (
string.find(" \t(){}:;,+-*/%&!|[]=><", line[i]) == -1):
# line[i] != ' ' and line[i] != '\t' and
# line[i] != '(' and line[i] != ')' and
# line[i] != '{' and line[i] != '}' and
# line[i] != ':' and line[i] != ';' and
# line[i] != ',' and line[i] != '+' and
# line[i] != '-' and line[i] != '*' and
# line[i] != '/' and line[i] != '%' and
# line[i] != '&' and line[i] != '!' and
# line[i] != '|' and line[i] != '[' and
# line[i] != ']' and line[i] != '=' and
# line[i] != '*' and line[i] != '>' and
# line[i] != '<'):
i = i + 1
else:
break
self.tokens.append(('name', line[s:i]))
tok = self.tokens[0]
self.tokens = self.tokens[1:]
self.last = tok
return tok
class CParser:
"""The C module parser"""
def __init__(self, filename, idx = None):
self.filename = filename
if len(filename) > 2 and filename[-2:] == '.h':
self.is_header = 1
else:
self.is_header = 0
self.input = open(filename)
self.lexer = CLexer(self.input)
if idx == None:
self.index = index()
else:
self.index = idx
self.top_comment = ""
self.last_comment = ""
self.comment = None
self.collect_ref = 0
self.no_error = 0
def collect_references(self):
self.collect_ref = 1
def stop_error(self):
self.no_error = 1
def start_error(self):
self.no_error = 0
def lineno(self):
return self.lexer.getlineno()
def index_add(self, name, module, static, type, info=None, extra = None):
self.index.add(name, module, static, type, self.lineno(),
info, extra)
def index_add_ref(self, name, module, static, type, info=None,
extra = None):
self.index.add_ref(name, module, static, type, self.lineno(),
info, extra)
def warning(self, msg):
if self.no_error:
return
print msg
def error(self, msg, token=-1):
if self.no_error:
return
print "Parse Error: " + msg
if token != -1:
print "Got token ", token
self.lexer.debug()
sys.exit(1)
def debug(self, msg, token=-1):
print "Debug: " + msg
if token != -1:
print "Got token ", token
self.lexer.debug()
def parseTopComment(self, comment):
res = {}
lines = string.split(comment, "\n")
item = None
for line in lines:
while line != "" and (line[0] == ' ' or line[0] == '\t'):
line = line[1:]
while line != "" and line[0] == '*':
line = line[1:]
while line != "" and (line[0] == ' ' or line[0] == '\t'):
line = line[1:]
try:
(it, line) = string.split(line, ":", 1)
item = it
while line != "" and (line[0] == ' ' or line[0] == '\t'):
line = line[1:]
if res.has_key(item):
res[item] = res[item] + " " + line
else:
res[item] = line
except:
if item != None:
if res.has_key(item):
res[item] = res[item] + " " + line
else:
res[item] = line
self.index.info = res
def parseComment(self, token):
if self.top_comment == "":
self.top_comment = token[1]
if self.comment == None or token[1][0] == '*':
self.comment = token[1];
else:
self.comment = self.comment + token[1]
token = self.lexer.token()
if string.find(self.comment, "DOC_DISABLE") != -1:
self.stop_error()
if string.find(self.comment, "DOC_ENABLE") != -1:
self.start_error()
return token
#
# Parse a comment block associate to a macro
#
def parseMacroComment(self, name, quiet = 0):
if name[0:2] == '__':
quiet = 1
args = []
desc = ""
if self.comment == None:
if not quiet:
self.warning("Missing comment for macro %s" % (name))
return((args, desc))
if self.comment[0] != '*':
if not quiet:
self.warning("Missing * in macro comment for %s" % (name))
return((args, desc))
lines = string.split(self.comment, '\n')
if lines[0] == '*':
del lines[0]
if lines[0] != "* %s:" % (name):
if not quiet:
self.warning("Misformatted macro comment for %s" % (name))
self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0]))
return((args, desc))
del lines[0]
while lines[0] == '*':
del lines[0]
while len(lines) > 0 and lines[0][0:3] == '* @':
l = lines[0][3:]
try:
(arg, desc) = string.split(l, ':', 1)
desc=string.strip(desc)
arg=string.strip(arg)
except:
if not quiet:
self.warning("Misformatted macro comment for %s" % (name))
self.warning(" problem with '%s'" % (lines[0]))
del lines[0]
continue
del lines[0]
l = string.strip(lines[0])
while len(l) > 2 and l[0:3] != '* @':
while l[0] == '*':
l = l[1:]
desc = desc + ' ' + string.strip(l)
del lines[0]
if len(lines) == 0:
break
l = lines[0]
args.append((arg, desc))
while len(lines) > 0 and lines[0] == '*':
del lines[0]
desc = ""
while len(lines) > 0:
l = lines[0]
while len(l) > 0 and l[0] == '*':
l = l[1:]
l = string.strip(l)
desc = desc + " " + l
del lines[0]
desc = string.strip(desc)
if quiet == 0:
if desc == "":
self.warning("Macro comment for %s lack description of the macro" % (name))
return((args, desc))
#
# Parse a comment block and merge the informations found in the
# parameters descriptions, finally returns a block as complete
# as possible
#
def mergeFunctionComment(self, name, description, quiet = 0):
if name == 'main':
quiet = 1
if name[0:2] == '__':
quiet = 1
(ret, args) = description
desc = ""
retdesc = ""
if self.comment == None:
if not quiet:
self.warning("Missing comment for function %s" % (name))
return(((ret[0], retdesc), args, desc))
if self.comment[0] != '*':
if not quiet:
self.warning("Missing * in function comment for %s" % (name))
return(((ret[0], retdesc), args, desc))
lines = string.split(self.comment, '\n')
if lines[0] == '*':
del lines[0]
if lines[0] != "* %s:" % (name):
if not quiet:
self.warning("Misformatted function comment for %s" % (name))
self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0]))
return(((ret[0], retdesc), args, desc))
del lines[0]
while len(lines) > 0 and lines[0] == '*':
del lines[0]
nbargs = len(args)
while len(lines) > 0 and lines[0][0:3] == '* @':
l = lines[0][3:]
try:
(arg, desc) = string.split(l, ':', 1)
desc=string.strip(desc)
arg=string.strip(arg)
except:
if not quiet:
self.warning("Misformatted function comment for %s" % (name))
self.warning(" problem with '%s'" % (lines[0]))
del lines[0]
continue
del lines[0]
l = string.strip(lines[0])
while len(l) > 2 and l[0:3] != '* @':
while l[0] == '*':
l = l[1:]
desc = desc + ' ' + string.strip(l)
del lines[0]
if len(lines) == 0:
break
l = lines[0]
i = 0
while i < nbargs:
if args[i][1] == arg:
args[i] = (args[i][0], arg, desc)
break;
i = i + 1
if i >= nbargs:
if not quiet:
self.warning("Unable to find arg %s from function comment for %s" % (
arg, name))
while len(lines) > 0 and lines[0] == '*':
del lines[0]
desc = ""
while len(lines) > 0:
l = lines[0]
while len(l) > 0 and l[0] == '*':
l = l[1:]
l = string.strip(l)
if len(l) >= 6 and l[0:6] == "return" or l[0:6] == "Return":
try:
l = string.split(l, ' ', 1)[1]
except:
l = ""
retdesc = string.strip(l)
del lines[0]
while len(lines) > 0:
l = lines[0]
while len(l) > 0 and l[0] == '*':
l = l[1:]
l = string.strip(l)
retdesc = retdesc + " " + l
del lines[0]
else:
desc = desc + " " + l
del lines[0]
retdesc = string.strip(retdesc)
desc = string.strip(desc)
if quiet == 0:
#
# report missing comments
#
i = 0
while i < nbargs:
if args[i][2] == None and args[i][0] != "void" and args[i][1] != None:
self.warning("Function comment for %s lack description of arg %s" % (name, args[i][1]))
i = i + 1
if retdesc == "" and ret[0] != "void":
self.warning("Function comment for %s lack description of return value" % (name))
if desc == "":
self.warning("Function comment for %s lack description of the function" % (name))
return(((ret[0], retdesc), args, desc))
def parsePreproc(self, token):
name = token[1]
if name == "#include":
token = self.lexer.token()
if token == None:
return None
if token[0] == 'preproc':
self.index_add(token[1], self.filename, not self.is_header,
"include")
return self.lexer.token()
return token
if name == "#define":
token = self.lexer.token()
if token == None:
return None
if token[0] == 'preproc':
# TODO macros with arguments
name = token[1]
lst = []
token = self.lexer.token()
while token != None and token[0] == 'preproc' and \
token[1][0] != '#':
lst.append(token[1])
token = self.lexer.token()
try:
name = string.split(name, '(') [0]
except:
pass
info = self.parseMacroComment(name, not self.is_header)
self.index_add(name, self.filename, not self.is_header,
"macro", info)
return token
token = self.lexer.token()
while token != None and token[0] == 'preproc' and \
token[1][0] != '#':
token = self.lexer.token()
return token
#
# token acquisition on top of the lexer, it handle internally
# preprocessor and comments since they are logically not part of
# the program structure.
#
def token(self):
global ignored_words
token = self.lexer.token()
while token != None:
if token[0] == 'comment':
token = self.parseComment(token)
continue
elif token[0] == 'preproc':
token = self.parsePreproc(token)
continue
elif token[0] == "name" and ignored_words.has_key(token[1]):
(n, info) = ignored_words[token[1]]
i = 0
while i < n:
token = self.lexer.token()
i = i + 1
token = self.lexer.token()
continue
else:
if debug:
print "=> ", token
return token
return None
#
# Parse a typedef, it records the type and its name.
#
def parseTypedef(self, token):
if token == None:
return None
token = self.parseType(token)
if token == None:
self.error("parsing typedef")
return None
base_type = self.type
type = base_type
#self.debug("end typedef type", token)
while token != None:
if token[0] == "name":
name = token[1]
signature = self.signature
if signature != None:
type = string.split(type, '(')[0]
d = self.mergeFunctionComment(name,
((type, None), signature), 1)
self.index_add(name, self.filename, not self.is_header,
"functype", d)
else:
if base_type == "struct":
self.index_add(name, self.filename, not self.is_header,
"struct", type)
base_type = "struct " + name
else:
self.index_add(name, self.filename, not self.is_header,
"typedef", type)
token = self.token()
else:
self.error("parsing typedef: expecting a name")
return token
#self.debug("end typedef", token)
if token != None and token[0] == 'sep' and token[1] == ',':
type = base_type
token = self.token()
while token != None and token[0] == "op":
type = type + token[1]
token = self.token()
elif token != None and token[0] == 'sep' and token[1] == ';':
break;
elif token != None and token[0] == 'name':
type = base_type
continue;
else:
self.error("parsing typedef: expecting ';'", token)
return token
token = self.token()
return token
#
# Parse a C code block, used for functions it parse till
# the balancing } included
#
def parseBlock(self, token):
while token != None:
if token[0] == "sep" and token[1] == "{":
token = self.token()
token = self.parseBlock(token)
elif token[0] == "sep" and token[1] == "}":
self.comment = None
token = self.token()
return token
else:
if self.collect_ref == 1:
oldtok = token
token = self.token()
if oldtok[0] == "name" and oldtok[1][0:3] == "xml":
if token[0] == "sep" and token[1] == "(":
self.index_add_ref(oldtok[1], self.filename,
0, "function")
token = self.token()
elif token[0] == "name":
token = self.token()
if token[0] == "sep" and (token[1] == ";" or
token[1] == "," or token[1] == "="):
self.index_add_ref(oldtok[1], self.filename,
0, "type")
elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_":
self.index_add_ref(oldtok[1], self.filename,
0, "typedef")
elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_":
self.index_add_ref(oldtok[1], self.filename,
0, "typedef")
else:
token = self.token()
return token
#
# Parse a C struct definition till the balancing }
#
def parseStruct(self, token):
fields = []
#self.debug("start parseStruct", token)
while token != None:
if token[0] == "sep" and token[1] == "{":
token = self.token()
token = self.parseTypeBlock(token)
elif token[0] == "sep" and token[1] == "}":
self.struct_fields = fields
#self.debug("end parseStruct", token)
#print fields
token = self.token()
return token
else:
base_type = self.type
#self.debug("before parseType", token)
token = self.parseType(token)
#self.debug("after parseType", token)
if token != None and token[0] == "name":
fname = token[1]
token = self.token()
if token[0] == "sep" and token[1] == ";":
self.comment = None
token = self.token()
fields.append((self.type, fname, self.comment))
self.comment = None
else:
self.error("parseStruct: expecting ;", token)
elif token != None and token[0] == "sep" and token[1] == "{":
token = self.token()
token = self.parseTypeBlock(token)
if token != None and token[0] == "name":
token = self.token()
if token != None and token[0] == "sep" and token[1] == ";":
token = self.token()
else:
self.error("parseStruct: expecting ;", token)
else:
self.error("parseStruct: name", token)
token = self.token()
self.type = base_type;
self.struct_fields = fields
#self.debug("end parseStruct", token)
#print fields
return token
#
# Parse a C enum block, parse till the balancing }
#
def parseEnumBlock(self, token):
self.enums = []
name = None
self.comment = None
comment = ""
value = "0"
while token != None:
if token[0] == "sep" and token[1] == "{":
token = self.token()
token = self.parseTypeBlock(token)
elif token[0] == "sep" and token[1] == "}":
if name != None:
if self.comment != None:
comment = self.comment
self.comment = None
self.enums.append((name, value, comment))
token = self.token()
return token
elif token[0] == "name":
if name != None:
if self.comment != None:
comment = string.strip(self.comment)
self.comment = None
self.enums.append((name, value, comment))
name = token[1]
comment = ""
token = self.token()
if token[0] == "op" and token[1][0] == "=":
value = ""
if len(token[1]) > 1:
value = token[1][1:]
token = self.token()
while token[0] != "sep" or (token[1] != ',' and
token[1] != '}'):
value = value + token[1]
token = self.token()
else:
try:
value = "%d" % (int(value) + 1)
except:
self.warning("Failed to compute value of enum %s" % (name))
value=""
if token[0] == "sep" and token[1] == ",":
token = self.token()
else:
token = self.token()
return token
#
# Parse a C definition block, used for structs it parse till
# the balancing }
#
def parseTypeBlock(self, token):
while token != None:
if token[0] == "sep" and token[1] == "{":
token = self.token()
token = self.parseTypeBlock(token)
elif token[0] == "sep" and token[1] == "}":
token = self.token()
return token
else:
token = self.token()
return token
#
# Parse a type: the fact that the type name can either occur after
# the definition or within the definition makes it a little harder
# if inside, the name token is pushed back before returning
#
def parseType(self, token):
self.type = ""
self.struct_fields = []
self.signature = None
if token == None:
return token
while token[0] == "name" and (
token[1] == "const" or token[1] == "unsigned" or
token[1] == "signed"):
if self.type == "":
self.type = token[1]
else:
self.type = self.type + " " + token[1]
token = self.token()
if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
if self.type == "":
self.type = token[1]
else:
self.type = self.type + " " + token[1]
if token[0] == "name" and token[1] == "int":
if self.type == "":
self.type = tmp[1]
else:
self.type = self.type + " " + tmp[1]
elif token[0] == "name" and token[1] == "struct":
if self.type == "":
self.type = token[1]
else:
self.type = self.type + " " + token[1]
token = self.token()
nametok = None
if token[0] == "name":
nametok = token
token = self.token()
if token != None and token[0] == "sep" and token[1] == "{":
token = self.token()
token = self.parseStruct(token)
elif token != None and token[0] == "op" and token[1] == "*":
self.type = self.type + " " + nametok[1] + " *"
token = self.token()
while token != None and token[0] == "op" and token[1] == "*":
self.type = self.type + " *"
token = self.token()
if token[0] == "name":
nametok = token
token = self.token()
else:
self.error("struct : expecting name", token)
return token
elif token != None and token[0] == "name" and nametok != None:
self.type = self.type + " " + nametok[1]
return token
if nametok != None:
self.lexer.push(token)
token = nametok
return token
elif token[0] == "name" and token[1] == "enum":
if self.type == "":
self.type = token[1]
else:
self.type = self.type + " " + token[1]
self.enums = []
token = self.token()
if token != None and token[0] == "sep" and token[1] == "{":
token = self.token()
token = self.parseEnumBlock(token)
else:
self.error("parsing enum: expecting '{'", token)
enum_type = None
if token != None and token[0] != "name":
self.lexer.push(token)
token = ("name", "enum")
else:
enum_type = token[1]
for enum in self.enums:
self.index_add(enum[0], self.filename,
not self.is_header, "enum",
(enum[1], enum[2], enum_type))
return token
elif token[0] == "name":
if self.type == "":
self.type = token[1]
else:
self.type = self.type + " " + token[1]
else:
self.error("parsing type %s: expecting a name" % (self.type),
token)
return token
token = self.token()
while token != None and (token[0] == "op" or
token[0] == "name" and token[1] == "const"):
self.type = self.type + " " + token[1]
token = self.token()
#
# if there is a parenthesis here, this means a function type
#
if token != None and token[0] == "sep" and token[1] == '(':
self.type = self.type + token[1]
token = self.token()
while token != None and token[0] == "op" and token[1] == '*':
self.type = self.type + token[1]
token = self.token()
if token == None or token[0] != "name" :
self.error("parsing function type, name expected", token);
return token
self.type = self.type + token[1]
nametok = token
token = self.token()
if token != None and token[0] == "sep" and token[1] == ')':
self.type = self.type + token[1]
token = self.token()
if token != None and token[0] == "sep" and token[1] == '(':
token = self.token()
type = self.type;
token = self.parseSignature(token);
self.type = type;
else:
self.error("parsing function type, '(' expected", token);
return token
else:
self.error("parsing function type, ')' expected", token);
return token
self.lexer.push(token)
token = nametok
return token
#
# do some lookahead for arrays
#
if token != None and token[0] == "name":
nametok = token
token = self.token()
if token != None and token[0] == "sep" and token[1] == '[':
self.type = self.type + nametok[1]
while token != None and token[0] == "sep" and token[1] == '[':
self.type = self.type + token[1]
token = self.token()
while token != None and token[0] != 'sep' and \
token[1] != ']' and token[1] != ';':
self.type = self.type + token[1]
token = self.token()
if token != None and token[0] == 'sep' and token[1] == ']':
self.type = self.type + token[1]
token = self.token()
else:
self.error("parsing array type, ']' expected", token);
return token
elif token != None and token[0] == "sep" and token[1] == ':':
# remove :12 in case it's a limited int size
token = self.token()
token = self.token()
self.lexer.push(token)
token = nametok
return token
#
# Parse a signature: '(' has been parsed and we scan the type definition
# up to the ')' included
def parseSignature(self, token):
signature = []
if token != None and token[0] == "sep" and token[1] == ')':
self.signature = []
token = self.token()
return token
while token != None:
token = self.parseType(token)
if token != None and token[0] == "name":
signature.append((self.type, token[1], None))
token = self.token()
elif token != None and token[0] == "sep" and token[1] == ',':
token = self.token()
continue
elif token != None and token[0] == "sep" and token[1] == ')':
# only the type was provided
if self.type == "...":
signature.append((self.type, "...", None))
else:
signature.append((self.type, None, None))
if token != None and token[0] == "sep":
if token[1] == ',':
token = self.token()
continue
elif token[1] == ')':
token = self.token()
break
self.signature = signature
return token
#
# Parse a global definition, be it a type, variable or function
# the extern "C" blocks are a bit nasty and require it to recurse.
#
def parseGlobal(self, token):
static = 0
if token[1] == 'extern':
token = self.token()
if token == None:
return token
if token[0] == 'string':
if token[1] == 'C':
token = self.token()
if token == None:
return token
if token[0] == 'sep' and token[1] == "{":
token = self.token()
# print 'Entering extern "C line ', self.lineno()
while token != None and (token[0] != 'sep' or
token[1] != "}"):
if token[0] == 'name':
token = self.parseGlobal(token)
else:
self.error(
"token %s %s unexpected at the top level" % (
token[0], token[1]))
token = self.parseGlobal(token)
# print 'Exiting extern "C" line', self.lineno()
token = self.token()
return token
else:
return token
elif token[1] == 'static':
static = 1
token = self.token()
if token == None or token[0] != 'name':
return token
if token[1] == 'typedef':
token = self.token()
return self.parseTypedef(token)
else:
token = self.parseType(token)
type_orig = self.type
if token == None or token[0] != "name":
return token
type = type_orig
self.name = token[1]
token = self.token()
while token != None and (token[0] == "sep" or token[0] == "op"):
if token[0] == "sep":
if token[1] == "[":
type = type + token[1]
token = self.token()
while token != None and (token[0] != "sep" or \
token[1] != ";"):
type = type + token[1]
token = self.token()
if token != None and token[0] == "op" and token[1] == "=":
#
# Skip the initialization of the variable
#
token = self.token()
if token[0] == 'sep' and token[1] == '{':
token = self.token()
token = self.parseBlock(token)
else:
self.comment = None
while token != None and (token[0] != "sep" or \
(token[1] != ';' and token[1] != ',')):
token = self.token()
self.comment = None
if token == None or token[0] != "sep" or (token[1] != ';' and
token[1] != ','):
self.error("missing ';' or ',' after value")
if token != None and token[0] == "sep":
if token[1] == ";":
self.comment = None
token = self.token()
if type == "struct":
self.index_add(self.name, self.filename,
not self.is_header, "struct", self.struct_fields)
else:
self.index_add(self.name, self.filename,
not self.is_header, "variable", type)
break
elif token[1] == "(":
token = self.token()
token = self.parseSignature(token)
if token == None:
return None
if token[0] == "sep" and token[1] == ";":
d = self.mergeFunctionComment(self.name,
((type, None), self.signature), 1)
self.index_add(self.name, self.filename, static,
"function", d)
token = self.token()
elif token[0] == "sep" and token[1] == "{":
d = self.mergeFunctionComment(self.name,
((type, None), self.signature), static)
self.index_add(self.name, self.filename, static,
"function", d)
token = self.token()
token = self.parseBlock(token);
elif token[1] == ',':
self.comment = None
self.index_add(self.name, self.filename, static,
"variable", type)
type = type_orig
token = self.token()
while token != None and token[0] == "sep":
type = type + token[1]
token = self.token()
if token != None and token[0] == "name":
self.name = token[1]
token = self.token()
else:
break
return token
def parse(self):
self.warning("Parsing %s" % (self.filename))
token = self.token()
while token != None:
if token[0] == 'name':
token = self.parseGlobal(token)
else:
self.error("token %s %s unexpected at the top level" % (
token[0], token[1]))
token = self.parseGlobal(token)
return
self.parseTopComment(self.top_comment)
return self.index
class docBuilder:
"""A documentation builder"""
def __init__(self, name, directories=['.'], excludes=[]):
self.name = name
self.directories = directories
self.excludes = excludes + ignored_files.keys()
self.modules = {}
self.headers = {}
self.idx = index()
self.xref = {}
self.index = {}
if name == 'libxml2':
self.basename = 'libxml'
else:
self.basename = name
def indexString(self, id, str):
if str == None:
return
str = string.replace(str, "'", ' ')
str = string.replace(str, '"', ' ')
str = string.replace(str, "/", ' ')
str = string.replace(str, '*', ' ')
str = string.replace(str, "[", ' ')
str = string.replace(str, "]", ' ')
str = string.replace(str, "(", ' ')
str = string.replace(str, ")", ' ')
str = string.replace(str, "<", ' ')
str = string.replace(str, '>', ' ')
str = string.replace(str, "&", ' ')
str = string.replace(str, '#', ' ')
str = string.replace(str, ",", ' ')
str = string.replace(str, '.', ' ')
str = string.replace(str, ';', ' ')
tokens = string.split(str)
for token in tokens:
try:
c = token[0]
if string.find(string.letters, c) < 0:
pass
elif len(token) < 3:
pass
else:
lower = string.lower(token)
# TODO: generalize this a bit
if lower == 'and' or lower == 'the':
pass
elif self.xref.has_key(token):
self.xref[token].append(id)
else:
self.xref[token] = [id]
except:
pass
def analyze(self):
print "Project %s : %d headers, %d modules" % (self.name, len(self.headers.keys()), len(self.modules.keys()))
self.idx.analyze()
def scanHeaders(self):
for header in self.headers.keys():
parser = CParser(header)
idx = parser.parse()
self.headers[header] = idx;
self.idx.merge(idx)
def scanModules(self):
for module in self.modules.keys():
parser = CParser(module)
idx = parser.parse()
# idx.analyze()
self.modules[module] = idx
self.idx.merge_public(idx)
def scan(self):
for directory in self.directories:
files = glob.glob(directory + "/*.c")
for file in files:
skip = 0
for excl in self.excludes:
if string.find(file, excl) != -1:
skip = 1;
break
if skip == 0:
self.modules[file] = None;
files = glob.glob(directory + "/*.h")
for file in files:
skip = 0
for excl in self.excludes:
if string.find(file, excl) != -1:
skip = 1;
break
if skip == 0:
self.headers[file] = None;
self.scanHeaders()
self.scanModules()
def modulename_file(self, file):
module = os.path.basename(file)
if module[-2:] == '.h':
module = module[:-2]
return module
def serialize_enum(self, output, name):
id = self.idx.enums[name]
output.write(" \n")
def serialize_macro(self, output, name):
id = self.idx.macros[name]
output.write(" \n" % (name,
self.modulename_file(id.module)))
if id.info != None:
try:
(args, desc) = id.info
if desc != None and desc != "":
output.write(" %s\n" % (escape(desc)))
self.indexString(name, desc)
for arg in args:
(name, desc) = arg
if desc != None and desc != "":
output.write(" \n" % (
name, escape(desc)))
self.indexString(name, desc)
else:
output.write(" \n" % (name))
except:
pass
output.write(" \n")
def serialize_typedef(self, output, name):
id = self.idx.typedefs[name]
if id.info[0:7] == 'struct ':
output.write(" \n");
try:
for field in self.idx.structs[name].info:
desc = field[2]
self.indexString(name, desc)
if desc == None:
desc = ''
else:
desc = escape(desc)
output.write(" \n" % (field[1] , field[0], desc))
except:
print "Failed to serialize struct %s" % (name)
output.write(" \n")
else:
output.write("/>\n");
else :
output.write(" \n" % (
name, self.modulename_file(id.module), id.info))
def serialize_variable(self, output, name):
id = self.idx.variables[name]
if id.info != None:
output.write(" \n" % (
name, self.modulename_file(id.module), id.info))
else:
output.write(" \n" % (
name, self.modulename_file(id.module)))
def serialize_function(self, output, name):
id = self.idx.functions[name]
output.write(" <%s name='%s' file='%s'>\n" % (id.type, name,
self.modulename_file(id.module)))
try:
(ret, params, desc) = id.info
output.write(" %s\n" % (escape(desc)))
self.indexString(name, desc)
if ret[0] != None:
if ret[0] == "void":
output.write(" \n")
else:
output.write(" \n" % (
ret[0], escape(ret[1])))
self.indexString(name, ret[1])
for param in params:
if param[0] == 'void':
continue
if param[2] == None:
output.write(" \n" % (param[1], param[0]))
else:
output.write(" \n" % (param[1], param[0], escape(param[2])))
self.indexString(name, param[2])
except:
print "Failed to save function %s info: " % name, `id.info`
output.write(" %s>\n" % (id.type))
def serialize_exports(self, output, file):
module = self.modulename_file(file)
output.write(" \n" % (module))
dict = self.headers[file]
if dict.info != None:
for data in ('Summary', 'Description', 'Author'):
try:
output.write(" <%s>%s%s>\n" % (
string.lower(data),
escape(dict.info[data]),
string.lower(data)))
except:
print "Header %s lacks a %s description" % (module, data)
if dict.info.has_key('Description'):
desc = dict.info['Description']
if string.find(desc, "DEPRECATED") != -1:
output.write(" \n")
ids = dict.macros.keys()
ids.sort()
for id in uniq(ids):
# Macros are sometime used to masquerade other types.
if dict.functions.has_key(id):
continue
if dict.variables.has_key(id):
continue
if dict.typedefs.has_key(id):
continue
if dict.structs.has_key(id):
continue
if dict.enums.has_key(id):
continue
output.write(" \n" % (id))
ids = dict.enums.keys()
ids.sort()
for id in uniq(ids):
output.write(" \n" % (id))
ids = dict.typedefs.keys()
ids.sort()
for id in uniq(ids):
output.write(" \n" % (id))
ids = dict.structs.keys()
ids.sort()
for id in uniq(ids):
output.write(" \n" % (id))
ids = dict.variables.keys()
ids.sort()
for id in uniq(ids):
output.write(" \n" % (id))
ids = dict.functions.keys()
ids.sort()
for id in uniq(ids):
output.write(" \n" % (id))
output.write(" \n")
def serialize_xrefs_files(self, output):
headers = self.headers.keys()
headers.sort()
for file in headers:
module = self.modulename_file(file)
output.write(" \n" % (module))
dict = self.headers[file]
ids = uniq(dict.functions.keys() + dict.variables.keys() + \
dict.macros.keys() + dict.typedefs.keys() + \
dict.structs.keys() + dict.enums.keys())
ids.sort()
for id in ids:
output.write(" \n" % (id))
output.write(" \n")
pass
def serialize_xrefs_functions(self, output):
funcs = {}
for name in self.idx.functions.keys():
id = self.idx.functions[name]
try:
(ret, params, desc) = id.info
for param in params:
if param[0] == 'void':
continue
if funcs.has_key(param[0]):
funcs[param[0]].append(name)
else:
funcs[param[0]] = [name]
except:
pass
typ = funcs.keys()
typ.sort()
for type in typ:
if type == '' or type == 'void' or type == "int" or \
type == "char *" or type == "const char *" :
continue
output.write(" \n" % (type))
ids = funcs[type]
ids.sort()
pid = '' # not sure why we have dups, but get rid of them!
for id in ids:
if id != pid:
output.write(" \n" % (id))
pid = id
output.write(" \n")
def serialize_xrefs_constructors(self, output):
funcs = {}
for name in self.idx.functions.keys():
id = self.idx.functions[name]
try:
(ret, params, desc) = id.info
if ret[0] == "void":
continue
if funcs.has_key(ret[0]):
funcs[ret[0]].append(name)
else:
funcs[ret[0]] = [name]
except:
pass
typ = funcs.keys()
typ.sort()
for type in typ:
if type == '' or type == 'void' or type == "int" or \
type == "char *" or type == "const char *" :
continue
output.write(" \n" % (type))
ids = funcs[type]
ids.sort()
for id in ids:
output.write(" \n" % (id))
output.write(" \n")
def serialize_xrefs_alpha(self, output):
letter = None
ids = self.idx.identifiers.keys()
ids.sort()
for id in ids:
if id[0] != letter:
if letter != None:
output.write(" \n")
letter = id[0]
output.write(" \n" % (letter))
output.write(" \n" % (id))
if letter != None:
output.write(" \n")
def serialize_xrefs_references(self, output):
typ = self.idx.identifiers.keys()
typ.sort()
for id in typ:
idf = self.idx.identifiers[id]
module = idf.module
output.write(" \n" % (id,
'html/' + self.basename + '-' +
self.modulename_file(module) + '.html#' +
id))
def serialize_xrefs_index(self, output):
index = self.xref
typ = index.keys()
typ.sort()
letter = None
count = 0
chunk = 0
chunks = []
for id in typ:
if len(index[id]) > 30:
continue
if id[0] != letter:
if letter == None or count > 200:
if letter != None:
output.write(" \n")
output.write(" \n")
count = 0
chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
output.write(" \n" % (chunk))
first_letter = id[0]
chunk = chunk + 1
elif letter != None:
output.write(" \n")
letter = id[0]
output.write(" \n" % (letter))
output.write(" \n" % (id))
tokens = index[id];
tokens.sort()
tok = None
for token in tokens:
if tok == token:
continue
tok = token
output.write(" \n" % (token))
count = count + 1
output.write(" \n")
if letter != None:
output.write(" \n")
output.write(" \n")
if count != 0:
chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
output.write(" \n")
for ch in chunks:
output.write(" \n" % (
ch[0], ch[1], ch[2]))
output.write(" \n")
def serialize_xrefs(self, output):
output.write(" \n")
self.serialize_xrefs_references(output)
output.write(" \n")
output.write(" \n")
self.serialize_xrefs_alpha(output)
output.write(" \n")
output.write(" \n")
self.serialize_xrefs_constructors(output)
output.write(" \n")
output.write(" \n")
self.serialize_xrefs_functions(output)
output.write(" \n")
output.write(" \n")
self.serialize_xrefs_files(output)
output.write(" \n")
output.write(" \n")
self.serialize_xrefs_index(output)
output.write(" \n")
def serialize(self, outdir):
filename = outdir + "%s-api.xml" % self.name
print "Saving XML description %s" % (filename)
output = open(filename, "w")
output.write('\n')
output.write("\n" % self.name)
output.write(" \n")
headers = self.headers.keys()
headers.sort()
for file in headers:
self.serialize_exports(output, file)
output.write(" \n")
output.write(" \n")
macros = self.idx.macros.keys()
macros.sort()
for macro in macros:
self.serialize_macro(output, macro)
enums = self.idx.enums.keys()
enums.sort()
for enum in enums:
self.serialize_enum(output, enum)
typedefs = self.idx.typedefs.keys()
typedefs.sort()
for typedef in typedefs:
self.serialize_typedef(output, typedef)
variables = self.idx.variables.keys()
variables.sort()
for variable in variables:
self.serialize_variable(output, variable)
functions = self.idx.functions.keys()
functions.sort()
for function in functions:
self.serialize_function(output, function)
output.write(" \n")
output.write("\n")
output.close()
filename = outdir + "%s-refs.xml" % self.name
print "Saving XML Cross References %s" % (filename)
output = open(filename, "w")
output.write('\n')
output.write("\n" % self.name)
self.serialize_xrefs(output)
output.write("\n")
output.close()
def rebuild():
builder = None
if glob.glob("parser.c") != [] :
print "Rebuilding API description for libxml2"
builder = docBuilder("libxml2", [".", "."],
["xmlwin32version.h", "tst.c"])
elif glob.glob("../parser.c") != [] :
print "Rebuilding API description for libxml2"
builder = docBuilder("libxml2", ["..", "../include/libxml"],
["xmlwin32version.h", "tst.c"])
elif glob.glob("../libxslt/transform.c") != [] :
print "Rebuilding API description for libxslt"
builder = docBuilder("libxslt", ["../libxslt"],
["win32config.h", "libxslt.h", "tst.c"])
else:
print "rebuild() failed, unable to guess the module"
return None
builder.scan()
builder.analyze()
builder.serialize("./")
if glob.glob("../libexslt/exslt.c") != [] :
extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
extra.scan()
extra.analyze()
extra.serialize("EXSLT/")
return builder
#
# for debugging the parser
#
def parse(filename):
parser = CParser(filename)
idx = parser.parse()
return idx
if __name__ == "__main__":
rebuild()