#!/usr/bin/python -u
#
# tries to parse the output of gtk-doc declaration files and make
# an XML reusable description from them
#
# TODO: try to extracts comments from the DocBook output of

import os
import string
import sys

import libxml2

# id of the DocBook anchor recorded for each documented symbol
ids = {}
# name -> [[[arg name, info], ...], info]
macros = {}
# name -> [type, info]
variables = {}
# name -> info
structs = {}
# name -> info
typedefs = {}
# name -> [[constant names], info]
enums = {}
# name -> [[return type, return info], [[type, name, info], ...], info]
functions = {}
# same layout as functions, for function types (callbacks)
user_functions = {}
# return type -> names of the functions returning it
ret_types = {}
# argument type -> names of the functions taking it
types = {}
# section names, in the order they were read
sections = []
# section name -> identifiers listed in that section
files = {}
# identifier -> section name
identifiers_file = {}
# identifier -> "macro", "variable", "struct", "typedef", "enum", "const",
#               "function" or "functype"
identifiers_type = {}

##################################################################
#
#          Indexer to generate the word index
#
##################################################################
index = {}

# Punctuation turned into blanks before a description is cut into words.
_INDEX_SEPARATORS = "'\"/*[]()<>&#,.;"


def indexString(id, str):
    """Record in `index` every significant word of `str` as pointing to `id`.

    A word is kept when it starts with an ASCII letter, is at least three
    characters long and is not "and"/"the" (compared case-insensitively).
    Empty or missing descriptions are ignored.
    """
    if not str:
        return
    text = str
    for separator in _INDEX_SEPARATORS:
        text = text.replace(separator, ' ')
    for token in text.split():
        if token[0] not in string.ascii_letters:
            continue
        if len(token) < 3:
            continue
        # TODO: generalize this a bit
        if token.lower() in ('and', 'the'):
            continue
        index.setdefault(token, []).append(id)


##################################################################
#
#          Parsing: libxslt-decl.txt
#
##################################################################
# NOTE(review): every "<TAG>" / "</TAG>" literal in this section was an
# empty string in the damaged source.  They are reconstructed from the
# slice widths the code used (6/7 characters for the name tag, 9/10 for
# the return tag) and from the gtk-doc declaration file layout - confirm
# against a real libxslt-decl.txt / libxslt-decl-list.txt.

# Characters that may appear in a C identifier.
_IDENT_CHARS = string.ascii_letters + string.digits + '_'


def mormalizeTypeSpaces(raw, function):
    """Collapse the blanks of an argument type and register its user.

    Returns `raw` with every run of whitespace reduced to one space and
    appends `function` to `types[<normalized type>]`.
    """
    type = ' '.join(raw.split())
    types.setdefault(type, []).append(function)
    return type


def removeComments(raw):
    """Return `raw` without its /* ... */ comments.

    An unterminated comment swallows the rest of the string.  A comment
    starting at offset 0 is removed too (the previous `> 0` test kept it).
    """
    start = raw.find('/*')
    while start >= 0:
        end = raw.find('*/', start + 2)
        if end < 0:
            raw = raw[:start]
            break
        raw = raw[:start] + raw[end + 2:]
        # rescan from the beginning: gluing the two halves may have
        # created a new comment opener
        start = raw.find('/*')
    return raw


def extractArgs(raw, function):
    """Split a C parameter list into [type, name, info] triples.

    `raw` is the text between the parentheses; comments and line breaks
    are dropped first.  The name is the trailing run of identifier
    characters of each comma-separated chunk, the type is what precedes
    it.  A lone "void" yields no argument; "..." yields type "..." with
    an empty name.  `info` is always '' here and is filled in later.
    """
    raw = removeComments(raw)
    raw = raw.replace('\n', ' ').replace('\r', ' ')
    ret = []
    for arg in raw.split(','):
        # surrounding blanks used to make the whole chunk look like a
        # type with an empty name
        arg = arg.strip()
        if not arg:
            continue
        i = len(arg)
        while i > 0 and arg[i - 1] in _IDENT_CHARS:
            i = i - 1
        name = arg[i:]
        type = mormalizeTypeSpaces(arg[:i], function)
        if name == 'void' and type == '':
            continue
        ret.append([type, name, ''])
    return ret


def extractTypes(raw, function):
    """Collapse the blanks of a return type and register its producer.

    Returns the normalized type and appends `function` to
    `ret_types[<normalized type>]`.
    """
    type = ' '.join(raw.split())
    ret_types.setdefault(type, []).append(function)
    return type


def _readBlock(endTag):
    """Return the lines read from `input` up to, not including, `endTag`.

    Also stops at end of file, so a truncated input cannot loop forever.
    """
    lines = []
    while 1:
        raw = input.readline()
        if not raw:
            break
        line = raw.rstrip('\r\n')
        if line == endTag:
            break
        lines.append(line)
    return lines


def _tagValue(line, tag):
    """Return X when `line` is exactly "<tag>X</tag>", else None."""
    start = '<%s>' % (tag)
    end = '</%s>' % (tag)
    if line[:len(start)] == start and line[-len(end):] == end:
        return line[len(start):-len(end)]
    return None


def _unnamed(kind):
    """Report a declaration block that carried no name line."""
    print("skipping %s block without a name" % (kind))


def parseMacro():
    """Parse one macro block; file it as a macro or as a variable.

    The block counts as a macro when one of its lines contains "#define",
    as a variable otherwise.
    """
    name = None
    is_variable = 1
    for line in _readBlock("</MACRO>"):
        value = _tagValue(line, "NAME")
        if value is not None:
            name = value
        elif line.find("#define") >= 0:
            is_variable = 0
    if name is None:
        _unnamed("macro")
        return
    if is_variable:
        variables[name] = ['', '']      # type, info
        identifiers_type[name] = "variable"
    else:
        macros[name] = [[], '']         # args, info
        identifiers_type[name] = "macro"


def parseStruct():
    """Parse one struct block and register the struct with no info."""
    name = None
    for line in _readBlock("</STRUCT>"):
        value = _tagValue(line, "NAME")
        if value is not None:
            name = value
    if name is None:
        _unnamed("struct")
        return
    structs[name] = ''
    identifiers_type[name] = "struct"


def parseTypedef():
    """Parse one typedef block and register the typedef with no info."""
    name = None
    for line in _readBlock("</TYPEDEF>"):
        value = _tagValue(line, "NAME")
        if value is not None:
            name = value
    if name is None:
        _unnamed("typedef")
        return
    typedefs[name] = ''
    identifiers_type[name] = "typedef"


def parseEnum():
    """Parse one enum block, collecting the names of its constants.

    Lines containing "enum", "{", "}" or ";" are treated as syntax and
    skipped; on the other lines a trailing comment is cut and each
    comma-separated declaration contributes the word before any "=",
    provided it starts with an ASCII letter.
    """
    name = None
    consts = []
    for line in _readBlock("</ENUM>"):
        value = _tagValue(line, "NAME")
        if value is not None:
            name = value
            continue
        if line.find('enum') >= 0 or line.find('{') >= 0 or \
           line.find('}') >= 0 or line.find(';') >= 0:
            continue
        comment = line.find('/*')
        if comment >= 0:
            line = line[:comment]
        for decl in line.split(','):
            tokens = decl.split('=')[0].split()
            if tokens and tokens[0][0] in string.ascii_letters:
                consts.append(tokens[0])
                identifiers_type[tokens[0]] = "const"
    if name is None:
        _unnamed("enum")
        return
    enums[name] = [consts, '']
    identifiers_type[name] = "enum"


def _parseCallable(endTag, table, kind):
    """Parse a function-like block ending at `endTag` into `table`.

    Every line that is neither the name nor the return type is appended
    to the signature, which is then split by extractArgs().  The entry is
    [[return type, ''], args, ''] and `identifiers_type` gets `kind`.
    The return type stays None when the block has no return line.
    """
    name = None
    returns = None
    signature = ""
    for line in _readBlock(endTag):
        value = _tagValue(line, "NAME")
        if value is not None:
            name = value
            continue
        value = _tagValue(line, "RETURNS")
        if value is not None:
            returns = value
            continue
        signature = signature + line
    if name is None:
        _unnamed(kind)
        return
    type = None
    if returns is not None:
        type = extractTypes(returns, name)
    args = extractArgs(signature, name)
    table[name] = [[type, ''], args, '']
    identifiers_type[name] = kind


def parseStaticFunction():
    """Parse one function-type block into `user_functions`."""
    _parseCallable("</USER_FUNCTION>", user_functions, "functype")


def parseFunction():
    """Parse one function block into `functions`."""
    _parseCallable("</FUNCTION>", functions, "function")


# opening tag of a declaration block -> parser consuming that block
_DECL_PARSERS = {
    "<MACRO>": parseMacro,
    "<ENUM>": parseEnum,
    "<FUNCTION>": parseFunction,
    "<STRUCT>": parseStruct,
    "<TYPEDEF>": parseTypedef,
    "<USER_FUNCTION>": parseStaticFunction,
}

print("Parsing: libxslt-decl.txt")
# the parse*() functions read from this module-level handle
input = open('libxslt-decl.txt')
try:
    while 1:
        line = input.readline()
        if not line:
            break
        line = line.rstrip('\r\n')
        parser = _DECL_PARSERS.get(line)
        if parser is not None:
            parser()
        elif line[:1] == "<":
            print("unhandled %s" % (line))
finally:
    input.close()

print("Parsed: %d macros. %d structs, %d typedefs, %d enums" % (
      len(macros.keys()), len(structs.keys()), len(typedefs.keys()),
      len(enums)))
c = 0
for enum in enums.keys():
    consts = enums[enum][0]
    c = c + len(consts)
print(" %d variables, %d constants, %d functions and %d functypes" % (
      len(variables.keys()), c, len(functions.keys()),
      len(user_functions.keys())))
print("The functions manipulates %d different types" % (len(types.keys())))
print("The functions returns %d different types" % (len(ret_types.keys())))

##################################################################
#
#          Parsing: libxslt-decl-list.txt
#
##################################################################
def parseSection():
    """Parse one section block: a file name followed by its identifiers.

    Fills `sections`, `files` and `identifiers_file`; the constants of an
    enum listed in the section are attached to the same section.
    """
    name = None
    tokens = []
    for line in _readBlock("</SECTION>"):
        value = _tagValue(line, "FILE")
        if value is not None:
            name = value
        elif len(line) > 0:
            tokens.append(line)
    if name is None:
        _unnamed("section")
        return
    sections.append(name)
    files[name] = tokens
    for token in tokens:
        identifiers_file[token] = name
        #
        # Small transitivity for enum values
        #
        if token in enums:
            for const in enums[token][0]:
                identifiers_file[const] = name


print("Parsing: libxslt-decl-list.txt")
input = open('libxslt-decl-list.txt')
try:
    while 1:
        line = input.readline()
        if not line:
            break
        line = line.rstrip('\r\n')
        if line == "<SECTION>":
            parseSection()
        elif line[:1] == "<":
            print("unhandled %s" % (line))
finally:
    input.close()

print("Parsed: %d files %d identifiers" % (len(files),
                                           len(identifiers_file.keys())))

##################################################################
#
#          Parsing: xml/*.xml
#          To enrich the existing info with extracted comments
#
##################################################################
nbcomments = 0


def _setArgInfo(args, name, value, name_idx, info_idx):
    """Store `value` on the first entry of `args` named `name`.

    Returns 1 when such an entry exists, 0 otherwise.
    """
    for arg in args:
        if arg[name_idx] == name:
            arg[info_idx] = value
            return 1
    return 0


def insertParameterComment(id, name, value, is_param):
    """Attach `value` to parameter `name` (or to the return) of symbol `id`.

    `is_param` is 1 for a parameter description and anything else for the
    return value.  Functions and function types only accept parameters
    they already declare ("..." excepted); macros grow a new argument
    when `name` is unknown.  `nbcomments` is incremented unless the
    comment had to be dropped.
    """
    global nbcomments

    indexString(id, value)
    if id in functions:
        if is_param == 1:
            found = _setArgInfo(functions[id][1], name, value, 1, 2)
            if found == 0 and name != '...':
                print("Arg %s not found on function %s description" % (
                      name, id))
                return
        else:
            functions[id][0][1] = value
    elif id in user_functions:
        if is_param == 1:
            args = user_functions[id][1]
            found = _setArgInfo(args, name, value, 1, 2)
            if found == 0 and name != '...':
                print("Arg %s not found on functype %s description" % (
                      name, id))
                print(args)
                return
        else:
            user_functions[id][0][1] = value
    elif id in macros:
        if is_param == 1:
            args = macros[id][0]
            if _setArgInfo(args, name, value, 0, 1) == 0:
                args.append([name, value])
        else:
            # macros have no slot for a return description
            print("Return info for macro %s: %s" % (id, value))
    else:
        print("lost specific comment %s: %s: %s" % (id, name, value))
        return
    nbcomments = nbcomments + 1


def insertComment(name, title, value, id):
    """Store the description `value` of symbol `name`; return its kind.

    `id` (the DocBook anchor) is remembered in `ids` in every case.  The
    symbol tables are probed in a fixed order and the first one holding
    `name` receives the text.  Returns "function", "typedef", "macro",
    "variable", "struct", "enum", "user_function" or "unknown".
    `nbcomments` is incremented for every stored comment; that increment
    used to sit after the returns and never ran.
    """
    global nbcomments

    ids[name] = id
    indexString(name, value)
    if name in functions:
        functions[name][2] = value
        kind = "function"
    elif name in typedefs:
        typedefs[name] = value
        kind = "typedef"
    elif name in macros:
        macros[name][1] = value
        kind = "macro"
    elif name in variables:
        variables[name][1] = value
        kind = "variable"
    elif name in structs:
        structs[name] = value
        kind = "struct"
    elif name in enums:
        enums[name][1] = value
        kind = "enum"
    elif name in user_functions:
        user_functions[name][2] = value
        kind = "user_function"
    else:
        print("lost comment %s: %s" % (name, value))
        return "unknown"
    nbcomments = nbcomments + 1
    return kind


def _normalizeText(text):
    """Flatten line breaks and squeeze runs of blanks in `text`.

    Leading blanks are removed; a trailing blank is kept because callers
    concatenate successive paragraphs.
    NOTE(review): the source replaced fixed-width runs of blanks whose
    widths were lost; squeezing every run is assumed to be equivalent.
    """
    text = text.replace('\n', ' ').replace('\r', ' ')
    while text.find('  ') >= 0:
        text = text.replace('  ', ' ')
    return text.lstrip(' ')


def analyzeXMLDescriptionRow(doc, desc, id, row):
    """Harvest one table row describing a parameter or the return value.

    The row is a parameter description when its first entry holds a
    `parameter` element; the text comes from the second entry.  Rows with
    fewer than two entries are ignored.
    """
    if doc is None or desc is None or id is None or row is None:
        return
    ctxt = doc.xpathNewContext()
    try:
        ctxt.setContextNode(row)
        param = ctxt.xpathEval("entry[1]/parameter")
        entries = ctxt.xpathEval("entry")
        if len(entries) < 2:
            return
        if param == []:
            is_param = 0
            name = None
        else:
            name = param[0].content
            is_param = 1
        text = _normalizeText(entries[1].content)
    finally:
        # XPath contexts of the libxml2 bindings must be released by hand
        ctxt.xpathFreeContext()
    insertParameterComment(id, name, text, is_param)


def analyzeXMLDescription(doc, desc):
    """Harvest the comments of one `refsect2` description node.

    The symbol name is the first word of the title once parentheses are
    blanked; the paragraphs give the symbol description and the rows of
    the informal table give the parameter/return descriptions.
    """
    if doc is None or desc is None:
        return
    ctxt = doc.xpathNewContext()
    try:
        ctxt.setContextNode(desc)

        #
        # get the function name
        #
        titles = ctxt.xpathEval("title")
        if not titles:
            return
        title = titles[0].content
        old_id = ctxt.xpathEval("string(title/anchor/@id)")
        words = title.replace('(', ' ').replace(')', ' ').split()
        if not words:
            return
        id = words[0]

        #
        # get the function comments
        #
        comment = ""
        for para in ctxt.xpathEval("para"):
            comment = comment + _normalizeText(para.content)
        insertComment(id, title, comment, old_id)

        rows = ctxt.xpathEval("informaltable/tgroup/tbody/row")
        for row in rows:
            analyzeXMLDescriptionRow(doc, desc, id, row)
    finally:
        ctxt.xpathFreeContext()


def analyzeXMLDoc(doc):
    """Harvest every `refsect2` description found in `doc`."""
    if doc is None:
        return
    ctxt = doc.xpathNewContext()
    try:
        descriptions = ctxt.xpathEval("//refsect2")
        print(len(descriptions))
        for description in descriptions:
            analyzeXMLDescription(doc, description)
    finally:
        ctxt.xpathFreeContext()


xmlfiles = 0
filenames = os.listdir("xml")
for filename in filenames:
    print(filename)
    doc = None
    try:
        try:
            doc = libxml2.parseFile("xml/" + filename)
            analyzeXMLDoc(doc)
            xmlfiles = xmlfiles + 1
        except Exception:
            # one unreadable description must not stop the whole run
            print("failed to parse XML description %s" % ("xml/" + filename))
    finally:
        # release the tree even when the analysis failed half-way
        if doc is not None:
            doc.freeDoc()

print("Parsed: %d XML files collexting %d comments" % (xmlfiles, nbcomments))

##################################################################
#
#          Saving: libxslt-api.xml
#
##################################################################
# NOTE(review): the markup inside the string literals of this section
# (XML declaration, element and attribute names, entity references) had
# been stripped from the damaged source, leaving format strings with
# arguments but no placeholder.  It is reconstructed here from the
# arguments each write() passes - confirm the element names against the
# consumers of libxslt-api.xml and libxslt-refs.xml.

def escape(raw):
    """Return `raw` with the five XML special characters escaped.

    `&` is replaced first so the entities introduced for the other
    characters are not escaped a second time.
    """
    raw = raw.replace('&', '&amp;')
    raw = raw.replace('<', '&lt;')
    raw = raw.replace('>', '&gt;')
    raw = raw.replace("'", '&apos;')
    raw = raw.replace('"', '&quot;')
    return raw


def _writeCallable(out, tag, symbol, entry):
    """Write the body and closing tag of a function/functype element.

    `entry` is [[return type, return info], args, info] with `args` a
    list of [type, name, info].  Missing descriptions are reported on
    stdout; "..." arguments and void returns are not expected to have one.
    """
    (ret, args, doc) = entry
    out.write(">\n")
    if doc is not None and doc != '':
        out.write("      <info>%s</info>\n" % (escape(doc)))
    if ret[0] is None:
        # the declaration carried no return type at all
        print("Description for return on %s is missing" % (symbol))
    elif ret[1] is not None and ret[1] != '':
        out.write("      <return type='%s' info='%s'/>\n" % (
                  escape(ret[0]), escape(ret[1])))
    else:
        if ret[0][0:4] != 'void':
            # This one is actually a bug in GTK Doc
            print("Description for return on %s is missing" % (symbol))
        out.write("      <return type='%s'/>\n" % (escape(ret[0])))
    for arg in args:
        if arg[2] is not None and arg[2] != '':
            out.write("      <arg name='%s' type='%s' info='%s'/>\n" % (
                      escape(arg[1]), escape(arg[0]), escape(arg[2])))
        else:
            if arg[0] != '...':
                print("Description for %s on %s is missing" % (
                      arg[1], symbol))
            out.write("      <arg name='%s' type='%s'/>\n" % (
                      escape(arg[1]), escape(arg[0])))
    out.write("    </%s>\n" % (tag))


def _writeMacro(out, tag, symbol, entry):
    """Write the body and closing tag of a macro element.

    `entry` is [[[arg name, info], ...], info].
    """
    (args, doc) = entry
    out.write(">\n")
    if doc is not None and doc != '':
        out.write("      <info>%s</info>\n" % (escape(doc)))
    else:
        print("Description for %s is missing" % (symbol))
    for arg in args:
        if arg[1] is not None and arg[1] != '':
            out.write("      <arg name='%s' info='%s'/>\n" % (
                      escape(arg[0]), escape(arg[1])))
        else:
            # report the argument name; the empty info used to be printed
            print("Description for %s on %s is missing" % (arg[0], symbol))
            out.write("      <arg name='%s'/>\n" % (escape(arg[0])))
    out.write("    </%s>\n" % (tag))


def _writeInfoOnly(out, info):
    """Close an empty element, adding an info attribute when available."""
    if info is not None and info != '':
        out.write(" info='%s'/>\n" % (escape(info)))
    else:
        out.write("/>\n")


print("Saving XML description libxslt-api.xml")
output = open("libxslt-api.xml", "w")
output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
output.write("<api name='libxslt'>\n")
output.write("  <files>\n")
for file in files.keys():
    output.write("    <file name='%s'>\n" % file)
    for symbol in files[file]:
        output.write("     <exports symbol='%s'/>\n" % (symbol))
    output.write("    </file>\n")
output.write("  </files>\n")

output.write("  <symbols>\n")
symbols = list(macros.keys())
symbols.extend(structs.keys())
# this loop used to call variables.append(), which a dict does not have
symbols.extend(variables.keys())
symbols.extend(typedefs.keys())
for i in enums.keys():
    symbols.append(i)
    symbols.extend(enums[i][0])
symbols.extend(functions.keys())
symbols.extend(user_functions.keys())
symbols.sort()
prev = None
for i in symbols:
    if i == prev:
        # symbol defined more than once: describe it only once
        continue
    prev = i
    if i not in identifiers_type:
        print("Symbol %s not found in identifiers list" % (i))
        continue
    type = identifiers_type[i]
    file = identifiers_file.get(i)

    output.write("    <%s name='%s'" % (type, i))
    if file is not None:
        output.write(" file='%s'" % (file))
    if type == "function":
        _writeCallable(output, type, i, functions[i])
    elif type == 'functype':
        _writeCallable(output, type, i, user_functions[i])
    elif type == 'macro':
        _writeMacro(output, type, i, macros[i])
    elif type == 'struct':
        _writeInfoOnly(output, structs[i])
    elif type == 'variable':
        # variables[i] is [type, info]; the whole list used to be escaped
        _writeInfoOnly(output, variables[i][1])
    elif type == 'typedef':
        _writeInfoOnly(output, typedefs[i])
    else:
        output.write("/>\n")

output.write("  </symbols>\n")
output.write("</api>\n")
output.close()
print("generated XML for %d symbols" % (len(symbols)))

##################################################################
#
#          Saving: libxslt-refs.xml
#
##################################################################
# symbol -> section listing it; the name shadows the builtin but is kept
# because link() and possibly other code look it up under this name
hash = {}
for file in files.keys():
    for symbol in files[file]:
        hash[symbol] = file


def link(id):
    """Return the HTML location documenting symbol `id`.

    The page is derived from the section listing the symbol ("index"
    when it is listed nowhere) and the fragment is the upper-cased anchor
    recorded in `ids`, falling back to the symbol name when no anchor, or
    an empty one, was recorded.
    """
    target = (ids.get(id) or id).upper()
    if id in hash:
        module = hash[id].lower()
    else:
        module = 'index'
    file = 'html/libxslt-' + module + '.html'
    return file + '#' + target


# Types too common to be worth a cross-reference entry.
_SKIPPED_TYPES = ('', 'void', 'int', 'char *', 'const char *')


def _writeTypeRefs(out, table):
    """Write one type element per key of `table`, listing its functions."""
    for type in sorted(table.keys()):
        if type in _SKIPPED_TYPES:
            continue
        out.write("    <type name='%s'>\n" % (escape(type)))
        for id in table[type]:
            out.write("      <ref name='%s'/>\n" % (id))
        out.write("    </type>\n")


print("Saving XML crossreferences libxslt-refs.xml")
output = open("libxslt-refs.xml", "w")
output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
output.write("<apirefs name='libxslt'>\n")

# `ids` is deliberately left untouched (it used to be rebound to a list
# here, which broke any later call to link())
documented = sorted(ids.keys())

output.write("  <references>\n")
for id in documented:
    output.write("    <reference name='%s' href='%s'/>\n" % (id, link(id)))
output.write("  </references>\n")

output.write("  <alpha>\n")
letter = None
for id in documented:
    if id[0] != letter:
        if letter is not None:
            output.write("    </letter>\n")
        letter = id[0]
        output.write("    <letter name='%s'>\n" % (letter))
    output.write("      <ref name='%s'/>\n" % (id))
if letter is not None:
    output.write("    </letter>\n")
output.write("  </alpha>\n")

output.write("  <constructors>\n")
_writeTypeRefs(output, ret_types)
output.write("  </constructors>\n")

output.write("  <functions>\n")
_writeTypeRefs(output, types)
output.write("  </functions>\n")

output.write("  <files>\n")
for file in sorted(files.keys()):
    output.write("    <file name='%s'>\n" % (file))
    for id in files[file]:
        output.write("      <ref name='%s'/>\n" % (id))
    output.write("    </file>\n")
output.write("  </files>\n")

output.write("  <index>\n")
# sorted and consumed by the index writer that follows
typ = list(index.keys())
# Write the word index of libxslt-refs.xml: words are grouped by first
# letter and the letters are packed into chunks; a new chunk is opened at
# a letter boundary once the running chunk holds more than 200
# references.  Words referenced more than 30 times are left out.  A
# summary of the chunks (name, first and last letter) follows.
# NOTE(review): the element names in the string literals were stripped
# from the damaged source and are reconstructed from the arguments of
# each write() - confirm against the consumers of libxslt-refs.xml.
typ.sort()
letter = None
first_letter = None
count = 0
chunk = 0
chunks = []
for id in typ:
    if len(index[id]) > 30:
        continue
    if id[0] != letter:
        if letter is None or count > 200:
            if letter is not None:
                output.write("      </letter>\n")
                output.write("    </chunk>\n")
                count = 0
                chunks.append(["chunk%s" % (chunk - 1), first_letter, letter])
            output.write("    <chunk name='chunk%s'>\n" % (chunk))
            first_letter = id[0]
            chunk = chunk + 1
        elif letter is not None:
            output.write("      </letter>\n")
        letter = id[0]
        output.write("      <letter name='%s'>\n" % (letter))
    output.write("        <word name='%s'>\n" % (id))
    # each symbol once, in sorted order, without reordering index[id]
    for token in sorted(set(index[id])):
        output.write("          <ref name='%s'/>\n" % (token))
        count = count + 1
    output.write("        </word>\n")
if letter is not None:
    output.write("      </letter>\n")
    output.write("    </chunk>\n")
    # the chunk still open at the end used to be missing from the summary
    chunks.append(["chunk%s" % (chunk - 1), first_letter, letter])
    output.write("    <chunks>\n")
    for ch in chunks:
        output.write("      <chunk name='%s' start='%s' end='%s'/>\n" % (
                     ch[0], ch[1], ch[2]))
    output.write("    </chunks>\n")
output.write("  </index>\n")
output.write("</apirefs>\n")
output.close()