summary refs log tree commit diff
path: root/codegen/docextract.py
diff options
context:
space:
mode:
author Hasan Wan <hasan.wan@intel.com> 2012-05-25 16:47:13 +0800
committer Hasan Wan <hasan.wan@intel.com> 2012-05-28 16:06:22 +0800
commit 1ddee039e66b9c1feb2129a1904e60760e0e8fc3 (patch)
tree 84a3b12d0f839c6a2ef702eb02f3288c91cf5902 /codegen/docextract.py
parent f7d643cbb2184346b6f8d26091eb7eb38c6bbfe1 (diff)
download pygobject2-1ddee039e66b9c1feb2129a1904e60760e0e8fc3.tar.gz
pygobject2-1ddee039e66b9c1feb2129a1904e60760e0e8fc3.tar.bz2
pygobject2-1ddee039e66b9c1feb2129a1904e60760e0e8fc3.zip
Updated with Tizen:Base source codes
Diffstat (limited to 'codegen/docextract.py')
-rw-r--r-- codegen/docextract.py 448
1 files changed, 448 insertions, 0 deletions
diff --git a/codegen/docextract.py b/codegen/docextract.py
new file mode 100644
index 0000000..eff8c5e
--- /dev/null
+++ b/codegen/docextract.py
@@ -0,0 +1,448 @@
+# -*- Mode: Python; py-indent-offset: 4 -*-
+'''Simple module for extracting GNOME style doc comments from C
+sources, so I can use them for other purposes.'''
+
+import sys, os, string, re
+
+__all__ = ['extract']
+
class GtkDoc:
    """Holds the pieces of a single documentation comment block.

    Attributes:
        name: identifier of the block, or None until set_name() is called.
        block_type: 'function', 'signal' or 'property' ('' until set).
        params: list of (name, description, annotations) tuples.
        annotations: list of annotations attached to the block itself.
        description: accumulated description text.
        ret: (return description, annotations) tuple.
    """

    def __init__(self):
        self.name = None
        self.block_type = ''  # The block type ('function', 'signal', 'property')
        self.params = []
        self.annotations = []
        self.description = ''
        self.ret = ('', [])  # (return, annotations)

    def set_name(self, name):
        """Store the identifier of the block."""
        self.name = name

    def set_type(self, block_type):
        """Store the block type string."""
        self.block_type = block_type

    def get_type(self):
        """Return the block type string."""
        return self.block_type

    def add_param(self, name, description, annotations=None):
        """Append a parameter; a name of '...' is stored as 'Varargs'.

        When annotations is omitted a fresh list is created for this
        parameter, so parameters never share one default list.
        """
        if annotations is None:
            annotations = []
        if name == '...':
            name = 'Varargs'
        self.params.append((name, description, annotations))

    def append_to_last_param(self, extra):
        """Append text to the description of the most recently added param.

        Raises IndexError if no parameter has been added yet.
        """
        name, description, annotations = self.params[-1]
        self.params[-1] = (name, description + extra, annotations)

    def append_to_named_param(self, name, extra):
        """Append text to the first param called name, adding it if absent."""
        for index, (param_name, description, annotations) in \
                enumerate(self.params):
            if param_name == name:
                self.params[index] = (name, description + extra, annotations)
                return
        # fall through to adding extra parameter ...
        self.add_param(name, extra)

    def add_annotation(self, annotation):
        """Append an annotation to the block's own annotation list."""
        self.annotations.append(annotation)

    def get_annotations(self):
        """Return the block's annotation list."""
        return self.annotations

    def append_to_description(self, extra):
        """Append text to the description."""
        self.description = self.description + extra

    def get_description(self):
        """Return the description accumulated so far."""
        return self.description

    def add_return(self, first_line, annotations=None):
        """Replace the return entry with (first_line, annotations).

        When annotations is omitted a fresh list is created, so return
        entries never share one default list.
        """
        if annotations is None:
            annotations = []
        self.ret = (first_line, annotations)

    def append_to_return(self, extra):
        """Append text to the return description, keeping its annotations."""
        self.ret = (self.ret[0] + extra, self.ret[1])
+
# --- Structure of a comment block -----------------------------------------
# Opening line, closing line, the leading ' * ' decoration of a line inside
# the block, and a line inside the block that carries no text.
comment_start_pattern = re.compile(r'^\s*/\*\*\s')
comment_end_pattern = re.compile(r'^\s*\*+/')
comment_line_lead_pattern = re.compile(r'^\s*\*\s*')
comment_empty_line_pattern = re.compile(r'^\s*\**\s*$')

# --- Block identifiers ----------------------------------------------------
# Each pattern captures the identifier in group 1 and optional trailing
# '(annotation)' text in group 2.
function_name_pattern = re.compile(r'^([a-z]\w*)\s*:?(\s*\(.*\)\s*){0,2}\s*$')
signal_name_pattern = re.compile(r'^([A-Z]\w+::[a-z0-9-]+)\s*:?(\s*\(.*\)\s*){0,2}\s*$')
property_name_pattern = re.compile(r'^([A-Z]\w+:[a-z0-9-]+)\s*:?(\s*\(.*\)\s*){0,2}\s*$')

# --- Sections and parameters ----------------------------------------------
return_pattern = re.compile(r'^@?(returns:|return\s+value:)(.*\n?)$', re.IGNORECASE)
deprecated_pattern = re.compile(r'^(deprecated\s*:\s*.*\n?)$', re.IGNORECASE)
rename_to_pattern = re.compile(r'^(rename\s+to)\s*:\s*(.*\n?)$', re.IGNORECASE)
param_pattern = re.compile(r'^@(\S+)\s*:(.*\n?)$')

# --- Annotations ----------------------------------------------------------
# Matches the leading '(...) (...):' run of a parameter or return description
# that was captured by param_pattern / return_pattern.
annotations_pattern = re.compile(r'^(?:(\s*\(.*\)\s*)*:)')
# Matches one leading '(...)' group; used to build the annotation lists.
annotation_lead_pattern = re.compile(r'^\s*\(\s*(.*?)\s*\)\s*')

# Identifier patterns in the order skip_to_identifier tries them; the
# function_name_pattern is deliberately kept last.
identifier_patterns = [ signal_name_pattern, property_name_pattern, function_name_pattern ]

# A return section written without the colon (':') after 'Returns'.  It is
# kept out of final_section_patterns because many descriptions open with
# 'Returns ...' and process_description() would otherwise stop on that first
# line, taking it for a return section.
no_colon_return_pattern = re.compile(r'^@?(returns|return\s+value)\s*(.*\n?)$', re.IGNORECASE)
since_pattern = re.compile(r'^(since\s*:\s*.*\n?)$', re.IGNORECASE)

# Sections that normally follow the description.  Their order of appearance
# is hard to predict, so this list is used to detect where one section ends
# and the next begins.
final_section_patterns = [ return_pattern, since_pattern, deprecated_pattern, rename_to_pattern ]
+
def parse_file(fp, doc_dict):
    """Read every documentation comment block from fp into doc_dict.

    Each block whose identifier is recognised is parsed into a GtkDoc and
    stored in doc_dict under the block's name, replacing any earlier entry
    with the same name.
    """
    current = fp.readline()
    while current:
        block = GtkDoc()
        current = skip_to_comment_block(fp, current)
        current = skip_to_identifier(fp, current, block)
        # skip_to_identifier stores the name in the GtkDoc when it finds one;
        # without a name there is nothing further to read for this block.
        if not block.name:
            continue
        current = process_params(fp, current, block)
        current = process_description(fp, current, block)
        current = process_final_sections(fp, current, block)
        # Record the finished block.
        doc_dict[block.name] = block
+
def get_annotation_list(annotations):
    """Split '(annotation1) (annotation2) ...' into (name, value) tuples.

    The text inside each parenthesised group is split at its first space:
    the part before is the name, the part after is the value.  Annotations
    without a value get '' as their value.  Parsing stops at the first
    position that does not start a parenthesised group.  A false value
    (None or '') yields an empty list.
    """
    parsed = []
    remaining = annotations
    while remaining:
        lead = annotation_lead_pattern.match(remaining)
        if lead is None:
            break
        name, _separator, value = lead.group(1).strip().partition(' ')
        parsed.append((name, value))
        # Drop the group just consumed and carry on with what follows it.
        remaining = remaining[lead.end():]
    return parsed
+
def skip_to_comment_block(fp, line):
    """Advance to the line that opens the next comment block.

    Starting with the already-read line, keep reading from fp until a line
    matches comment_start_pattern or eof is reached.  Returns that line
    ('' on eof).
    """
    while line and not comment_start_pattern.match(line):
        line = fp.readline()
    return line
+
def skip_to_nonblank(fp, line):
    """Skip blank lines inside a comment block.

    Starting with the already-read line, keep reading from fp while the
    line is an empty comment line.  Returns the line where reading stopped:
    the first non-blank line, the line that closes the comment block, or ''
    on eof.
    """
    while line and comment_empty_line_pattern.match(line):
        line = fp.readline()
        # Stop as soon as the block closes; eof ends the loop by itself.
        if line and comment_end_pattern.match(line):
            return line
    return line
+
def skip_to_identifier(fp, line, cur_doc):
    """Look for the identifier at the top of a comment block.

    line is the first line of the block (the '/**').  The next non-blank
    line is tested against identifier_patterns; on a match the identifier,
    its annotations and its type ('function', 'signal' or 'property') are
    stored in cur_doc.  Returns the line that was tested (with its leading
    ' * ' removed), or the line that stopped the search when eof or the end
    of the comment block came first.
    """
    # Step past the '/**' line unless already at eof, then past blank lines.
    if line:
        line = fp.readline()
    line = skip_to_nonblank(fp, line)
    if not line or comment_end_pattern.match(line):
        return line

    # Strip the leading ' * ' before testing for an identifier.
    line = comment_line_lead_pattern.sub('', line)

    type_names = {
        signal_name_pattern: 'signal',
        property_name_pattern: 'property',
        function_name_pattern: 'function',
    }
    for candidate in identifier_patterns:
        found = candidate.match(line)
        if not found:
            continue
        cur_doc.set_name(found.group(1))
        for annotation in get_annotation_list(found.group(2)):
            cur_doc.add_annotation(annotation)
        if candidate in type_names:
            cur_doc.set_type(type_names[candidate])
        break
    return line
+
def process_params(fp, line, cur_doc):
    """Read the '@name: ...' parameter section of a comment block.

    line is the already-read line preceding the section (presumably the
    identifier line).  Each parameter, with its annotations and continuation
    lines, is stored in cur_doc; an entry named 'returns' (any case) is
    stored as the return value instead.  Returns the line that stopped the
    processing ('' on eof).
    """
    # Step past the given line unless at eof, then past blank comment lines.
    if line:
        line = fp.readline()
    line = skip_to_nonblank(fp, line)
    if not line or comment_end_pattern.match(line):
        return line

    # Strip the leading ' * ' of the first non-empty line.
    line = comment_line_lead_pattern.sub('', line)

    # Anything that is not an '@param: ...' line ends the section.
    entry = param_pattern.match(line)
    while line and entry:
        param_name = entry.group(1)
        text = entry.group(2)

        # Separate leading annotations from the description text.
        tags = []
        tag_match = annotations_pattern.match(text)
        if tag_match:
            tags = get_annotation_list(tag_match.group(1))
            text = annotations_pattern.sub('', text)

        # A return documented among the parameters goes to the GtkDoc
        # return; continuation lines must follow it there too.
        if param_name.lower() == "returns":
            cur_doc.add_return(text, tags)
            add_continuation = cur_doc.append_to_return
        else:
            cur_doc.add_param(param_name, text, tags)
            add_continuation = cur_doc.append_to_last_param

        # Collect continuation lines until the next parameter, a blank
        # comment line (start of the description), the end of the block or
        # eof.
        line = fp.readline()
        while line:
            if (comment_empty_line_pattern.match(line)
                    or comment_end_pattern.match(line)):
                break
            line = comment_line_lead_pattern.sub('', line)
            if param_pattern.match(line):
                break
            add_continuation(line)
            line = fp.readline()

        # Decide whether the stopping line starts another parameter.
        entry = param_pattern.match(line)

    return line
+
def process_description(fp, line, cur_doc):
    """Read the description section of a comment block into cur_doc.

    Starting with the already-read line, blank lines are skipped and the
    following lines are appended to the description until the end of the
    comment block, eof, or the first line of a final section ('Returns:',
    'Since:', ...) is found.  Returns the line that stopped the processing;
    the result is always a string ('' on eof).
    """
    # First skip empty lines, returning on eof or end of comment block.
    line = skip_to_nonblank(fp, line)
    if not line or comment_end_pattern.match(line):
        return line

    # Remove the leading ' * ' and a possible 'Description:' prefix.
    line = comment_line_lead_pattern.sub('', line)
    if line.startswith('Description:'):
        line = line[len('Description:'):]

    # Tracks whether the previous line was blank, which is what allows a
    # return section marked 'Returns ...' (no colon) to end the description.
    # Assume it is non-empty to begin with.
    prev_line = 'non-empty'

    while line:
        # 'Returns ...' right after a blank line ends the description (blank
        # lines are represented as '\n' by this loop).
        if prev_line == '\n' and no_colon_return_pattern.match(line):
            return line
        # So does the first line of any of the final sections.
        if any(pattern.match(line) for pattern in final_section_patterns):
            return line

        cur_doc.append_to_description(line)

        prev_line = line
        line = fp.readline()

        # Stop on eof or at the end of the comment block.
        if not line or comment_end_pattern.match(line):
            return line

        # Strip the leading ' * '; an empty remainder stands for a newline.
        line = comment_line_lead_pattern.sub('', line)
        if not line:
            line = '\n'

    # Reached only when nothing was left after removing 'Description:';
    # return the empty string rather than falling off the end with None.
    return line
+
def process_final_sections(fp, line, cur_doc):
    """Read the sections that follow the description into cur_doc.

    line is the line that ended the description (the first line of one of
    the final sections).  'Returns' sections are stored as the GtkDoc
    return, 'Rename to:' is stored as an annotation, and the remaining
    sections ('Since:', 'Deprecated:') are appended to the description.
    Processing stops at the end of the comment block or on eof; the line
    that ended it is returned.
    """
    # Now that the description has been processed, a 'Returns ...' line
    # without a colon may start a section too.  A local list is used so the
    # module-level final_section_patterns is never modified, and future
    # descriptions beginning with 'Returns ...' are unaffected.
    section_patterns = final_section_patterns + [no_colon_return_pattern]

    # Receives continuation lines of the current section; None means they
    # are dropped (also the case before any section has been recognised).
    append_func = None

    while line and not comment_end_pattern.match(line):
        # Remove leading ' * ' from current non-empty comment line.
        line = comment_line_lead_pattern.sub('', line)

        for pattern in section_patterns:
            match = pattern.match(line)
            if not match:
                continue

            if pattern == return_pattern or \
               pattern == no_colon_return_pattern:
                # A return section: split leading annotations from the
                # description before storing both.
                description = match.group(2)
                annotations = []
                annotation_match = annotations_pattern.match(description)
                if annotation_match:
                    annotations = \
                        get_annotation_list(annotation_match.group(1))
                    description = annotations_pattern.sub('', description)
                cur_doc.add_return(description, annotations)
                append_func = cur_doc.append_to_return
            elif pattern == rename_to_pattern:
                # 'Rename to:' (GObjectIntrospection annotation) is a single
                # line that only goes to the annotations.
                append_func = None
                cur_doc.add_annotation((match.group(1), match.group(2)))
            else:
                # 'Since:' and 'Deprecated:' are just appended to the
                # description for now.
                cur_doc.append_to_description(line)
                append_func = cur_doc.append_to_description

            # Only the first matching pattern is used.
            break

        line = fp.readline()

        # Append continuation lines to the current section until a new
        # section, the end of the comment block or eof is found.
        while line and not comment_end_pattern.match(line):
            # Strip the leading ' * '; an empty remainder stands for a
            # newline.
            line = comment_line_lead_pattern.sub('', line)
            if not line:
                line = '\n'

            # A new section starts here; let the outer loop handle it.
            if any(pattern.match(line) for pattern in final_section_patterns):
                break

            if append_func:
                append_func(line)

            line = fp.readline()

    return line
+
def parse_dir(dir, doc_dict):
    """Recursively parse every '*.c' file below dir into doc_dict.

    Sub-directories are descended into; a directory is never opened as a
    file, even when its name ends in '.c'.  The path of each file processed
    is reported on stderr, and each file is closed once parsed.
    """
    for entry in os.listdir(dir):
        if entry in ('.', '..'):
            continue
        path = os.path.join(dir, entry)
        if os.path.isdir(path):
            parse_dir(path, doc_dict)
        elif len(entry) > 2 and entry.endswith('.c'):
            sys.stderr.write("Processing " + path + '\n')
            fp = open(path, 'r')
            try:
                parse_file(fp, doc_dict)
            finally:
                fp.close()
+
def extract(dirs, doc_dict=None):
    """Parse the C sources below each directory in dirs.

    Results are added to doc_dict when one is given (including an empty
    one); otherwise a new dictionary is created.  Returns the dictionary
    that was filled.
    """
    if doc_dict is None:
        doc_dict = {}
    for directory in dirs:
        parse_dir(directory, doc_dict)
    return doc_dict
+
tmpl_section_pattern = re.compile(r'^<!-- ##### (\w+) (\w+) ##### -->$')
def parse_tmpl(fp, doc_dict):
    """Merge the contents of a template file read from fp into doc_dict.

    A '<!-- ##### TYPE NAME ##### -->' line starts a new section.  Only
    FUNCTION sections are read: their lines are added to the GtkDoc stored
    under NAME in doc_dict, which is created when missing.  '@Returns: '
    lines with non-blank text go to the return, other '@name:' lines to the
    named parameter, and everything else to the description.  Lines after
    an 'Unused Parameters' marker are ignored until the next section.
    """
    cur_doc = None

    line = fp.readline()
    while line:
        match = tmpl_section_pattern.match(line)
        if match:
            cur_doc = None  # new input shouldn't affect the old doc dict
            sect_type = match.group(1)
            sect_name = match.group(2)

            if sect_type == 'FUNCTION':
                cur_doc = doc_dict.get(sect_name)
                if not cur_doc:
                    cur_doc = GtkDoc()
                    cur_doc.set_name(sect_name)
                    doc_dict[sect_name] = cur_doc
        elif line == '<!-- # Unused Parameters # -->\n':
            cur_doc = None  # don't worry about unused params.
        elif cur_doc:
            if line[:10] == '@Returns: ':
                # str methods are used instead of the string module
                # functions, which are not available on Python 3.
                if line[10:].strip():
                    cur_doc.append_to_return(line[10:])
            elif line[0] == '@':
                pos = line.find(':')
                if pos >= 0:
                    cur_doc.append_to_named_param(line[1:pos], line[pos+1:])
                else:
                    cur_doc.append_to_description(line)
            else:
                cur_doc.append_to_description(line)

        line = fp.readline()
+
def extract_tmpl(dirs, doc_dict=None):
    """Parse the '*.sgml' template files directly inside each of dirs.

    Sub-directories are not descended into.  Results are added to doc_dict
    when one is given (including an empty one); otherwise a new dictionary
    is created.  Returns the dictionary that was filled.
    """
    if doc_dict is None:
        doc_dict = {}
    for directory in dirs:
        for entry in os.listdir(directory):
            if entry in ('.', '..'):
                continue
            path = os.path.join(directory, entry)
            if os.path.isdir(path):
                continue
            # Compare the whole suffix: a two-character slice can never
            # equal '.sgml', so no template would ever be parsed.
            if entry.endswith('.sgml'):
                fp = open(path, 'r')
                try:
                    parse_tmpl(fp, doc_dict)
                finally:
                    fp.close()
    return doc_dict