codegen/docextract_to_xml.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139

#!/usr/bin/env python
# -*- Mode: Python; py-indent-offset: 4 -*-
#
# This little script outputs the C doc comments to an XML format.
# So far it's only used by gtkmm (The C++ bindings). Murray Cumming.
# Usage example:
# # ./docextract_to_xml.py -s /gnome/head/cvs/gtk+/gtk/ -s /gnome/head/cvs/gtk+/docs/reference/gtk/tmpl/ > gtk_docs.xml

import getopt
import re
import string
import sys

import docextract

def usage():
    """Write the command-line synopsis to stderr and exit with status 1."""
    synopsis = ' '.join([
        'usage: docextract_to_xml.py',
        '[-s /src/dir | --source-dir=/src/dir]',
        '[-a | --with-annotations] [-p | --with-properties]',
        '[-i | --with-signals ]',
    ])
    sys.stderr.write(synopsis + '\n')
    sys.exit(1)

# Translates special texts to &... HTML acceptable format.  Also replace
# occurrences of '/*' and '*/' with '/ *' and '* /' respectively to avoid
# comment errors (note the spaces).  Some function descriptions include C++
# multi-line comments which cause errors when the description is included in a
# C++ Doxygen comment block.
def escape_text(unescaped_text):
    """Return a copy of unescaped_text escaped for the generated XML.

    The steps are applied in a fixed order:

    1. every '&' that does not start a named entity reference becomes
       '&amp;';
    2. a handful of named entities are rewritten to numeric references or
       to the literal character they stand for;
    3. '<', '>' and '"' are escaped so the result can be used both as tag
       content and as an attribute value;
    4. '/*' and '*/' are split with a space so the text cannot open or
       close a comment when pasted into a C++ Doxygen comment block.
    """
    # Escape every "&" not part of a named entity reference.
    # NOTE(review): the look-ahead only accepts letters, so a numeric
    # reference already present in the input (e.g. '&#160;') has its '&'
    # escaped as well -- confirm whether that is intended.
    escaped_text = re.sub(r'&(?![A-Za-z]+;)', '&amp;', unescaped_text)

    # Order matters: the entity rewrites must run after the '&' escaping
    # (so the '&#...;' they produce is left alone), and '&sol;' must be
    # expanded before the comment-delimiter replacements at the end.
    replacements = (
        # These weird entities turn up in the output...
        ('&mdash;', '&#8212;'),
        ('&ast;', '*'),
        ('&percnt;', '%'),
        ('&commat;', '@'),
        ('&num;', '&#35;'),
        ('&nbsp;', '&#160;'),
        # This represents a '/' before or after an '*' so replace with a
        # slash surrounded by spaces.
        ('&sol;', ' / '),
        # Escape for both tag contents and attribute values.
        ('<', '&lt;'),
        ('>', '&gt;'),
        ('"', '&quot;'),
        # Replace C++ comment begin and end markers with ones that do not
        # affect Doxygen.
        ('/*', '/ *'),
        ('*/', '* /'),
    )
    # str.replace() is used instead of the deprecated string.replace()
    # module function; both perform the same substitution.
    for old, new in replacements:
        escaped_text = escaped_text.replace(old, new)

    return escaped_text

def print_annotations(annotations):
    """Print one <annotation> element per entry of annotations.

    Each entry is indexed as entry[0] (used verbatim as the value of the
    name attribute) and entry[1] (escaped with escape_text() and used as
    the element content).
    """
    # NOTE(review): the name attribute is emitted without surrounding quotes
    # and without escaping -- confirm whether the value supplied by
    # docextract already carries its own quotes.
    for entry in annotations:
        opening_tag = "<annotation name=" + entry[0] + ">"
        content = escape_text(entry[1])
        print(opening_tag + content + "</annotation>")

if __name__ == '__main__':
    # Command-line driver: parse the options, extract the documentation
    # blocks from the given source directories and print them to stdout as
    # an XML document.
    try:
        # NOTE(review): the short-option string also accepts "-d ARG" and
        # "-o ARG", but nothing below acts on them -- confirm whether they
        # are kept only for backward compatibility.
        opts, args = getopt.getopt(sys.argv[1:], "d:s:o:api",
                                   ["source-dir=", "with-annotations",
                                    "with-properties", "with-signals"])
    except getopt.error as e:
        sys.stderr.write('docextract_to_xml.py: %s\n' % e)
        usage()

    source_dirs = []
    with_annotations = False
    with_signals = False
    with_properties = False
    for opt, arg in opts:
        if opt in ('-s', '--source-dir'):
            source_dirs.append(arg)
        elif opt in ('-a', '--with-annotations'):
            with_annotations = True
        elif opt in ('-p', '--with-properties'):
            with_properties = True
        elif opt in ('-i', '--with-signals'):
            with_signals = True

    # Positional arguments are not supported.
    if args:
        usage()

    docs = docextract.extract(source_dirs)
    # Try the tmpl sgml files too.
    docextract.extract_tmpl(source_dirs, docs)

    # Nothing at all (not even the root element) is printed when no
    # documentation blocks were found.
    if docs:

        print("<root>")

        # Sort by name so the output order is stable.
        for name, value in sorted(docs.items()):
            # Get the type of comment block ('function', 'signal' or
            # 'property') (the value is a GtkDoc).
            block_type = value.get_type()

            # Skip signals if the option was not specified.
            if block_type == 'signal' and not with_signals:
                continue
            # Likewise for properties.
            if block_type == 'property' and not with_properties:
                continue

            print("<" + block_type + " name=\"" + escape_text(name) + "\">")

            print("<description>")
            print(escape_text(value.get_description()))
            print("</description>")

            # Loop through the parameters if not dealing with a property:
            if block_type != 'property':
                print("<parameters>")
                # A distinct loop variable is used so the block's own name
                # is not rebound by the parameter loop.
                for param_name, description, annotations in value.params:
                    print("<parameter name=\"" + escape_text(param_name) +
                          "\">")
                    print("<parameter_description>" +
                          escape_text(description) +
                          "</parameter_description>")

                    if with_annotations:
                        print_annotations(annotations)

                    print("</parameter>")

                print("</parameters>")

                # Show the return-type (also if not dealing with a property):
                if with_annotations:
                    print("<return>")
                    print("<return_description>" +
                          escape_text(value.ret[0]) +
                          "</return_description>")
                    print_annotations(value.ret[1])
                    print("</return>")
                else:
                    print("<return>" + escape_text(value.ret[0]) +
                          "</return>")

            if with_annotations:
                print_annotations(value.get_annotations())

            # The embedded newline leaves a blank line after each block.
            print("</" + block_type + ">\n")

        print("</root>")