summaryrefslogtreecommitdiff
path: root/tools/quickbook/src/id_generation.cpp
blob: 80df0ad4eb4e992574452d9f5dd07dd594689da6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
/*=============================================================================
    Copyright (c) 2011, 2013 Daniel James

    Use, modification and distribution is subject to the Boost Software
    License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
    http://www.boost.org/LICENSE_1_0.txt)
=============================================================================*/

#include <cctype>
#include "document_state_impl.hpp"
#include <boost/make_shared.hpp>
#include <boost/unordered_map.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/foreach.hpp>
#include <boost/range/algorithm.hpp>

namespace quickbook {
    //
    // The maximum size of a generated part of an id.
    //
    // Not a strict maximum, sometimes broken because the user
    // explicitly uses a longer id, or for backwards compatibility.

    static const std::size_t max_size = 32;

    // Placeholders listed in the order in which their ids are generated.
    typedef std::vector<id_placeholder const*> placeholder_index;
    // Builds the sorted placeholder index for the given xml (defined below).
    placeholder_index index_placeholders(document_state_impl const&, quickbook::string_view);

    // Generates the ids for the placeholders in the given iterator range,
    // storing each one in generated_ids at the placeholder's index
    // (defined below).
    void generate_id_block(
            placeholder_index::iterator, placeholder_index::iterator,
            std::vector<std::string>& generated_ids);

    std::vector<std::string> generate_ids(document_state_impl const& state, quickbook::string_view xml)
    {
        // One output slot per placeholder, addressed by placeholder index.
        std::vector<std::string> generated_ids(state.placeholders.size());

        // The placeholders, sorted into the order they must be processed in.
        placeholder_index sorted = index_placeholders(state, xml);

        placeholder_index::iterator const last = sorted.end();
        placeholder_index::iterator block_start = sorted.begin();

        // Ids with the same number of dots are handled as one block, since
        // ids with different parents can still clash (old fashioned id
        // generation, or anchors containing several dots).
        while (block_start != last) {
            // Advance to the first placeholder whose dot count differs.
            placeholder_index::iterator block_stop = block_start;
            do {
                ++block_stop;
            } while (block_stop != last &&
                    (*block_stop)->num_dots == (*block_start)->num_dots);

            generate_id_block(block_start, block_stop, generated_ids);
            block_start = block_stop;
        }

        return generated_ids;
    }

    //
    // index_placeholders
    //
    // Create a sorted index of the placeholders, in order
    // to make numbering duplicates easy. A total order.
    //

    // Strict ordering of placeholders: fewest dots first, then explicit ids
    // (and higher categories) before the rest, then by the recorded order.
    struct placeholder_compare
    {
        std::vector<unsigned>& order;

        placeholder_compare(std::vector<unsigned>& order_) : order(order_) {}

        bool operator()(id_placeholder const* x, id_placeholder const* y) const
        {
            // Primary key: number of dots, ascending.
            if (x->num_dots < y->num_dots) return true;
            if (x->num_dots > y->num_dots) return false;

            // Secondary key: placeholders at or above explicit_id sort first.
            bool const x_explicit = x->category.c >= id_category::explicit_id;
            bool const y_explicit = y->category.c >= id_category::explicit_id;
            if (x_explicit != y_explicit) return x_explicit;

            // Final key: the order value recorded for each placeholder.
            return order[x->index] < order[y->index];
        }
    };

    // xml_processor callback that numbers each placeholder the first time
    // its id is reported, numbering a placeholder's parent before the
    // placeholder itself. An order value of zero means "not numbered yet".
    struct get_placeholder_order_callback : xml_processor::callback
    {
        document_state_impl const& state;
        std::vector<unsigned>& order;
        unsigned count;

        get_placeholder_order_callback(document_state_impl const& state_,
                std::vector<unsigned>& order_)
          : state(state_), order(order_), count(0)
        {
        }

        void id_value(quickbook::string_view value)
        {
            id_placeholder const* placeholder = state.get_placeholder(value);
            set_placeholder_order(placeholder);
        }

        void set_placeholder_order(id_placeholder const* p)
        {
            // Nothing to do for a null placeholder or one already numbered.
            if (!p || order[p->index]) return;

            // Ancestors get the smaller numbers.
            set_placeholder_order(p->parent);
            order[p->index] = ++count;
        }
    };

    placeholder_index index_placeholders(
            document_state_impl const& state,
            quickbook::string_view xml)
    {
        // The order in which the placeholders appear in the xml source,
        // indexed by placeholder index. Entries left at zero belong to
        // placeholders that were never reached while parsing.
        std::vector<unsigned> order(state.placeholders.size());

        xml_processor processor;
        get_placeholder_order_callback order_callback(state, order);
        processor.parse(xml, order_callback);

        // Keep only the placeholders that were given an order, then sort.
        placeholder_index index;
        index.reserve(state.placeholders.size());
        BOOST_FOREACH(id_placeholder const& placeholder, state.placeholders)
        {
            if (order[placeholder.index]) {
                index.push_back(&placeholder);
            }
        }

        boost::sort(index, placeholder_compare(order));
        return index;
    }

    // Resolve and generate ids.

    // Generates the ids for one block of placeholders, keeping track of
    // which ids have already been reserved while doing so.
    struct generate_id_block_type
    {
        // The ids which won't require duplicate handling.
        typedef boost::unordered_map<std::string, id_placeholder const*>
            chosen_id_map;
        // Maps each reserved id to the placeholder that holds it.
        chosen_id_map chosen_ids;
        // Output, indexed by placeholder index. Also read by resolve_id to
        // look up the generated id of a placeholder's parent.
        std::vector<std::string>& generated_ids;

        explicit generate_id_block_type(std::vector<std::string>& generated_ids_) :
            generated_ids(generated_ids_) {}

        // Resolve and then generate the ids for every placeholder in the
        // range, writing the results into generated_ids.
        void generate(placeholder_index::iterator begin,
            placeholder_index::iterator end);

        // Build the full id for a placeholder and, for categories above
        // numbered, reserve it in chosen_ids.
        std::string resolve_id(id_placeholder const*);
        // Return the resolved id if the placeholder holds its reservation,
        // otherwise derive a unique numbered id from it.
        std::string generate_id(id_placeholder const*, std::string const&);
    };

    // Generate the ids for one block of placeholders using a fresh set of
    // reservations that lives only for the duration of this call.
    void generate_id_block(placeholder_index::iterator begin,
            placeholder_index::iterator end,
            std::vector<std::string>& generated_ids)
    {
        generate_id_block_type(generated_ids).generate(begin, end);
    }

    void generate_id_block_type::generate(placeholder_index::iterator begin,
            placeholder_index::iterator end)
    {
        placeholder_index::iterator pos;

        // First pass: resolve every id, so that all the reservations are
        // made before any numbered id is generated.
        std::vector<std::string> resolved_ids;
        for (pos = begin; pos != end; ++pos) {
            resolved_ids.push_back(resolve_id(*pos));
        }

        // Second pass: walk the placeholders and their resolved ids in
        // step, storing the final id for each placeholder.
        std::vector<std::string>::const_iterator resolved = resolved_ids.begin();
        for (pos = begin; pos != end; ++pos, ++resolved) {
            id_placeholder const* placeholder = *pos;
            generated_ids[placeholder->index] =
                generate_id(placeholder, *resolved);
        }
    }

    std::string generate_id_block_type::resolve_id(id_placeholder const* p)
    {
        // A child id is the parent's generated id, a dot, then its own id.
        std::string id;
        if (p->parent) {
            id = generated_ids[p->parent->index] + "." + p->id;
        }
        else {
            id = p->id;
        }

        // Only categories above numbered take part in reservation.
        if (p->category.c <= id_category::numbered) return id;

        // Reserve the id; if it is already reserved this finds the
        // existing entry instead.
        std::pair<chosen_id_map::iterator, bool> reserved =
            chosen_ids.emplace(id, p);
        id_placeholder const*& holder = reserved.first->second;

        // A placeholder with a higher category takes the reservation over.
        if (p->category.c > holder->category.c) holder = p;

        return id;
    }

    // Choose the final id for placeholder `p`, whose resolved id is
    // `resolved_id`.
    //
    // If `p` has a category above numbered and holds the reservation for
    // `resolved_id`, that id is returned unchanged. Otherwise the part of
    // the id after the last dot is normalized and a decimal counter is
    // appended, trying successive values until an unreserved id is found;
    // that id is reserved for `p` and returned.
    //
    // Throws std::runtime_error if the base has been trimmed away entirely
    // and no candidate fits within max_size.
    std::string generate_id_block_type::generate_id(id_placeholder const* p,
            std::string const& resolved_id)
    {
        // resolve_id reserves every id whose category is above numbered,
        // so the lookup below is expected to find an entry.
        if (p->category.c > id_category::numbered &&
                chosen_ids.at(resolved_id) == p)
        {
            return resolved_id;
        }

        // Split the id into its parent part and child part.
        //
        // Note: can't just use the placeholder's parent, as the
        // placeholder id might contain dots.
        std::size_t child_start = resolved_id.rfind('.');
        std::string parent_id, base_id;

        if (child_start == std::string::npos) {
            base_id = normalize_id(resolved_id, max_size - 1);
        }
        else {
            // parent_id keeps the trailing dot.
            parent_id = resolved_id.substr(0, child_start + 1);
            base_id = normalize_id(resolved_id.substr(child_start + 1),
                    max_size - 1);
        }

        // Since we're adding digits, don't want an id that ends in
        // a digit.
        //
        // Note: the character is converted to unsigned char before being
        // passed to std::isdigit, as passing a negative char value is
        // undefined behaviour.

        std::string::size_type length = base_id.size();

        if (length > 0 &&
                std::isdigit(static_cast<unsigned char>(base_id[length - 1])))
        {
            if (length < max_size - 1) {
                // Room to spare, so separate the digits with an underscore.
                base_id += '_';
                ++length;
            }
            else {
                // No room, so drop the trailing digits instead.
                while (length > 0 && std::isdigit(
                        static_cast<unsigned char>(base_id[length - 1])))
                    --length;
                base_id.erase(length);
            }
        }

        unsigned count = 0;

        for (;;)
        {
            std::string postfix =
                boost::lexical_cast<std::string>(count++);

            if ((base_id.size() + postfix.size()) > max_size) {
                // The id is now too long, so reduce the length and
                // start again.

                // Would need a lot of ids to get this far....
                if (length == 0) throw std::runtime_error("Too many ids");

                // Trim a character.
                --length;

                // Trim any trailing digits.
                while (length > 0 && std::isdigit(
                        static_cast<unsigned char>(base_id[length - 1])))
                    --length;

                base_id.erase(length);
                count = 0;
            }
            else {
                // Try to reserve this id.
                std::string generated_id = parent_id + base_id + postfix;

                if (chosen_ids.emplace(generated_id, p).second) {
                    return generated_id;
                }
            }
        }
    }

    //
    // replace_ids
    //
    // Return a copy of the xml with all the placeholders replaced by
    // generated_ids.
    //

    // xml_processor callback that copies the xml into `result`, swapping
    // each id value that names a placeholder for its replacement id.
    struct replace_ids_callback : xml_processor::callback
    {
        document_state_impl const& state;
        std::vector<std::string> const* ids;
        string_iterator source_pos;
        std::string result;

        replace_ids_callback(document_state_impl const& state_,
                std::vector<std::string> const* ids_)
          : state(state_), ids(ids_), source_pos(), result()
        {
        }

        // Begin copying from the start of the xml.
        void start(quickbook::string_view xml)
        {
            source_pos = xml.begin();
        }

        void id_value(quickbook::string_view value)
        {
            id_placeholder const* p = state.get_placeholder(value);

            // Values that aren't placeholders are left for a later copy.
            if (!p) return;

            // With no generated ids supplied, use the unresolved id.
            quickbook::string_view id = ids ?
                (*ids)[p->index] : p->unresolved_id;

            // Copy the text leading up to the value, then the replacement,
            // and carry on from just after the value.
            result.append(source_pos, value.begin());
            result.append(id.begin(), id.end());
            source_pos = value.end();
        }

        // Copy whatever is left after the last replaced value.
        void finish(quickbook::string_view xml)
        {
            result.append(source_pos, xml.end());
            source_pos = xml.end();
        }
    };

    std::string replace_ids(document_state_impl const& state, quickbook::string_view xml,
            std::vector<std::string> const* ids)
    {
        // Run the xml through a processor whose callback accumulates the
        // rewritten text, then hand that text back.
        xml_processor xml_parser;
        replace_ids_callback replacer(state, ids);
        xml_parser.parse(xml, replacer);

        return replacer.result;
    }

    //
    // normalize_id
    //
    // Normalizes generated ids.
    //

    // Normalize an id, limiting it to the default maximum size.
    std::string normalize_id(quickbook::string_view src_id)
    {
        return normalize_id(src_id, max_size);
    }

    // Normalize an id, producing at most `size` characters:
    //  - leading underscores are removed,
    //  - each run of underscores becomes a single underscore,
    //  - a run of underscores at the very end of the source is dropped,
    //  - an id that is empty or consists only of underscores becomes "_".
    std::string normalize_id(quickbook::string_view src_id, std::size_t size)
    {
        std::string const source(src_id.begin(), src_id.end());
        std::size_t const source_length = source.length();

        // Find the first character that isn't an underscore.
        std::size_t pos = source.find_first_not_of('_');
        if (pos == std::string::npos) return "_";

        std::string id;

        while (pos < source_length && id.length() < size) {
            if (source[pos] != '_') {
                id += source[pos++];
                continue;
            }

            // Skip the whole run of underscores...
            do {
                ++pos;
            } while (pos < source_length && source[pos] == '_');

            // ...and emit just one, unless the run ended the source.
            if (pos < source_length) id += '_';
        }

        return id;
    }
}