summaryrefslogtreecommitdiff
path: root/glib/gstringchunk.c
blob: 1c40a488d51fa986c1cc4ab22db34f0a4583ebca (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
/* GLIB - Library of useful routines for C programming
 * Copyright (C) 1995-1997  Peter Mattis, Spencer Kimball and Josh MacDonald
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

/*
 * Modified by the GLib Team and others 1997-2000.  See the AUTHORS
 * file for a list of people on the GLib Team.  See the ChangeLog
 * files for a list of changes.  These files are distributed with
 * GLib at ftp://ftp.gtk.org/pub/gtk/.
 */

/*
 * MT safe
 */

#include "config.h"

#include <string.h>

#include "gstringchunk.h"

#include "ghash.h"
#include "gslist.h"
#include "gmessages.h"

#include "gutils.h"

/**
 * SECTION:string_chunks
 * @title: String Chunks
 * @short_description: efficient storage of groups of strings
 *
 * String chunks are used to store groups of strings. Memory is
 * allocated in blocks, and as strings are added to the #GStringChunk
 * they are copied into the next free position in a block. When a block
 * is full a new block is allocated.
 *
 * When storing a large number of strings, string chunks are more
 * efficient than using g_strdup() since fewer calls to malloc() are
 * needed, and less memory is wasted in memory allocation overheads.
 *
 * By adding strings with g_string_chunk_insert_const() it is also
 * possible to remove duplicates.
 *
 * To create a new #GStringChunk use g_string_chunk_new().
 *
 * To add strings to a #GStringChunk use g_string_chunk_insert().
 *
 * To add strings to a #GStringChunk, but without duplicating strings
 * which are already in the #GStringChunk, use
 * g_string_chunk_insert_const().
 *
 * To free the entire #GStringChunk use g_string_chunk_free(). It is
 * not possible to free individual strings.
 */

/**
 * GStringChunk:
 *
 * An opaque data structure representing String Chunks.
 * It should only be accessed by using the following functions.
 */
struct _GStringChunk
{
  GHashTable *const_table;
  GSList     *storage_list;
  gsize       storage_next;
  gsize       this_size;
  gsize       default_size;
};

#define MY_MAXSIZE ((gsize)-1)

static inline gsize
nearest_power (gsize base,
               gsize num)
{
  if (num > MY_MAXSIZE / 2)
    {
      return MY_MAXSIZE;
    }
  else
    {
      gsize n = base;

      while (n < num)
        n <<= 1;

      return n;
    }
}

/**
 * g_string_chunk_new:
 * @size: the default size of the blocks of memory which are
 *     allocated to store the strings. If a particular string
 *     is larger than this default size, a larger block of
 *     memory will be allocated for it.
 *
 * Creates a new #GStringChunk.
 *
 * Returns: a new #GStringChunk
 */
GStringChunk *
g_string_chunk_new (gsize size)
{
  GStringChunk *new_chunk = g_new (GStringChunk, 1);
  gsize actual_size = 1;

  actual_size = nearest_power (1, size);

  new_chunk->const_table  = NULL;
  new_chunk->storage_list = NULL;
  new_chunk->storage_next = actual_size;
  new_chunk->default_size = actual_size;
  new_chunk->this_size    = actual_size;

  return new_chunk;
}

/**
 * g_string_chunk_free:
 * @chunk: a #GStringChunk
 *
 * Frees all memory allocated by the #GStringChunk.
 * After calling g_string_chunk_free() it is not safe to
 * access any of the strings which were contained within it.
 */
void
g_string_chunk_free (GStringChunk *chunk)
{
  GSList *tmp_list;

  g_return_if_fail (chunk != NULL);

  if (chunk->storage_list)
    {
      for (tmp_list = chunk->storage_list; tmp_list; tmp_list = tmp_list->next)
        g_free (tmp_list->data);

      g_slist_free (chunk->storage_list);
    }

  if (chunk->const_table)
    g_hash_table_destroy (chunk->const_table);

  g_free (chunk);
}

/**
 * g_string_chunk_clear:
 * @chunk: a #GStringChunk
 *
 * Frees all strings contained within the #GStringChunk.
 * After calling g_string_chunk_clear() it is not safe to
 * access any of the strings which were contained within it.
 *
 * Since: 2.14
 */
void
g_string_chunk_clear (GStringChunk *chunk)
{
  GSList *tmp_list;

  g_return_if_fail (chunk != NULL);

  if (chunk->storage_list)
    {
      for (tmp_list = chunk->storage_list; tmp_list; tmp_list = tmp_list->next)
        g_free (tmp_list->data);

      g_slist_free (chunk->storage_list);

      chunk->storage_list = NULL;
      chunk->storage_next = chunk->default_size;
      chunk->this_size    = chunk->default_size;
    }

  if (chunk->const_table)
      g_hash_table_remove_all (chunk->const_table);
}

/**
 * g_string_chunk_insert:
 * @chunk: a #GStringChunk
 * @string: the string to add
 *
 * Adds a copy of @string to the #GStringChunk.
 * It returns a pointer to the new copy of the string
 * in the #GStringChunk. The characters in the string
 * can be changed, if necessary, though you should not
 * change anything after the end of the string.
 *
 * Unlike g_string_chunk_insert_const(), this function
 * does not check for duplicates. Also strings added
 * with g_string_chunk_insert() will not be searched
 * by g_string_chunk_insert_const() when looking for
 * duplicates.
 *
 * Returns: a pointer to the copy of @string within
 *     the #GStringChunk
 */
gchar*
g_string_chunk_insert (GStringChunk *chunk,
                       const gchar  *string)
{
  g_return_val_if_fail (chunk != NULL, NULL);

  return g_string_chunk_insert_len (chunk, string, -1);
}

/**
 * g_string_chunk_insert_const:
 * @chunk: a #GStringChunk
 * @string: the string to add
 *
 * Adds a copy of @string to the #GStringChunk, unless the same
 * string has already been added to the #GStringChunk with
 * g_string_chunk_insert_const().
 *
 * This function is useful if you need to copy a large number
 * of strings but do not want to waste space storing duplicates.
 * But you must remember that there may be several pointers to
 * the same string, and so any changes made to the strings
 * should be done very carefully.
 *
 * Note that g_string_chunk_insert_const() will not return a
 * pointer to a string added with g_string_chunk_insert(), even
 * if they do match.
 *
 * Returns: a pointer to the new or existing copy of @string
 *     within the #GStringChunk
 */
gchar*
g_string_chunk_insert_const (GStringChunk *chunk,
                             const gchar  *string)
{
  char* lookup;

  g_return_val_if_fail (chunk != NULL, NULL);

  if (!chunk->const_table)
    chunk->const_table = g_hash_table_new (g_str_hash, g_str_equal);

  lookup = (char*) g_hash_table_lookup (chunk->const_table, (gchar *)string);

  if (!lookup)
    {
      lookup = g_string_chunk_insert (chunk, string);
      g_hash_table_insert (chunk->const_table, lookup, lookup);
    }

  return lookup;
}

/**
 * g_string_chunk_insert_len:
 * @chunk: a #GStringChunk
 * @string: bytes to insert
 * @len: number of bytes of @string to insert, or -1 to insert a
 *     nul-terminated string
 *
 * Adds a copy of the first @len bytes of @string to the #GStringChunk.
 * The copy is nul-terminated.
 *
 * Since this function does not stop at nul bytes, it is the caller's
 * responsibility to ensure that @string has at least @len addressable
 * bytes.
 *
 * The characters in the returned string can be changed, if necessary,
 * though you should not change anything after the end of the string.
 *
 * Return value: a pointer to the copy of @string within the #GStringChunk
 *
 * Since: 2.4
 */
gchar*
g_string_chunk_insert_len (GStringChunk *chunk,
                           const gchar  *string,
                           gssize        len)
{
  gssize size;
  gchar* pos;

  g_return_val_if_fail (chunk != NULL, NULL);

  if (len < 0)
    size = strlen (string);
  else
    size = len;

  if ((chunk->storage_next + size + 1) > chunk->this_size)
    {
      gsize new_size = nearest_power (chunk->default_size, size + 1);

      chunk->storage_list = g_slist_prepend (chunk->storage_list,
                                             g_new (gchar, new_size));

      chunk->this_size = new_size;
      chunk->storage_next = 0;
    }

  pos = ((gchar *) chunk->storage_list->data) + chunk->storage_next;

  *(pos + size) = '\0';

  memcpy (pos, string, size);

  chunk->storage_next += size + 1;

  return pos;
}