2
* cong-random-document.c
4
* Plugin for testing service of various kinds. Not really intended for end-users.
6
* Copyright (C) 2005 David Malcolm
8
* Conglomerate is free software; you can redistribute it and/or
9
* modify it under the terms of the GNU General Public License as
10
* published by the Free Software Foundation; either version 2 of the
11
* License, or (at your option) any later version.
13
* Conglomerate is distributed in the hope that it will be useful,
14
* but WITHOUT ANY WARRANTY; without even the implied warranty of
15
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
* General Public License for more details.
18
* You should have received a copy of the GNU General Public License
19
* along with this program; if not, write to the Free Software
20
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22
* Authors: David Malcolm <david@davemalcolm.demon.co.uk>
26
#include "cong-util.h"
28
#include "cong-dispspec.h"
29
#include "cong-dispspec-element.h"
30
#include "cong-dispspec-registry.h"
34
/* Logging helpers for this file: each one forwards its first argument (x) to
 * g_message() as the format string, followed by zero, one or two extra
 * arguments.
 * NOTE(review): x is used as the format string itself, so callers should only
 * pass literal format strings whose conversions match the extra arguments. */
#define LOG_RANDOM1(x) (g_message ((x)))
35
#define LOG_RANDOM2(x, a) (g_message ((x), (a)))
36
#define LOG_RANDOM3(x, a, b) (g_message ((x), (a), (b)))
38
/* Generator state that is passed as the first argument (rci) to the helper
 * functions in this file.
 * NOTE(review): this listing has gaps. Besides the two members shown below,
 * other code in this file reads rci->random and rci->depth, so those members
 * are presumably declared on lines that are not visible here -- confirm
 * against the full source. */
typedef struct RandomCreationInfo RandomCreationInfo;
39
struct RandomCreationInfo
41
/* Dispspec queried for candidate elements when random children are created. */
CongDispspec *dispspec;
42
/* Copied from the ensure_valid argument of cong_make_random_doc(); no read of
 * this flag is visible in this listing. */
gboolean ensure_valid;
48
/* NOTE(review): presumably the forward declaration of populate_element(), which
 * is called by populate_element_from_content() before its definition appears
 * later in the file. The return type and the remaining parameters are not
 * visible in this listing. */
populate_element (RandomCreationInfo *rci,
54
* generate_bool_for_opt:
60
/* Returns a random boolean drawn from the generator stored in rci->random.
 * Within this file it decides whether an XML_ELEMENT_CONTENT_OPT item is
 * emitted and which branch of an XML_ELEMENT_CONTENT_OR node is taken. */
generate_bool_for_opt (RandomCreationInfo *rci)
64
return g_rand_boolean (rci->random);
68
* generate_count_for_mult:
74
/* Returns the repetition count used for XML_ELEMENT_CONTENT_MULT items, drawn
 * with g_rand_int_range() from rci->random.
 * NOTE(review): the range bounds are on lines not visible in this listing. */
generate_count_for_mult (RandomCreationInfo *rci)
78
return g_rand_int_range (rci->random,
84
* generate_count_for_plus:
90
/* Returns the repetition count used for XML_ELEMENT_CONTENT_PLUS items, drawn
 * with g_rand_int_range() from rci->random.
 * NOTE(review): the range bounds are on lines not visible in this listing. */
generate_count_for_plus (RandomCreationInfo *rci)
94
return g_rand_int_range (rci->random,
100
* generate_count_for_ocur:
107
/* Translates a DTD occurrence indicator into the number of times the
 * corresponding content item should be generated.
 *
 * rci:  generator state (source of randomness).
 * ocur: occurrence value taken from a content node (see the call in
 *       populate_element_from_content(), which passes content->ocur).
 *
 * Visible mappings: OPT gives 0 or 1, MULT and PLUS defer to their dedicated
 * count helpers. The value returned for ONCE is on a line not visible in this
 * listing. Any other value trips g_assert_not_reached(). */
generate_count_for_ocur (RandomCreationInfo *rci,
108
xmlElementContentOccur ocur)
113
/* NOTE(review): default is listed first; if the assertion is compiled out,
 * control would continue into the ONCE case below -- confirm this is intended. */
default: g_assert_not_reached ();
114
case XML_ELEMENT_CONTENT_ONCE:
117
case XML_ELEMENT_CONTENT_OPT:
/* Coin flip converted to a count of 1 or 0. */
118
return generate_bool_for_opt (rci)?1:0;
120
case XML_ELEMENT_CONTENT_MULT:
121
return generate_count_for_mult (rci);
123
case XML_ELEMENT_CONTENT_PLUS:
124
return generate_count_for_plus (rci);
131
/* Returns NULL (via g_return_val_if_fail) when content is NULL.
 *
 * The only caller visible in this file, populate_element_from_content(),
 * stores the result in a gchar* and prints it with "%s" for logging.
 *
 * NOTE(review): the case labels and returns shown below mirror those of
 * generate_count_for_ocur() and reference rci, which is not a parameter of
 * this function. Lines are missing from this listing, so this may be disabled
 * or unfinished code -- confirm against the full source before relying on it. */
cong_dtd_generate_source_for_content (xmlElementContentPtr content)
133
g_return_val_if_fail (content, NULL);
136
default: g_assert_not_reached ();
137
case XML_ELEMENT_CONTENT_ONCE:
140
case XML_ELEMENT_CONTENT_OPT:
141
return generate_bool_for_opt (rci)?1:0;
143
case XML_ELEMENT_CONTENT_MULT:
144
return generate_count_for_mult (rci);
146
case XML_ELEMENT_CONTENT_PLUS:
147
return generate_count_for_plus (rci);
160
/* Produces one random character; random_text() calls this to fill its
 * gunichar buffer.
 *
 * rci: generator state (source of randomness).
 *
 * A first draw from g_rand_int_range (0, 10) selects the "space" branch when
 * it yields 0. Otherwise a candidate is drawn from g_rand_int_range (1, 65535)
 * and tested against g_unichar_isdefined(), g_unichar_iscntrl() and the local
 * UNICODE_VALID() macro.
 * NOTE(review): the bodies of these branches, including what is returned for a
 * space and what happens to rejected candidates, are on lines not visible in
 * this listing. */
random_unichar (RandomCreationInfo *rci)
162
/* FIXME: probably should have a smarter system... */
165
/* Have a high chance of spaces, to create word-breaking opportunities: */
166
if (0==g_rand_int_range (rci->random, 0, 10)) {
171
/* Candidate code point; g_rand_int_range() excludes its upper bound per the
 * GLib documentation. */
result = g_rand_int_range (rci->random, 1, 65535);
173
if (g_unichar_isdefined (result)) {
174
if (!g_unichar_iscntrl (result)) {
176
/* UNICODE_VALID(Char) is true unless Char is >= 0x110000, lies in the
 * surrogate range 0xD800-0xDFFF, lies in 0xFDD0-0xFDEF, or has low 16 bits
 * equal to 0xFFFE or 0xFFFF. */
#define UNICODE_VALID(Char) \
177
((Char) < 0x110000 && \
178
(((Char) & 0xFFFFF800) != 0xD800) && \
179
((Char) < 0xFDD0 || (Char) > 0xFDEF) && \
180
((Char) & 0xFFFE) != 0xFFFE)
182
if (UNICODE_VALID (result)) {
197
/* Builds a text string for use as character data in the generated document
 * (populate_element_from_content() passes the result to xmlNewDocText()).
 *
 * rci: generator state (source of randomness).
 *
 * A draw from g_rand_int_range (0, 3) selects one of the alternatives below:
 * a fixed pangram, a fixed "Lorem ipsum" paragraph, or a run of characters from
 * random_unichar() converted with g_ucs4_to_utf8(). The visible return
 * statements all hand back g_strdup() copies.
 * NOTE(review): the case labels and the branch around the final "fubar" return
 * are on lines not visible in this listing; "fubar" looks like the fallback
 * used when the converted text fails g_utf8_validate() -- confirm. */
random_text (RandomCreationInfo *rci)
199
/* FIXME: should we translate the various strings in this function? */
200
switch (g_rand_int_range (rci->random, 0, 3)) {
201
default: g_assert_not_reached ();
203
return g_strdup ("the quick brown fox jumps over the lazy dog");
206
return g_strdup ("Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.");
209
/* Generate an entirely random unicode string: */
211
#define MAX_LENGTH (50)
212
/* count is drawn from [1, MAX_LENGTH), so it never exceeds the buffer below. */
gint count = g_rand_int_range (rci->random, 1, MAX_LENGTH);
214
/* Scratch buffer of UCS-4 characters, sized MAX_LENGTH plus one spare slot. */
gunichar tmp_str[MAX_LENGTH+1];
217
for (i=0;i<count;i++) {
218
tmp_str[i]= random_unichar (rci);
222
utf8_text = g_ucs4_to_utf8 (tmp_str,
228
/* Double-check the conversion result before using it. */
if (g_utf8_validate (utf8_text, -1, NULL)) {
232
return g_strdup ("fubar");
240
/* Generates random children for a node according to one DTD content-model
 * node, recursing through the model.
 *
 * rci:     generator state (source of randomness).
 * content: content-model node to realise; its ocur field sets the repetition
 *          count and its type field selects what is generated.
 * NOTE(review): the parameters between rci and content are not visible in this
 * listing; the body uses xml_doc, xml_node and (via populate_element) a depth.
 *
 * Per repetition: PCDATA adds a text node holding random_text(); ELEMENT
 * creates a child element, adds it under xml_node and populates it through
 * populate_element(); SEQ recurses twice; OR recurses into one of two branches
 * chosen by generate_bool_for_opt(). */
populate_element_from_content (RandomCreationInfo *rci,
244
xmlElementContentPtr content)
251
/* Number of times this content item is emitted, from its occurrence marker. */
count = generate_count_for_ocur (rci, content->ocur);
255
/* Debug output: log the chosen count next to a string describing content. */
gchar *frag = cong_dtd_generate_source_for_content (content);
256
g_message ("got count of %i for %s", count, frag);
261
for (i=0;i<count;i++) {
262
switch (content->type) {
263
default: g_assert_not_reached ();
264
case XML_ELEMENT_CONTENT_PCDATA:
266
/* Character data: wrap freshly generated text in a new text node. */
gchar *text = random_text (rci);
267
xmlNodePtr child_node = xmlNewDocText (xml_doc,
268
(const xmlChar*)text);
271
xmlAddChild (xml_node,
275
case XML_ELEMENT_CONTENT_ELEMENT:
277
/* Child element: create it, attach it, then fill it recursively. */
xmlNodePtr child_node = xmlNewDocNode (xml_doc,
280
(const xmlChar*)""); /* FIXME: namespace? */
281
xmlAddChild (xml_node,
283
populate_element (rci,
289
case XML_ELEMENT_CONTENT_SEQ:
290
/* Do both c1 and c2 in sequence: */
291
populate_element_from_content (rci,
296
populate_element_from_content (rci,
302
case XML_ELEMENT_CONTENT_OR:
303
/* Do one of c1 or c2: */
304
if (generate_bool_for_opt (rci)) {
305
populate_element_from_content (rci,
311
populate_element_from_content (rci,
323
/* Populates a node using its DTD element declaration.
 *
 * rci:     generator state.
 * element: the element declaration; its etype field selects the handling and,
 *          where present, its content model is handed to
 *          populate_element_from_content().
 * NOTE(review): the parameters between rci and element, and the statements
 * belonging to most of the case labels below, are not visible in this listing.
 * Attributes are not generated yet (see the FIXME at the end). */
populate_element_from_dtd (RandomCreationInfo *rci,
327
xmlElementPtr element)
334
switch (element->etype) {
335
default: g_assert_not_reached ();
336
case XML_ELEMENT_TYPE_UNDEFINED:
337
case XML_ELEMENT_TYPE_EMPTY:
341
case XML_ELEMENT_TYPE_ANY:
344
case XML_ELEMENT_TYPE_MIXED:
347
case XML_ELEMENT_TYPE_ELEMENT:
351
/* Only descend when the declaration actually carries a content model. */
if (element->content) {
352
populate_element_from_content (rci,
361
/* FIXME: set up attributes! */
365
/* Recursively fills xml_node with randomly generated content.
 *
 * rci: generator state; rci->depth bounds the recursion and rci->dispspec
 *      supplies candidate elements for the non-DTD path.
 * NOTE(review): the remaining parameters are not visible in this listing; the
 * body uses xml_doc, xml_node and depth.
 *
 * Flow as far as it is visible: log the node name and depth; test depth
 * against rci->depth; when the document has an external DTD subset
 * (xml_doc->extSubset), look up the node's element declaration and call
 * populate_element_from_dtd(); in the path commented "No DTD information",
 * create a random number of children picked from the dispspec, attach a
 * namespace to each child whose dispspec element reports a namespace URI, add
 * the child under xml_node and recurse into it. */
populate_element (RandomCreationInfo *rci,
374
LOG_RANDOM3 ("populate_element (below <%s>, %i)", xml_node->name, depth);
378
/* Stop if we've reached the maximum depth */
379
if (depth>=rci->depth) {
384
if (xml_doc->extSubset) {
385
xmlElementPtr element = cong_dtd_element_get_element_for_node (xml_doc->extSubset,
388
populate_element_from_dtd (rci,
397
/* No DTD information was available for this node; randomly add content: */
403
guint num_elements = cong_dispspec_get_num_elements (rci->dispspec);
405
child_count = g_rand_int_range (rci->random,
409
for (i=0;i<child_count;i++) {
410
CongDispspecElement* ds_element;
411
xmlNodePtr child_node;
413
/* Pick one of the dispspec's elements by random index. */
ds_element = cong_dispspec_get_element (rci->dispspec,
414
g_rand_int_range (rci->random,
417
g_assert (ds_element);
419
child_node = xmlNewDocNode (xml_doc,
421
(const xmlChar*)cong_dispspec_element_get_local_name (ds_element),
423
/* Namespaced element: declare the namespace on the child and apply it. */
if (cong_dispspec_element_get_ns_uri (ds_element)) {
424
xmlNsPtr xml_ns = xmlNewNs (child_node,
425
(const xmlChar*)cong_dispspec_element_get_ns_uri (ds_element),
427
xmlSetNs (child_node,
431
xmlAddChild (xml_node,
434
populate_element (rci,
443
cong_make_random_doc (CongDispspec *dispspec,
444
gboolean ensure_valid,
447
RandomCreationInfo rci;
449
xmlNodePtr root_node;
450
const CongExternalDocumentModel* dtd_model;
451
CongDispspecElement *ds_element_root;
452
const gchar *root_element;
454
g_return_val_if_fail (dispspec, NULL);
456
rci.dispspec = dispspec;
457
rci.ensure_valid = ensure_valid;
459
rci.random = g_rand_new ();
461
ds_element_root = cong_dispspec_get_first_element (dispspec); /* FIXME */
462
g_assert (ds_element_root);
464
root_element = cong_dispspec_element_get_local_name (ds_element_root);
465
g_assert (root_element);
467
dtd_model = cong_dispspec_get_external_document_model (dispspec,
468
CONG_DOCUMENT_MODE_TYPE_DTD);
470
xml_doc = xmlNewDoc ((const xmlChar*)"1.0");
472
root_node = xmlNewDocNode (xml_doc,
473
NULL, /* xmlNsPtr ns, */
474
(const xmlChar*)root_element,
476
if (cong_dispspec_element_get_ns_uri (ds_element_root)) {
477
xmlNsPtr xml_ns = xmlNewNs (root_node,
478
(const xmlChar*)cong_dispspec_element_get_ns_uri (ds_element_root),
483
xmlDocSetRootElement (xml_doc,
487
cong_util_add_external_dtd (xml_doc,
489
cong_external_document_model_get_public_id (dtd_model),
490
cong_external_document_model_get_system_id (dtd_model));
493
populate_element (&rci,