21
21
//======================= FUNCTIONS ====================================================================
23
//error reporting functions
24
static void PP_ERROR(const char* fmt, ...) G_GNUC_PRINTF(1,2); //prints an error message
23
26
//xml pretty printing functions
24
27
static void putCharInBuffer(char charToAdd); //put a char into the new char buffer
25
static void putCharsInBuffer(char* charsToAdd); //put the chars into the new char buffer
28
static void putCharsInBuffer(const char* charsToAdd); //put the chars into the new char buffer
26
29
static void putNextCharsInBuffer(int nbChars); //put the next nbChars of the input buffer into the new buffer
27
static int readWhites(); //read the next whites into the input buffer
28
static char readNextChar(); //read the next char into the input buffer;
29
static char getNextChar(); //returns the next char but does not increase the input buffer index (use readNextChar for that)
30
static char getPreviousInsertedChar(); //returns the last inserted char into the new buffer
31
static gboolean isWhite(char c); //check if the specified char is a white
32
static gboolean isLineBreak(char c); //check if the specified char is a new line
33
static gboolean isQuote(char c); //check if the specified char is a quote (simple or double)
34
static int putNewLine(); //put a new line into the new char buffer with the correct number of whites (indentation)
35
static gboolean isInlineNodeAllowed(); //check if it is possible to have an inline node
36
static void resetBackwardIndentation(gboolean resetLineBreak); //reset the indentation for the current depth (just reset the index in fact)
30
static int readWhites(bool considerLineBreakAsWhite); //read the next whites into the input buffer
31
static char readNextChar(void); //read the next char into the input buffer;
32
static char getNextChar(void); //returns the next char but does not increase the input buffer index (use readNextChar for that)
33
static char getPreviousInsertedChar(void); //returns the last inserted char into the new buffer
34
static bool isWhite(char c); //check if the specified char is a white
35
static bool isSpace(char c); //check if the specified char is a space
36
static bool isLineBreak(char c); //check if the specified char is a new line
37
static bool isQuote(char c); //check if the specified char is a quote (simple or double)
38
static int putNewLine(void); //put a new line into the new char buffer with the correct number of whites (indentation)
39
static bool isInlineNodeAllowed(void); //check if it is possible to have an inline node
40
static bool isOnSingleLine(int skip, char stop1, char stop2); //check if the current node data is on one line (for inlining)
41
static void resetBackwardIndentation(bool resetLineBreak); //reset the indentation for the current depth (just reset the index in fact)
38
43
//specific parsing functions
39
static int processElements(); //returns the number of elements processed
40
static void processElementAttribute(); //process one attribute of a node
41
static void processElementAttributes(); //process all the attributes of a node
42
static void processHeader(); //process the header <?xml version="..." ?>
43
static void processNode(); //process an XML node
44
static void processTextNode(); //process a text node
45
static void processComment(); //process a comment
46
static void processCDATA(); //process a CDATA node
47
static void processDoctype(); //process a DOCTYPE node
48
static void processDoctypeElement(); //process a DOCTYPE ELEMENT node
44
static int processElements(void); //returns the number of elements processed
45
static void processElementAttribute(void); //process one attribute of a node
46
static void processElementAttributes(void); //process all the attributes of a node
47
static void processHeader(void); //process the header <?xml version="..." ?>
48
static void processNode(void); //process an XML node
49
static void processTextNode(void); //process a text node
50
static void processComment(void); //process a comment
51
static void processCDATA(void); //process a CDATA node
52
static void processDoctype(void); //process a DOCTYPE node
53
static void processDoctypeElement(void); //process a DOCTYPE ELEMENT node
51
static void printError(char *msg, ...); //just print a message like the printf method
52
static void printDebugStatus(); //just print some variables into the console for debugging
56
static void printError(const char *msg, ...) G_GNUC_PRINTF(1,2); //just print a message like the printf method
57
static void printDebugStatus(void); //just print some variables into the console for debugging
54
59
//============================================ PRIVATE PROPERTIES ======================================
65
70
static int inputBufferIndex; //input buffer index (position of the next char to read into the input string)
66
71
static int currentDepth; //current depth (for indentation)
67
72
static char* currentNodeName; //current node name
68
static gboolean appendIndentation; //if the indentation must be added (with a line break before)
69
static gboolean lastNodeOpen; //defines if the last action was a node opening or not
73
static bool appendIndentation; //if the indentation must be added (with a line break before)
74
static bool lastNodeOpen; //defines if the last action was a node opening or not
70
75
static PrettyPrintingOptions* options; //options of PrettyPrinting
72
77
//============================================ GENERAL FUNCTIONS =======================================
79
static void PP_ERROR(const char* fmt, ...)
84
vfprintf(stderr, fmt, va);
74
89
int processXMLPrettyPrinting(char** buffer, int* length, PrettyPrintingOptions* ppOptions)
76
91
//empty buffer, nothing to process
139
PrettyPrintingOptions* createDefaultPrettyPrintingOptions()
159
PrettyPrintingOptions* createDefaultPrettyPrintingOptions(void)
141
PrettyPrintingOptions* options = (PrettyPrintingOptions*)malloc(sizeof(PrettyPrintingOptions));
161
PrettyPrintingOptions* defaultOptions = (PrettyPrintingOptions*)malloc(sizeof(PrettyPrintingOptions));
162
if (defaultOptions == NULL)
144
g_error("Unable to allocate memory for PrettyPrintingOptions");
164
PP_ERROR("Unable to allocate memory for PrettyPrintingOptions");
148
options->newLineChars = "\r\n";
149
options->indentChar = ' ';
150
options->indentLength = 2;
151
options->oneLineText = TRUE;
152
options->inlineText = TRUE;
153
options->oneLineComment = TRUE;
154
options->inlineComment = TRUE;
155
options->oneLineCdata = TRUE;
156
options->inlineCdata = TRUE;
157
options->emptyNodeStripping = TRUE;
158
options->emptyNodeStrippingSpace = TRUE;
159
options->forceEmptyNodeSplit = FALSE;
160
options->trimLeadingWhites = TRUE;
161
options->trimTrailingWhites = TRUE;
168
defaultOptions->newLineChars = "\r\n";
169
defaultOptions->indentChar = ' ';
170
defaultOptions->indentLength = 2;
171
defaultOptions->oneLineText = FALSE;
172
defaultOptions->inlineText = TRUE;
173
defaultOptions->oneLineComment = FALSE;
174
defaultOptions->inlineComment = TRUE;
175
defaultOptions->oneLineCdata = FALSE;
176
defaultOptions->inlineCdata = TRUE;
177
defaultOptions->emptyNodeStripping = TRUE;
178
defaultOptions->emptyNodeStrippingSpace = TRUE;
179
defaultOptions->forceEmptyNodeSplit = FALSE;
180
defaultOptions->trimLeadingWhites = TRUE;
181
defaultOptions->trimTrailingWhites = TRUE;
182
defaultOptions->alignComment = TRUE;
183
defaultOptions->alignText = TRUE;
184
defaultOptions->alignCdata = TRUE;
186
return defaultOptions;
166
189
void putNextCharsInBuffer(int nbChars)
189
213
++xmlPrettyPrintedIndex;
192
void putCharsInBuffer(char* charsToAdd)
216
void putCharsInBuffer(const char* charsToAdd)
195
while (charsToAdd[index] != '\0')
218
int currentIndex = 0;
219
while (charsToAdd[currentIndex] != '\0')
197
putCharInBuffer(charsToAdd[index]);
221
putCharInBuffer(charsToAdd[currentIndex]);
202
char getPreviousInsertedChar()
226
char getPreviousInsertedChar(void)
204
228
return xmlPrettyPrinted[xmlPrettyPrintedIndex-1];
209
233
putCharsInBuffer(options->newLineChars);
210
234
int spaces = currentDepth*options->indentLength;
242
gboolean isQuote(char c)
244
if (c == '\'') return TRUE;
245
if (c == '"') return TRUE;
250
gboolean isWhite(char c)
252
if (c == ' ') return TRUE;
253
if (c == '\t') return TRUE;
254
if (c == '\r') return TRUE;
255
if (c == '\n') return TRUE;
260
gboolean isLineBreak(char c)
262
if (c == '\n') return TRUE;
263
if (c == '\r') return TRUE;
268
gboolean isInlineNodeAllowed()
276
return (isSpace(c) ||
286
bool isLineBreak(char c)
292
bool isInlineNodeAllowed(void)
270
294
//the last action was not an opening => inline not allowed
271
295
if (!lastNodeOpen) { return FALSE; }
274
298
int secondChar = inputBuffer[inputBufferIndex+1]; //should be '!'
275
299
int thirdChar = inputBuffer[inputBufferIndex+2]; //should be '-' or '['
277
int index = inputBufferIndex+1;
301
//loop through the content up to the next opening/closing node
302
int currentIndex = inputBufferIndex+1;
278
303
if (firstChar == '<')
280
305
//another node is being opened ==> no inline !
281
306
if (secondChar != '!') { return FALSE; }
283
//okay we are in a comment node, so read until it is closed
308
//okay we are in a comment/cdata node, so read until it is closed
285
310
//select the closing char
286
311
char closingComment = '-';
289
314
//read until closing
290
315
char oldChar = ' ';
291
index += 3; //this bypasses the meaningless chars
292
gboolean loop = TRUE;
316
currentIndex += 3; //this bypasses the meaningless chars
295
char current = inputBuffer[index];
296
if (current == closingComment && oldChar == closingComment) { loop = FALSE; } //end of comment
320
char current = inputBuffer[currentIndex];
321
if (current == closingComment && oldChar == closingComment) { loop = FALSE; } //end of comment/cdata
297
322
oldChar = current;
301
326
//okay now avoid blanks
302
327
// inputBuffer[index] is now '>'
304
while (isWhite(inputBuffer[index])) { ++index; }
329
while (isWhite(inputBuffer[currentIndex])) { ++currentIndex; }
308
333
//this is a text node. Simply loop to the next '<'
309
while (inputBuffer[index] != '<') { ++index; }
334
while (inputBuffer[currentIndex] != '<') { ++currentIndex; }
312
337
//check what we have now
313
char currentChar = inputBuffer[index];
338
char currentChar = inputBuffer[currentIndex];
314
339
if (currentChar == '<')
316
341
//check if that is a closing node
317
currentChar = inputBuffer[index+1];
342
currentChar = inputBuffer[currentIndex+1];
318
343
if (currentChar == '/')
320
345
//as we are in a correct XML (so far...), if the node is
321
//being directly close, the inline is allowed !!!
346
//being directly closed, the inline is allowed !!!
330
void resetBackwardIndentation(gboolean resetLineBreak)
355
bool isOnSingleLine(int skip, char stop1, char stop2)
357
int currentIndex = inputBufferIndex+skip; //skip the n first chars (in comment <!--)
358
bool onSingleLine = TRUE;
360
char oldChar = inputBuffer[currentIndex];
361
char currentChar = inputBuffer[currentIndex+1];
362
while(onSingleLine && oldChar != stop1 && currentChar != stop2)
364
onSingleLine = !isLineBreak(oldChar);
367
oldChar = currentChar;
368
currentChar = inputBuffer[currentIndex+1];
371
* A line break inside the node has been reached. But we should check
372
* if there is something before the end of the node (otherwise, there
373
* are only spaces and it may be wanted to be considered as a single
374
* line). //TODO externalize an option for that ?
378
while(oldChar != stop1 && currentChar != stop2)
380
//okay there is something else => this is not on one line
381
if (!isWhite(oldChar)) return FALSE;
384
oldChar = currentChar;
385
currentChar = inputBuffer[currentIndex+1];
388
//the end of the node has been reached with only whites. Then
389
//the node can be considered being one single line
397
void resetBackwardIndentation(bool resetLineBreak)
332
399
xmlPrettyPrintedIndex -= (currentDepth*options->indentLength);
333
400
if (resetLineBreak)
526
593
char* nodeName = (char*)malloc(sizeof(char)*nodeNameLength+1);
527
if (nodeName == NULL) { g_error("Allocation error"); }
594
if (nodeName == NULL) { PP_ERROR("Allocation error (node name length is %d)", nodeNameLength); return ; }
528
595
nodeName[nodeNameLength] = '\0';
530
597
for (i=0 ; i<nodeNameLength ; ++i)
532
int index = xmlPrettyPrintedIndex-nodeNameLength+i;
533
nodeName[i] = xmlPrettyPrinted[index];
599
int tempIndex = xmlPrettyPrintedIndex-nodeNameLength+i;
600
nodeName[i] = xmlPrettyPrinted[tempIndex];
536
603
currentNodeName = nodeName; //set the name for using in other methods
537
604
lastNodeOpen = TRUE;
539
606
//process the attributes
541
608
processElementAttributes();
543
610
//process the end of the tag
659
727
if (!isLineBreak(nextChar)) //the comment simply continues
661
putCharInBuffer(nextChar);
664
else if (!options->oneLineComment) //oh ! there is a line break
666
readWhites(); //strip the whites and new line
667
putNewLine(); //put a new indentation line
668
oldChar = ' '; //and update the last char
729
if (options->oneLineComment && isSpace(nextChar))
731
//removes all the unnecessary spaces
732
while(isSpace(getNextChar()))
734
nextChar = readNextChar();
736
putCharInBuffer(' ');
741
//comment is left untouched
742
putCharInBuffer(nextChar);
670
//TODO manage relative spacing
746
if (!loop && options->alignComment) //end of comment
748
//ensures the chars preceding the first '-' are all spaces (there are at least
749
//5 spaces in front of the '-->' for the alignment with '<!--')
750
bool onlySpaces = xmlPrettyPrinted[xmlPrettyPrintedIndex-3] == ' ' &&
751
xmlPrettyPrinted[xmlPrettyPrintedIndex-4] == ' ' &&
752
xmlPrettyPrinted[xmlPrettyPrintedIndex-5] == ' ' &&
753
xmlPrettyPrinted[xmlPrettyPrintedIndex-6] == ' ' &&
754
xmlPrettyPrinted[xmlPrettyPrintedIndex-7] == ' ';
756
//if all the preceding chars are white, then go for replacement
759
xmlPrettyPrintedIndex -= 7; //remove indentation spaces
760
putCharsInBuffer("--"); //reset the first chars of '-->'
764
else if (!options->oneLineComment && !inlineAllowed) //oh ! there is a line break
766
//if the comments need to be aligned, just add 5 spaces
767
if (options->alignComment)
769
int read = readWhites(FALSE); //strip the whites and new line
770
if (nextChar == '\r' && read == 0 && getNextChar() == '\n') //handles the \r\n return line
776
putNewLine(); //put a new indentation line
777
putCharsInBuffer(" "); //align with <!--
778
oldChar = ' '; //and update the last char
782
putCharInBuffer(nextChar);
672
786
else //the comments must be inlined
674
readWhites(); //strip the whites and add a space if needed
675
if (getPreviousInsertedChar() != ' ')
788
readWhites(TRUE); //strip the whites and add a space if needed
789
if (getPreviousInsertedChar() != ' ' &&
790
strncmp(xmlPrettyPrinted+xmlPrettyPrintedIndex-4, "<!--", 4) != 0) //prevents adding a space at the beginning
677
792
putCharInBuffer(' ');
694
810
lastNodeOpen = FALSE;
697
void processTextNode()
813
void processTextNode(void)
699
815
//checks if inline is allowed
700
gboolean inlineTextAllowed = FALSE;
816
bool inlineTextAllowed = FALSE;
701
817
if (options->inlineText) { inlineTextAllowed = isInlineNodeAllowed(); }
702
if (inlineTextAllowed) { resetBackwardIndentation(TRUE); } //remove previous indentation
818
if (inlineTextAllowed && !options->oneLineText) { inlineTextAllowed = isOnSingleLine(0, '<', '/'); }
819
if (inlineTextAllowed || !options->alignText)
821
resetBackwardIndentation(TRUE); //remove previous indentation
822
if (!inlineTextAllowed) { putNewLine(); }
704
825
//the leading whites are automatically stripped. So we re-add it
705
826
if (!options->trimLeadingWhites)
707
828
int backwardIndex = inputBufferIndex-1;
708
while (inputBuffer[backwardIndex] == ' ' ||
709
inputBuffer[backwardIndex] == '\t')
829
while (isSpace(inputBuffer[backwardIndex]))
711
831
--backwardIndex; //backward rolling
730
850
char nextChar = readNextChar();
731
851
if (isLineBreak(nextChar))
734
853
if (options->oneLineText)
736
857
//as we can put text on one line, remove the line break
737
858
//and replace it by a space but only if the previous
738
859
//char wasn't a space
739
if (getPreviousInsertedChar() != ' ')
741
putCharInBuffer(' ');
860
if (getPreviousInsertedChar() != ' ') { putCharInBuffer(' '); }
862
else if (options->alignText)
864
int read = readWhites(FALSE);
865
if (nextChar == '\r' && read == 0 && getNextChar() == '\n') //handles the '\r\n'
867
nextChar = readNextChar();
746
871
//put a new line only if the closing tag is not reached
747
872
if (getNextChar() != '<')
879
putCharInBuffer(nextChar);
792
922
if (!isLineBreak(nextChar)) //the cdata simply continues
794
putCharInBuffer(nextChar);
797
else if (!options->oneLineCdata)
799
readWhites(); //strip the whites and new line
800
putNewLine(); //put a new indentation line
801
oldChar = ' '; //and update the last char
924
if (options->oneLineCdata && isSpace(nextChar))
926
//removes all the unnecessary spaces
927
while(isSpace(nextChar2))
929
nextChar = readNextChar();
930
nextChar2 = getNextChar();
933
putCharInBuffer(' ');
938
//cdata is left untouched
939
putCharInBuffer(nextChar);
803
//TODO manage relative spacing
943
if (!loop && options->alignCdata) //end of cdata
945
//ensures the chars preceding the first ']' are all spaces (there are at least
946
//10 spaces in front of the ']]>' for the alignment with '<![CDATA[')
947
bool onlySpaces = xmlPrettyPrinted[xmlPrettyPrintedIndex-3] == ' ' &&
948
xmlPrettyPrinted[xmlPrettyPrintedIndex-4] == ' ' &&
949
xmlPrettyPrinted[xmlPrettyPrintedIndex-5] == ' ' &&
950
xmlPrettyPrinted[xmlPrettyPrintedIndex-6] == ' ' &&
951
xmlPrettyPrinted[xmlPrettyPrintedIndex-7] == ' ' &&
952
xmlPrettyPrinted[xmlPrettyPrintedIndex-8] == ' ' &&
953
xmlPrettyPrinted[xmlPrettyPrintedIndex-9] == ' ' &&
954
xmlPrettyPrinted[xmlPrettyPrintedIndex-10] == ' ' &&
955
xmlPrettyPrinted[xmlPrettyPrintedIndex-11] == ' ';
957
//if all the preceding chars are white, then go for replacement
960
xmlPrettyPrintedIndex -= 11; //remove indentation spaces
961
putCharsInBuffer("]]"); //reset the first chars of ']]>'
965
else if (!options->oneLineCdata && !inlineAllowed) //line break
967
//if the cdata needs to be aligned, just add 9 spaces
968
if (options->alignCdata)
970
int read = readWhites(FALSE); //strip the whites and new line
971
if (nextChar == '\r' && read == 0 && getNextChar() == '\n') //handles the \r\n return line
977
putNewLine(); //put a new indentation line
978
putCharsInBuffer(" "); //align with <![CDATA[
979
oldChar = ' '; //and update the last char
983
putCharInBuffer(nextChar);
805
987
else //cdata are inlined
807
readWhites(); //strip the whites and add a space if necessary
808
if(getPreviousInsertedChar() != ' ') { putCharInBuffer(' '); }
989
readWhites(TRUE); //strip the whites and add a space if necessary
990
if(getPreviousInsertedChar() != ' ' &&
991
strncmp(xmlPrettyPrinted+xmlPrettyPrintedIndex-9, "<![CDATA[", 9) != 0) //prevents adding a space at the beginning
993
putCharInBuffer(' ');
999
//if the cdata is inline, then all the trailing spaces are removed
1000
if (options->oneLineCdata)
1002
xmlPrettyPrintedIndex -= 2; //because of the last ']]' inserted
1003
while(isWhite(xmlPrettyPrinted[xmlPrettyPrintedIndex-1]))
1005
--xmlPrettyPrintedIndex;
1007
putCharsInBuffer("]]");
1010
//finalize the cdata
812
1011
char lastChar = readNextChar(); //should be '>'
813
1012
if (lastChar != '>')
889
void processDoctypeElement()
1088
void processDoctypeElement(void)
891
1090
printError("ELEMENT is currently not supported by PrettyPrinter\n");
892
1091
result = PRETTY_PRINTING_NOT_SUPPORTED_YET;
895
void printError(char *msg, ...)
1094
void printError(const char *msg, ...)
898
1097
va_start(va, msg);
1099
g_logv(G_LOG_DOMAIN, G_LOG_LEVEL_WARNING, msg, va);
1101
vfprintf(stderr, msg, va);
902
//TODO also do a fprintf on stderr ?
904
1106
printDebugStatus();
907
void printDebugStatus()
1109
void printDebugStatus(void)
909
1112
g_debug("\n===== INPUT =====\n%s\n=================\ninputLength = %d\ninputIndex = %d\noutputLength = %d\noutputIndex = %d\n",
911
1114
inputBufferLength,
912
1115
inputBufferIndex,
913
1116
xmlPrettyPrintedLength,
914
1117
xmlPrettyPrintedIndex);
1119
PP_ERROR("\n===== INPUT =====\n%s\n=================\ninputLength = %d\ninputIndex = %d\noutputLength = %d\noutputIndex = %d\n",
1123
xmlPrettyPrintedLength,
1124
xmlPrettyPrintedIndex);