17
18
* along with this program; if not, write to the Free Software
18
19
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
26
#include "char_table.h"
26
34
/*
 * ASCII characters that have a named HTML entity, paired with that entity.
 *
 * The final {0, NULL} entry is a sentinel: convert_from_html_chars() stops
 * at the NULL entity and convert_unichar_to_htmlstring() stops at id 0.
 * Each entity is at most 8 characters long, which is the bound the
 * conversion routines pass to strncat().
 */
Tchar_entity ascii_charset[] = {
	{34, "&quot;"}, {38, "&amp;"}, {60, "&lt;"}, {62, "&gt;"},
	{0, NULL}
};
30
/* Do not modify this table */
31
/* For convert iso to html to work properly */
42
/* Do not modify this table
43
* for convert_unichar_to_htmlstring to work properly */
33
46
/*
 * Named HTML entities for the ISO-8859-1 codes 160..255.
 *
 * The entries must stay in strict id order with no gaps: the conversion
 * routines fetch an entry directly as iso8859_1_charset[code - 160] instead
 * of searching for it. The final {0, NULL} entry is a sentinel for
 * convert_from_html_chars(), which walks the table until it meets a NULL
 * entity. Each entity is at most 8 characters long.
 */
Tchar_entity iso8859_1_charset[] = {
	{160, "&nbsp;"},   {161, "&iexcl;"},  {162, "&cent;"},   {163, "&pound;"},
	{164, "&curren;"}, {165, "&yen;"},    {166, "&brvbar;"}, {167, "&sect;"},
	{168, "&uml;"},    {169, "&copy;"},   {170, "&ordf;"},   {171, "&laquo;"},
	{172, "&not;"},    {173, "&shy;"},    {174, "&reg;"},    {175, "&macr;"},
	{176, "&deg;"},    {177, "&plusmn;"}, {178, "&sup2;"},   {179, "&sup3;"},
	{180, "&acute;"},  {181, "&micro;"},  {182, "&para;"},   {183, "&middot;"},
	{184, "&cedil;"},  {185, "&sup1;"},   {186, "&ordm;"},   {187, "&raquo;"},
	{188, "&frac14;"}, {189, "&frac12;"}, {190, "&frac34;"}, {191, "&iquest;"},
	{192, "&Agrave;"}, {193, "&Aacute;"}, {194, "&Acirc;"},  {195, "&Atilde;"},
	{196, "&Auml;"},   {197, "&Aring;"},  {198, "&AElig;"},  {199, "&Ccedil;"},
	{200, "&Egrave;"}, {201, "&Eacute;"}, {202, "&Ecirc;"},  {203, "&Euml;"},
	{204, "&Igrave;"}, {205, "&Iacute;"}, {206, "&Icirc;"},  {207, "&Iuml;"},
	{208, "&ETH;"},    {209, "&Ntilde;"}, {210, "&Ograve;"}, {211, "&Oacute;"},
	{212, "&Ocirc;"},  {213, "&Otilde;"}, {214, "&Ouml;"},   {215, "&times;"},
	{216, "&Oslash;"}, {217, "&Ugrave;"}, {218, "&Uacute;"}, {219, "&Ucirc;"},
	{220, "&Uuml;"},   {221, "&Yacute;"}, {222, "&THORN;"},  {223, "&szlig;"},
	{224, "&agrave;"}, {225, "&aacute;"}, {226, "&acirc;"},  {227, "&atilde;"},
	{228, "&auml;"},   {229, "&aring;"},  {230, "&aelig;"},  {231, "&ccedil;"},
	{232, "&egrave;"}, {233, "&eacute;"}, {234, "&ecirc;"},  {235, "&euml;"},
	{236, "&igrave;"}, {237, "&iacute;"}, {238, "&icirc;"},  {239, "&iuml;"},
	{240, "&eth;"},    {241, "&ntilde;"}, {242, "&ograve;"}, {243, "&oacute;"},
	{244, "&ocirc;"},  {245, "&otilde;"}, {246, "&ouml;"},   {247, "&divide;"},
	{248, "&oslash;"}, {249, "&ugrave;"}, {250, "&uacute;"}, {251, "&ucirc;"},
	{252, "&uuml;"},   {253, "&yacute;"}, {254, "&thorn;"},  {255, "&yuml;"},
	{0, NULL}
};
75
/*
 * isalpha_iso:
 * @c: a character code, interpreted as ISO-8859-1.
 *
 * Test for iso-8859-1 alphabetical characters in the upper half of the
 * code page.
 *
 * Returns: non-zero when @c is 192 or above and is not one of the excluded
 * codes 215 (multiplication sign), 222 (capital thorn), 223 (sharp s),
 * 247 (division sign) or 254 (small thorn); zero otherwise. ASCII letters
 * are below 192 and are therefore reported as non-alphabetic here.
 */
gboolean isalpha_iso(unsigned char c)
{
	/* Fixme: is character 223 non alpha ? */
	/* NOTE(review): 222, 223 and 254 are letters in ISO-8859-1; the
	 * exclusion is kept as-is because callers may rely on it -- confirm. */
	DEBUG_MSG("Testing if %c(code %i) is alpha iso \n",c,c);
	return((c>=192) && (c!=215) && (c!=222) && (c!=223) && (c!=247) &&(c!=254));
}
83
/* Convert a special character from html to iso_8859_1 or ascii */
84
/* If charset = ANY_CHAR_SET look up in both tables */
85
/* If conversion fails, returns '\0' */
87
/*
 * convert_from_html_chars:
 * @character: the entity text to look up; it is compared with strcmp()
 *             against the .entity field of each table entry.
 * @charset:   the table to search, or ANY_CHAR_SET.
 *
 * Returns: the .id of an entry whose .entity equals @character, or '\0'
 * when the table is exhausted without a match. The search stops at the
 * first entry whose .entity is NULL, so @charset must end with such an
 * entry. When @charset is ANY_CHAR_SET the function calls itself with
 * iso8859_1_charset and with ascii_charset.
 *
 * NOTE(review): this excerpt shows no braces, no declaration of `c` or `j`,
 * no increment of `j`, and nothing between the two recursive calls (so how
 * the iso8859_1 result in `c` is used is not visible). Restore the missing
 * lines from version control before building.
 * NOTE(review): the return type is gchar while the iso table ids run up to
 * 255 -- confirm callers treat the result as an unsigned byte.
 */
gchar convert_from_html_chars (char* character ,Tchar_entity charset[])
91
DEBUG_MSG("running convert_form_html_chars\n");
92
DEBUG_MSG("character=\" %s \" \n",character);
94
if (charset==ANY_CHAR_SET)
96
c = convert_from_html_chars(character,iso8859_1_charset);
100
return(convert_from_html_chars(character,ascii_charset));
104
DEBUG_MSG("Character to convert= \" %s \" \n",character);
105
while (charset[j].entity != NULL)
107
if (strcmp(charset[j].entity,character)==0)
109
DEBUG_MSG("match \"%s\"\n",charset[j].entity);
110
DEBUG_MSG("converted character='%c' \n", charset[j].id);
111
return (charset[j].id);
116
return ('\0'); /* if could not be converted */
120
/*
 * convert_char_iso_to_html:
 * @c: the character code to convert.
 *
 * Returns: a pointer to the entity string stored in iso8859_1_charset or
 * ascii_charset for @c, or NULL when neither table supplies one. The
 * pointer refers to table storage; nothing is allocated here.
 *
 * The iso8859_1 entry is fetched by direct index (c - 160), which relies on
 * that table being ordered by id starting at 160.
 *
 * NOTE(review): the guard in front of the indexed lookup, the loop over
 * ascii_charset, the declaration of `i` and all braces are not visible in
 * this excerpt; presumably the lookup is guarded by c >= 160 -- confirm and
 * restore from version control.
 */
gchar* convert_char_iso_to_html (unsigned char c)
121
/* also converts ascii chars */
124
DEBUG_MSG("Converting iso char '%c' to html \n",c);
126
return(iso8859_1_charset[c-160].entity);
129
if (ascii_charset[i].id==c)
130
return(ascii_charset[i].entity);
131
DEBUG_MSG("Unconverted\n");
132
return(NULL); /* if cannot be converted */
135
/*
 * convert_string_iso_to_html:
 * @string: NUL-terminated input; each byte is passed to
 *          convert_char_iso_to_html().
 *
 * Returns: a buffer obtained from g_malloc() that holds the converted text.
 * This function does not free it.
 *
 * NOTE(review): strlen() is called on @string without a NULL check.
 * NOTE(review): the buffer is exactly 8 * strlen(string) bytes, with no
 * extra byte for the terminator; an empty input requests 0 bytes. Confirm
 * that the terminator always fits.
 * NOTE(review): the declaration of `p`, the pointer increments, the body of
 * the `converted_char==NULL` branch, the final terminator write and all
 * braces are not visible in this excerpt -- restore from version control.
 */
gchar* convert_string_iso_to_html (gchar* string)
137
gchar* converted_string;
138
gchar* converted_char;
140
DEBUG_MSG("Converting string \"%s\" from iso to html\n",string);
141
converted_string = g_malloc(8*strlen(string));
142
/* for the converted string we need at most 8 times the original length
143
This function is designed to save time , not memory */
144
/* p is the write cursor into the output buffer */
p = converted_string;
145
while (*string !='\0')
147
converted_char = convert_char_iso_to_html(*string);
148
/* NULL means no entity exists for this character */
if (converted_char==NULL)
155
DEBUG_MSG("Converted char: %s\n",converted_char);
156
/* copy the entity text into the output one character at a time */
while (*converted_char !='\0')
158
*p = * converted_char;
166
DEBUG_MSG("Converted string:\"%s\" \n",converted_string);
167
return(converted_string);
183
/*
 * convert_unichar_to_htmlstring:
 * @unichar:    the code point to convert.
 * @deststring: output buffer. The entity paths append with strncat(), so it
 *              must already hold a NUL-terminated string on entry; the
 *              fallback path writes the UTF-8 encoding at its start and
 *              terminates it.
 * @ascii:      presumably enables the ascii_charset lookup -- the test on
 *              this flag is not visible here, confirm.
 * @iso:        presumably enables the iso8859_1_charset lookup -- the test
 *              on this flag is not visible here, confirm.
 *
 * Writes either a named HTML entity for @unichar or its UTF-8 encoding into
 * @deststring. ascii_charset is scanned up to its id 0 sentinel; codes
 * 160..255 are fetched from iso8859_1_charset by direct index
 * (unichar - 160).
 *
 * NOTE(review): strncat(..., 8) can write 8 characters plus a terminator,
 * and g_unichar_to_utf8() can write up to 6 bytes; the size of @deststring
 * is set by the caller and is not visible here -- confirm it is large enough.
 * NOTE(review): the declaration and increment of `j`, the early returns and
 * all inner braces are not visible in this excerpt -- restore from version
 * control.
 */
static void convert_unichar_to_htmlstring(gunichar unichar, gchar *deststring, gboolean ascii, gboolean iso) {
186
while (ascii_charset[j].id != 0) {
187
if (ascii_charset[j].id == unichar) {
189
strncat(deststring, ascii_charset[j].entity, 8);
196
if (unichar >= 160 && unichar < 256) {
198
strncat(deststring, iso8859_1_charset[unichar - 160].entity, 8);
203
/* no entity used: emit the code point itself as UTF-8 */
gint len= g_unichar_to_utf8(unichar, deststring);
204
deststring[len] = '\0';
208
/* utf8string MUST BE VALIDATED UTF8 otherwise this function is broken!!
209
so text from the TextBuffer is OK to use */
210
/*
 * convert_string_utf8_to_html:
 * @utf8string: input text; per the note preceding this function it must
 *              already be valid UTF-8.
 * @ascii:      forwarded to convert_unichar_to_htmlstring().
 * @iso:        forwarded to convert_unichar_to_htmlstring().
 *
 * Returns: when @utf8string is NULL or empty, or both flags are FALSE, the
 * result of g_strdup(utf8string). Otherwise a zero-filled buffer of
 * 8 * strlen(utf8string) bytes, filled by appending the conversion of each
 * code point in turn. This function does not free the buffer it returns.
 *
 * NOTE(review): the loop header, the declaration of `converted` (and any
 * reset of it between iterations) and the closing braces are not visible in
 * this excerpt -- restore from version control. The helper appends with
 * strncat(), so `converted` presumably has to be emptied before each call --
 * confirm.
 */
gchar *convert_string_utf8_to_html(const gchar *utf8string, gboolean ascii, gboolean iso) {
211
if (!utf8string || utf8string[0] == '\0' || (!ascii && !iso)) {
212
return g_strdup(utf8string);
214
/* optimize for speed, not for memory usage because that is very temporary */
215
gchar *converted_string = g_malloc0(8 * strlen(utf8string)*sizeof(gchar));
216
const gchar *srcp = utf8string;
217
gunichar unichar = g_utf8_get_char(srcp);
218
DEBUG_MSG("convert_string_utf8_to_html, utf8string='%s'\n", utf8string);
221
convert_unichar_to_htmlstring(unichar, converted, ascii, iso);
222
converted_string = strncat(converted_string, converted, 8);
223
/* advance to the next code point and decode it */
srcp = g_utf8_next_char(srcp);
224
unichar = g_utf8_get_char (srcp);
226
DEBUG_MSG("convert_string_utf8_to_html, converted string='%s'\n", converted_string);
227
return converted_string;