~ubuntu-branches/ubuntu/natty/bluefish/natty-proposed

« back to all changes in this revision

Viewing changes to src/char_table.c

  • Committer: Bazaar Package Importer
  • Author(s): Davide Puricelli (evo)
  • Date: 2005-04-23 17:05:18 UTC
  • mfrom: (1.2.1 upstream) (2.1.1 warty)
  • Revision ID: james.westby@ubuntu.com-20050423170518-pb8zi3vg32cm6g04
Tags: 1.0-1
* Acknowledge NMU, thanks Leo; closes: #291222.
* Updated debian/ files, thanks Daniel Leidert. 

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
1
/* Bluefish HTML Editor
2
2
 * char_table.c - character conversion functions
3
3
 *
4
 
 * Copyright (C) 2000 Olivier Sessink & Pablo De Napoli (for this module)
 
4
 * Complete rewrite for UTF8 Copyright (C) 2002 Olivier Sessink
 
5
 * some ideas from original version Copyright (C) 2000 Pablo De Napoli
5
6
 *
6
7
 * This program is free software; you can redistribute it and/or modify
7
8
 * it under the terms of the GNU General Public License as published by
17
18
 * along with this program; if not, write to the Free Software
18
19
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19
20
*/
 
21
/* #define DEBUG */
 
22
 
 
23
#include <string.h>
 
24
#include <gtk/gtk.h>
20
25
 
21
26
#include "char_table.h"
22
 
#include "string.h"
23
 
#include "debug.h"
 
27
#include "bluefish.h"
24
28
 
 
29
/* One table entry pairing a character code with its HTML entity string.
 * The tables built from this type end with a {0, NULL} entry. */
typedef struct {
 
30
        gunichar id; /* character code this entry stands for */
 
31
        char *entity; /* entity text such as "&amp;"; NULL in the terminating entry */
 
32
} Tchar_entity;
25
33
 
26
34
/* Entities for the ASCII characters '"' (34), '&' (38), '<' (60) and '>' (62).
 * The list is terminated by a {0, NULL} entry, which the lookup loops in this
 * file use as their stop condition. */
Tchar_entity ascii_charset[] = {
27
 
        {34, "&quot;"}, {38, "&amp;"}, {60, "&lt;"}, {62, "&gt;"}, {0, NULL}
 
35
        {34, "&quot;"}
 
36
        , {38, "&amp;"}
 
37
        , {60, "&lt;"}
 
38
        , {62, "&gt;"}
 
39
        , {0, NULL}
28
40
};
29
41
 
30
 
/* Do not modify this table */
31
 
/* For convert iso to html to work properly */
 
42
/* Do not modify this table
 
43
 * convert_unichar_to_htmlstring indexes it as [code - 160], so the entries must stay contiguous and in order from 160 to 255
 
44
 */
32
45
 
33
46
Tchar_entity iso8859_1_charset[] = {
34
 
         {160, "&nbsp;"}, {161, "&iexcl;"}, {162, "&cent;"}, {163, "&pound;"},
35
 
                {164, "&curren;"}, {165, "&yen;"}, {166, "&brvbar;"}, {167, "&sect;"},
36
 
                {168, "&uml;"}, {169, "&copy;"}, {170, "&ordf;"}, {171, "&laquo;"},
37
 
                {172, "&not;"}, {173, "&shy;"}, {174, "&reg;"}, {175, "&macr;"},
38
 
                {176, "&deg;"}, {177, "&plusmn;"}, {178, "&sup2;"}, {179, "&sup3;"},
39
 
                {180, "&acute;"}, {181, "&micro;"}, {182, "&para;"}, {183, "&middot;"},
40
 
                {184, "&cedil;"}, {185, "&sup1;"}, {186, "&ordm;"}, {187,"&raquo;"},
41
 
                {188, "&frac14;"}, {189, "&frac12;"}, {190, "&frac34;"}, {191,"&iquest;"},
42
 
                {192, "&Agrave;"}, {193, "&Aacute;"}, {194, "&Acirc;"}, {195, "&Atilde;"},
43
 
                {196, "&Auml;"}, {197, "&Aring;"}, {198, "&AElig;"}, {199,  "&Ccedil;"},
44
 
                {200, "&Egrave;"}, {201, "&Eacute;"}, {202, "&Ecirc;"}, {203,
45
 
                                                                                                                                 "&Euml;"},
46
 
                {204, "&Igrave;"}, {205, "&Iacute;"}, {206, "&Icirc;"}, {207,
47
 
                                                                                                                                 "&Iuml;"},
48
 
                {208, "&ETH;"}, {209, "&Ntilde;"}, {210, "&Ograve;"}, {211,
49
 
                                                                                                                           "&Oacute;"},
50
 
                {212, "&Ocirc;"}, {213, "&Otilde;"}, {214, "&Ouml;"}, {215,
51
 
                                                                                                                           "&times;"},
52
 
                {216, "&Oslash;"}, {217, "&Ugrave;"}, {218, "&Uacute;"}, {219,
53
 
                                                                                                                                  "&Ucirc;"},
54
 
                {220, "&Uuml;"}, {221, "&Yacute;"}, {222, "&THORN;"}, {223,
55
 
                                                                                                                           "&szlig;"},
56
 
                {224, "&agrave;"}, {225, "&aacute;"}, {226, "&acirc;"}, {227,
57
 
                                                                                                                                 "&atilde;"},
58
 
                {228, "&auml;"}, {229, "&aring;"}, {230, "&aelig;"}, {231,
59
 
                                                                                                                          "&ccedil;"},
60
 
                {232, "&egrave;"}, {233, "&eacute;"}, {234, "&ecirc;"}, {235,
61
 
                                                                                                                                 "&euml;"},
62
 
                {236, "&igrave;"}, {237, "&iacute;"}, {238, "&icirc;"}, {239,
63
 
                                                                                                                                 "&iuml;"},
64
 
                {240, "&eth;"}, {241, "&ntilde;"}, {242, "&ograve;"}, {243,
65
 
                                                                                                                           "&oacute;"},
66
 
                {244, "&ocirc;"}, {245, "&otilde;"}, {246, "&ouml;"}, {247,
67
 
                                                                                                                           "&divide;"},
68
 
                {248, "&oslash;"}, {249, "&ugrave;"}, {250, "&uacute;"}, {251,
69
 
                                                                                                                                  "&ucirc;"},
70
 
                {252, "&uuml;"}, {253, "&yacute;"}, {254, "&thorn;"}, {255,
71
 
                                                                                                                           "&yuml;"}
 
47
        {160, "&nbsp;"}
 
48
        , {161, "&iexcl;"}
 
49
        , {162, "&cent;"}
 
50
        , {163, "&pound;"}
 
51
        ,
 
52
        {164, "&curren;"}
 
53
        , {165, "&yen;"}
 
54
        , {166, "&brvbar;"}
 
55
        , {167, "&sect;"}
 
56
        ,
 
57
        {168, "&uml;"}
 
58
        , {169, "&copy;"}
 
59
        , {170, "&ordf;"}
 
60
        , {171, "&laquo;"}
 
61
        ,
 
62
        {172, "&not;"}
 
63
        , {173, "&shy;"}
 
64
        , {174, "&reg;"}
 
65
        , {175, "&macr;"}
 
66
        ,
 
67
        {176, "&deg;"}
 
68
        , {177, "&plusmn;"}
 
69
        , {178, "&sup2;"}
 
70
        , {179, "&sup3;"}
 
71
        ,
 
72
        {180, "&acute;"}
 
73
        , {181, "&micro;"}
 
74
        , {182, "&para;"}
 
75
        , {183, "&middot;"}
 
76
        ,
 
77
        {184, "&cedil;"}
 
78
        , {185, "&sup1;"}
 
79
        , {186, "&ordm;"}
 
80
        , {187, "&raquo;"}
 
81
        ,
 
82
        {188, "&frac14;"}
 
83
        , {189, "&frac12;"}
 
84
        , {190, "&frac34;"}
 
85
        , {191, "&iquest;"}
 
86
        ,
 
87
        {192, "&Agrave;"}
 
88
        , {193, "&Aacute;"}
 
89
        , {194, "&Acirc;"}
 
90
        , {195, "&Atilde;"}
 
91
        ,
 
92
        {196, "&Auml;"}
 
93
        , {197, "&Aring;"}
 
94
        , {198, "&AElig;"}
 
95
        , {199, "&Ccedil;"}
 
96
        ,
 
97
        {200, "&Egrave;"}
 
98
        , {201, "&Eacute;"}
 
99
        , {202, "&Ecirc;"}
 
100
        , {203,
 
101
           "&Euml;"}
 
102
        ,
 
103
        {204, "&Igrave;"}
 
104
        , {205, "&Iacute;"}
 
105
        , {206, "&Icirc;"}
 
106
        , {207,
 
107
           "&Iuml;"}
 
108
        ,
 
109
        {208, "&ETH;"}
 
110
        , {209, "&Ntilde;"}
 
111
        , {210, "&Ograve;"}
 
112
        , {211,
 
113
           "&Oacute;"}
 
114
        ,
 
115
        {212, "&Ocirc;"}
 
116
        , {213, "&Otilde;"}
 
117
        , {214, "&Ouml;"}
 
118
        , {215,
 
119
           "&times;"}
 
120
        ,
 
121
        {216, "&Oslash;"}
 
122
        , {217, "&Ugrave;"}
 
123
        , {218, "&Uacute;"}
 
124
        , {219,
 
125
           "&Ucirc;"}
 
126
        ,
 
127
        {220, "&Uuml;"}
 
128
        , {221, "&Yacute;"}
 
129
        , {222, "&THORN;"}
 
130
        , {223,
 
131
           "&szlig;"}
 
132
        ,
 
133
        {224, "&agrave;"}
 
134
        , {225, "&aacute;"}
 
135
        , {226, "&acirc;"}
 
136
        , {227,
 
137
           "&atilde;"}
 
138
        ,
 
139
        {228, "&auml;"}
 
140
        , {229, "&aring;"}
 
141
        , {230, "&aelig;"}
 
142
        , {231,
 
143
           "&ccedil;"}
 
144
        ,
 
145
        {232, "&egrave;"}
 
146
        , {233, "&eacute;"}
 
147
        , {234, "&ecirc;"}
 
148
        , {235,
 
149
           "&euml;"}
 
150
        ,
 
151
        {236, "&igrave;"}
 
152
        , {237, "&iacute;"}
 
153
        , {238, "&icirc;"}
 
154
        , {239,
 
155
           "&iuml;"}
 
156
        ,
 
157
        {240, "&eth;"}
 
158
        , {241, "&ntilde;"}
 
159
        , {242, "&ograve;"}
 
160
        , {243,
 
161
           "&oacute;"}
 
162
        ,
 
163
        {244, "&ocirc;"}
 
164
        , {245, "&otilde;"}
 
165
        , {246, "&ouml;"}
 
166
        , {247,
 
167
           "&divide;"}
 
168
        ,
 
169
        {248, "&oslash;"}
 
170
        , {249, "&ugrave;"}
 
171
        , {250, "&uacute;"}
 
172
        , {251,
 
173
           "&ucirc;"}
 
174
        ,
 
175
        {252, "&uuml;"}
 
176
        , {253, "&yacute;"}
 
177
        , {254, "&thorn;"}
 
178
        , {255,
 
179
           "&yuml;"}
72
180
        , {0, NULL}
73
181
};
74
182
 
75
 
gboolean isalpha_iso(unsigned char c)
76
 
/* test for iso-8859-1 alphabetical characters  */
77
 
{
78
 
 /* Fixme: is character 223 non alpha ? */
79
 
 DEBUG_MSG("Testing if %c(code %i) is alpha iso \n",c,c);
80
 
 return((c>=192) && (c!=215) && (c!=222) && (c!=223) && (c!=247) &&(c!=254));
81
 
}
82
 
 
83
 
/* Convert a speciall character from html to iso_8859_1 or ascii */
84
 
/* If charset = ANY_CHAR_SET look up in both tables */
85
 
/* If convertion fails, returns '\0' */
86
 
 
87
 
gchar convert_from_html_chars (char* character ,Tchar_entity charset[])
88
 
{
89
 
 gint j;
90
 
 gchar c;
91
 
 DEBUG_MSG("running convert_form_html_chars\n");
92
 
 DEBUG_MSG("character=\" %s \" \n",character);
93
 
 j=0;
94
 
 if (charset==ANY_CHAR_SET)
95
 
   {
96
 
     c = convert_from_html_chars(character,iso8859_1_charset);
97
 
     if (c !='\0')
98
 
       return c;
99
 
     else
100
 
       return(convert_from_html_chars(character,ascii_charset));
101
 
   }
102
 
 else
103
 
 {
104
 
 DEBUG_MSG("Character to convert= \" %s \" \n",character);
105
 
 while (charset[j].entity != NULL)
106
 
 {
107
 
   if (strcmp(charset[j].entity,character)==0)
108
 
     {
109
 
      DEBUG_MSG("match \"%s\"\n",charset[j].entity);
110
 
      DEBUG_MSG("converted character='%c' \n", charset[j].id);
111
 
      return (charset[j].id);
112
 
     }
113
 
   else
114
 
      j++;
115
 
 }
116
 
 return ('\0'); /* if could not be converted */
117
 
 }
118
 
}
119
 
 
120
 
gchar* convert_char_iso_to_html (unsigned char c)
121
 
/*  also converts ascii chars */
122
 
{
123
 
 gint i;
124
 
 DEBUG_MSG("Converting iso char '%c' to html \n",c);
125
 
 if (c>=160)
126
 
   return(iso8859_1_charset[c-160].entity);
127
 
 else
128
 
   for (i=0;i<3;i++)
129
 
      if (ascii_charset[i].id==c)
130
 
        return(ascii_charset[i].entity);
131
 
   DEBUG_MSG("Unconverted\n");
132
 
   return(NULL); /* if cannot be converted */
133
 
}
134
 
 
135
 
/* Convert a NUL-terminated iso-8859-1 string to html by replacing every
 * character that has an entity with that entity; all other characters
 * are copied unchanged.
 *
 * Returns a newly allocated string (allocated with g_malloc); the
 * caller owns it.
 */
gchar* convert_string_iso_to_html (gchar* string)
{
	gchar *converted_string;
	gchar *p;

	DEBUG_MSG("Converting string \"%s\" from iso to html\n",string);
	/* For the converted string we need at most 8 times the original
	 * length (the longest entities are 8 characters), plus one byte for
	 * the terminating NUL. Without the extra byte a string made only of
	 * such characters overflowed the buffer, and an empty string wrote
	 * the terminator into a zero-sized allocation.
	 * This function is designed to save time, not memory. */
	converted_string = g_malloc(8 * strlen(string) + 1);
	p = converted_string;
	for (; *string != '\0'; string++) {
		const gchar *entity = convert_char_iso_to_html((unsigned char) *string);
		if (entity == NULL) {
			/* no entity for this character: copy it as it is */
			*p++ = *string;
		} else {
			size_t entity_len = strlen(entity);
			DEBUG_MSG("Converted char: %s\n",entity);
			memcpy(p, entity, entity_len);
			p += entity_len;
		}
	}
	*p = '\0';
	DEBUG_MSG("Converted string:\"%s\" \n",converted_string);
	return (converted_string);
}
169
 
 
170
 
 
 
183
/* Write the html form of one character into deststring.
 *
 * unichar:    the character to convert
 * deststring: output buffer; receives either an entity (at most 8
 *             characters are copied) or the UTF-8 encoding of unichar,
 *             always followed by a terminating '\0'
 * ascii:      if TRUE, characters listed in ascii_charset become entities
 * iso:        if TRUE, characters 160..255 become entities from
 *             iso8859_1_charset
 *
 * Characters that match neither enabled table are written as UTF-8.
 */
static void convert_unichar_to_htmlstring(gunichar unichar, gchar *deststring, gboolean ascii, gboolean iso) {
	const char *entity = NULL;

	if (ascii) {
		Tchar_entity *entry;
		/* the ascii table ends at the entry with id 0 */
		for (entry = ascii_charset; entry->id != 0; entry++) {
			if (entry->id == unichar) {
				entity = entry->entity;
				break;
			}
		}
	}
	if (entity == NULL && iso && unichar >= 160 && unichar < 256) {
		/* the iso table is indexed directly by (code - 160) */
		entity = iso8859_1_charset[unichar - 160].entity;
	}
	if (entity != NULL) {
		deststring[0] = '\0';
		strncat(deststring, entity, 8);
		return;
	}
	{
		/* no entity applies: emit the character itself as UTF-8 */
		gint written = g_unichar_to_utf8(unichar, deststring);
		deststring[written] = '\0';
	}
}
 
207
 
 
208
/* utf8string MUST BE VALIDATED UTF8 otherwise this function is broken!!
 
209
so text from the TextBuffer is OK to use */
 
210
gchar *convert_string_utf8_to_html(const gchar *utf8string, gboolean ascii, gboolean iso) {
 
211
        if (!utf8string || utf8string[0] == '\0' || (!ascii && !iso)) {
 
212
                return g_strdup(utf8string);
 
213
        } else {
 
214
                /* optimize for speed, not for memory usage because that is very temporary */
 
215
                gchar *converted_string = g_malloc0(8 * strlen(utf8string)*sizeof(gchar));
 
216
                const gchar *srcp = utf8string;
 
217
                gunichar unichar = g_utf8_get_char(srcp);
 
218
                DEBUG_MSG("convert_string_utf8_to_html, utf8string='%s'\n", utf8string);
 
219
                while (unichar) {
 
220
                        gchar converted[9];
 
221
                        convert_unichar_to_htmlstring(unichar, converted, ascii, iso);
 
222
                        converted_string = strncat(converted_string, converted, 8);
 
223
                        srcp = g_utf8_next_char(srcp);
 
224
                        unichar = g_utf8_get_char (srcp);
 
225
                }
 
226
                DEBUG_MSG("convert_string_utf8_to_html, converted string='%s'\n", converted_string);
 
227
                return converted_string;
 
228
        }
 
229
}