976
976
idx = html_text_get_item_index (text, painter, offset, &offset);
977
977
if (need_ascent_descent) {
978
update_asc_dsc (painter, pi->entries [idx].glyph_item.item, &ascent, &descent);
979
font = pi->entries [idx].glyph_item.item->analysis.font;
980
language = pi->entries [idx].glyph_item.item->analysis.language;
978
update_asc_dsc (painter, pi->entries[idx].glyph_item.item, &ascent, &descent);
979
font = pi->entries[idx].glyph_item.item->analysis.font;
980
language = pi->entries[idx].glyph_item.item->analysis.language;
982
982
while (len > 0) {
985
985
if (*s == '\t') {
986
986
gint skip = 8 - (line_offset % 8);
987
width += skip*pi->entries [idx].widths [offset];
987
width += skip*pi->entries[idx].widths[offset];
988
988
line_offset += skip;
990
width += pi->entries [idx].widths [offset];
990
width += pi->entries[idx].widths[offset];
996
996
if (html_text_pi_forward (pi, &idx, &offset) && idx != old_idx)
997
if (len > 0 && (need_ascent_descent) && (pi->entries [idx].glyph_item.item->analysis.font != font
998
|| pi->entries [idx].glyph_item.item->analysis.language != language)) {
999
update_asc_dsc (painter, pi->entries [idx].glyph_item.item, &ascent, &descent);
997
if (len > 0 && (need_ascent_descent) && (pi->entries[idx].glyph_item.item->analysis.font != font
998
|| pi->entries[idx].glyph_item.item->analysis.language != language)) {
999
update_asc_dsc (painter, pi->entries[idx].glyph_item.item, &ascent, &descent);
1002
1002
s = g_utf8_next_char (s);
1435
1435
if (text->pi && text->pi->attrs)
1436
1436
html_text_remove_unwanted_line_breaks (text->text, text->text_len, text->pi->attrs);
1438
for (i = 0, cur = items; i < text->pi->n; i ++, cur = cur->next)
1439
text->pi->entries [i].glyph_item.item = (PangoItem *) cur->data;
1438
for (i = 0, cur = items; i < text->pi->n; i++, cur = cur->next)
1439
text->pi->entries[i].glyph_item.item = (PangoItem *) cur->data;
1441
for (i = 0; i < text->pi->n; i ++) {
1441
for (i = 0; i < text->pi->n; i++) {
1442
1442
PangoItem *item;
1443
1443
PangoGlyphString *glyphs;
1445
item = text->pi->entries [i].glyph_item.item;
1446
glyphs = text->pi->entries [i].glyph_item.glyphs = pango_glyph_string_new ();
1445
item = text->pi->entries[i].glyph_item.item;
1446
glyphs = text->pi->entries[i].glyph_item.glyphs = pango_glyph_string_new ();
1448
1448
/* printf ("item pos %d len %d\n", item->offset, item->length); */
1450
text->pi->entries [i].widths = g_new (PangoGlyphUnit, item->num_chars);
1451
if (text->text [item->offset] == '\t')
1450
text->pi->entries[i].widths = g_new (PangoGlyphUnit, item->num_chars);
1451
if (text->text[item->offset] == '\t')
1452
1452
html_text_shape_tab (text, glyphs);
1454
1454
pango_shape (text->text + item->offset, item->length, &item->analysis, glyphs);
1455
1455
html_tmp_fix_pango_glyph_string_get_logical_widths (glyphs, text->text + item->offset, item->length,
1456
item->analysis.level, text->pi->entries [i].widths);
1456
item->analysis.level, text->pi->entries[i].widths);
1459
1459
g_list_free (items);
1514
1514
if (html_text_pi_backward (pi, &ii, &io)) {
1515
1515
s = g_utf8_prev_char (s);
1517
if (pi->attrs [offset].is_white) {
1517
if (pi->attrs[offset].is_white) {
1518
1518
if (*s == '\t' && offset > 1) {
1519
1519
gint skip = 8, co = offset - 1;
1522
1522
s = g_utf8_prev_char (s);
1524
1524
if (*s != '\t')
1526
1526
} while (s && co > 0 && *s != '\t');
1528
ww += skip*pi->entries [ii].widths [io];
1528
ww += skip*pi->entries[ii].widths[io];
1530
ww += pi->entries [ii].widths [io];
1530
ww += pi->entries[ii].widths[io];
1571
1571
line_offset = html_text_get_line_offset (text, painter, 0);
1572
1572
s = text->text;
1573
1573
while (offset < text->text_len) {
1574
if (offset > 0 && html_text_is_line_break (pi->attrs [offset]))
1574
if (offset > 0 && html_text_is_line_break (pi->attrs[offset]))
1575
1575
update_mw (text, painter, offset, &last_offset, &ww, &mw, ii, io, s, line_offset);
1577
1577
if (*s == '\t') {
1578
1578
gint skip = 8 - (line_offset % 8);
1579
ww += skip*pi->entries [ii].widths [io];
1579
ww += skip*pi->entries[ii].widths[io];
1580
1580
line_offset += skip;
1582
ww += pi->entries [ii].widths [io];
1582
ww += pi->entries[ii].widths[io];
1586
1586
s = g_utf8_next_char (s);
1589
1589
html_text_pi_forward (pi, &ii, &io);
2874
2874
parent_class = &html_object_class;
2877
/* almost identical copy of glib's _g_utf8_make_valid() */
2878
offset_to_pointer_validated (const gchar *str, glong offset, gint *chars_out)
2879
_html_text_utf8_make_valid (const gchar *name, gint len)
2880
const gchar *s = str;
2885
gunichar wc = g_utf8_get_char_validated (s, -1);
2886
if (wc == (gunichar)-1 || wc == (gunichar)-2)
2888
s = g_utf8_next_char (s);
2882
const gchar *remainder, *invalid;
2883
gint remaining_bytes, valid_bytes, total_bytes;
2885
g_return_val_if_fail (name != NULL, NULL);
2890
remaining_bytes = strlen (name);
2893
while (offset-- && *s) {
2894
gunichar wc = g_utf8_get_char_validated (s, -1);
2895
if (wc == (gunichar)-1 || wc == (gunichar)-2)
2897
s = g_utf8_next_char (s);
2892
const gchar *start = name, *end = name;
2895
gunichar uc = g_utf8_get_char_validated (end, -1);
2897
if (uc == (gunichar) -2 || uc == (gunichar) -1) {
2899
} else if (uc == 0) {
2902
end = g_utf8_next_char (end);
2908
remaining_bytes = end - start;
2911
total_bytes = remaining_bytes;
2913
while (remaining_bytes != 0) {
2914
if (g_utf8_validate (remainder, remaining_bytes, &invalid))
2916
valid_bytes = invalid - remainder;
2919
string = g_string_sized_new (remaining_bytes);
2921
g_string_append_len (string, remainder, valid_bytes);
2922
/* append U+FFFD REPLACEMENT CHARACTER */
2923
g_string_append (string, "\357\277\275");
2925
remaining_bytes -= valid_bytes + 1;
2926
remainder = invalid + 1;
2930
return g_strndup (name, total_bytes);
2932
g_string_append (string, remainder);
2934
g_assert (g_utf8_validate (string->str, -1, NULL));
2936
return g_string_free (string, FALSE);
2908
2940
* html_text_sanitize:
2909
* @str: text string (in/out)
2941
* @str_in: text string to sanitize (in)
2942
* @str_out: newly allocated text string sanitized (out)
2910
2943
* @len: length of text, in characters (in/out). (A value of
2911
2944
* -1 on input means to use all characters in @str_in)
2913
* Validates a UTF-8 string up to the given number of characters;
2914
* if the string is invalid, on output, "[?]" will be stored in
2915
* @str and 3 in @len, otherwise @str will be left unchanged,
2916
* and @len will be left unchanged if non-negative, otherwise
2917
* replaced with the number of characters in @str.
2946
* Validates a UTF-8 string up to the given number of characters.
2919
2948
* Return value: number of bytes in the newly allocated string stored in @str_out
2922
html_text_sanitize (const gchar **str, gint *len)
2951
html_text_sanitize (const gchar *str_in, gchar **str_out, gint *len)
2926
g_return_val_if_fail (str != NULL, 0);
2953
g_return_val_if_fail (str_in != NULL, 0);
2954
g_return_val_if_fail (str_out != NULL, 0);
2927
2955
g_return_val_if_fail (len != NULL, 0);
2929
end = offset_to_pointer_validated (*str, *len, len);
2957
*str_out = _html_text_utf8_make_valid (str_in, *len);
2958
g_return_val_if_fail (*str_out != NULL, 0);
2960
*len = g_utf8_strlen (*str_out, -1);
2961
return strlen (*str_out);
3145
3167
typedef struct _HTMLMagicInsertMatch HTMLMagicInsertMatch;
3147
static HTMLMagicInsertMatch mim [] = {
3169
static HTMLMagicInsertMatch mim[] = {
3148
3170
/* prefixed expressions */
3149
3171
{ "(news|telnet|nntp|file|http|ftp|sftp|https|webcal)://([-a-z0-9]+(:[-a-z0-9]+)?@)?[-a-z0-9.]+[-a-z0-9](:[0-9]*)?(([.])?/[-a-z0-9_$.+!*(),;:@%&=?/~#']*[^]'.}>\\) ,?!;:\"]?)?", NULL, NULL },
3150
3172
{ "(sip|h323|callto):([-_a-z0-9.'\\+]+(:[0-9]{1,5})?(/[-_a-z0-9.']+)?)(@([-_a-z0-9.%=?]+|([0-9]{1,3}.){3}[0-9]{1,3})?)?(:[0-9]{1,5})?", NULL, NULL },
3234
3256
str = g_utf8_next_char (str);
3237
for (i = 0; i < G_N_ELEMENTS (mim); i++) {
3238
if (mim [i].preg && !regexec (mim [i].preg, str, 2, pmatch, 0)) {
3239
paste_link (engine, text,
3240
g_utf8_pointer_to_offset (text->text, str + pmatch [0].rm_so),
3241
g_utf8_pointer_to_offset (text->text, str + pmatch [0].rm_eo), mim [i].prefix);
3259
gboolean done = FALSE;
3260
guint32 str_offset = 0, str_length = strlen (str);
3264
for (i = 0; i < G_N_ELEMENTS (mim); i++) {
3265
if (mim[i].preg && !regexec (mim[i].preg, str + str_offset, 2, pmatch, 0)) {
3266
paste_link (engine, text,
3267
g_utf8_pointer_to_offset (text->text, str + str_offset + pmatch[0].rm_so),
3268
g_utf8_pointer_to_offset (text->text, str + str_offset + pmatch[0].rm_eo), mim[i].prefix);
3270
str_offset += pmatch[0].rm_eo + 1;
3271
done = str_offset >= str_length;
3248
html_undo_level_end (engine->undo);
3278
html_undo_level_end (engine->undo, engine);
3249
3279
html_cursor_jump_to_position_no_spell (engine->cursor, engine, saved_position);