~paulbrianstewart/ubuntu/oneiric/tellico/852247-Formatting-Fix

« back to all changes in this revision

Viewing changes to src/rtf2html/rtf2html.cpp

  • Committer: Bazaar Package Importer
  • Author(s): Stephan Hermann
  • Date: 2008-01-31 19:33:05 UTC
  • mfrom: (0.1.13 upstream)
  • Revision ID: james.westby@ubuntu.com-20080131193305-9l01m5gfhykl6pkl
Tags: 1.3-1ubuntu1
* Merge from debian unstable, remaining changes:
  - debian/control: build-dep on kdepim-dev
  - debian/control: drop versioned python from tellico-data suggests
  - debian/rules: call dh_icons

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*  This is RTF to HTML converter, implemented as a text filter, generally.
 
2
    Copyright (C) 2003 Valentin Lavrinenko, vlavrinenko@users.sourceforge.net
 
3
 
 
4
    available at http://rtf2html.sf.net
 
5
 
 
6
    Original available under the terms of the GNU LGPL2, and according
 
7
    to those terms, relicensed under the GNU GPL2 for inclusion in Tellico */
 
8
 
 
9
/***************************************************************************
 
10
 *                                                                         *
 
11
 *   This program is free software; you can redistribute it and/or modify  *
 
12
 *   it under the terms of version 2 of the GNU General Public License as  *
 
13
 *   published by the Free Software Foundation;                            *
 
14
 *                                                                         *
 
15
 ***************************************************************************/
 
16
 
 
17
#include "rtf2html.h"
 
18
#include "rtf_table.h"
 
19
#include "rtf_tools.h"
 
20
#include "rtf_keyword.h"
 
21
#include "fmt_opts.h"
 
22
 
 
23
#include <cstdlib>
 
24
#include <stdexcept>
 
25
#include <fstream>
 
26
#include <iostream>
 
27
#include <string>
 
28
 
 
29
using Tellico::RTF2HTML;
 
30
using namespace rtf;
 
31
 
 
32
/* Keeps a copy of the RTF source text in m_text. Nothing is parsed here;
   toHTML() reads m_text when it is called. */
RTF2HTML::RTF2HTML(const QString& text)
   : m_text(text)
{
}
 
34
 
 
35
QString RTF2HTML::toHTML() const {
 
36
   std::string str_in = m_text;
 
37
 
 
38
   std::string::iterator buf_in=str_in.begin(), buf_in_end=str_in.end();
 
39
   colorvect colortbl;
 
40
   fontmap fonttbl;
 
41
   std::string title;
 
42
 
 
43
   bool bAsterisk=false;
 
44
   fo_stack foStack;
 
45
   formatting_options cur_options;
 
46
   std::string html;
 
47
   html_text par_html(cur_options);
 
48
 
 
49
   /* CellDefs in rtf are really queer. We'll keep a list of them in main()
 
50
      and will give an iterator into this list to a row */
 
51
   table_cell_defs_list CellDefsList;
 
52
   table_cell_defs_list::iterator CurCellDefs;
 
53
   table_cell_def* tcdCurCellDef=new table_cell_def;
 
54
   table_cell* tcCurCell=new table_cell;
 
55
   table_row* trCurRow=new table_row;
 
56
   table* tblCurTable=new table;
 
57
   int iLastRowLeft=0, iLastRowHeight=0;
 
58
   std::string t_str;
 
59
 
 
60
   bool bInTable=false;
 
61
   int iDocWidth=12240;
 
62
   int iMarginLeft=1800;
 
63
   while(buf_in!=buf_in_end)
 
64
   {
 
65
      switch (*buf_in)
 
66
      {
 
67
      case '\\':
 
68
      {
 
69
         rtf_keyword kw(++buf_in);
 
70
         if (kw.is_control_char())
 
71
            switch (kw.control_char())
 
72
            {
 
73
            case '\\': case '{': case '}':
 
74
               par_html.write(kw.control_char());
 
75
               break;
 
76
            case '\'':
 
77
            {
 
78
               std::string stmp(1,*buf_in++);
 
79
               stmp+=*buf_in++;
 
80
               int code=std::strtol(stmp.c_str(), NULL, 16);
 
81
               switch (code)
 
82
               {
 
83
                  case 167:
 
84
                     par_html.write("&bull;");
 
85
                     break;
 
86
                  case 188:
 
87
                     par_html.write("&hellip;");
 
88
                     break;
 
89
                  default:
 
90
                     par_html.write((char)code);
 
91
               }
 
92
               break;
 
93
            }
 
94
            case '*':
 
95
               bAsterisk=true;
 
96
               break;
 
97
            case '~':
 
98
               par_html.write("&nbsp;");
 
99
               break;
 
100
            case '\n':
 
101
               par_html.write("<br><br>");
 
102
               break;
 
103
            }
 
104
         else //kw.is_control_char
 
105
            if (bAsterisk)
 
106
            {
 
107
               bAsterisk=false;
 
108
               skip_group(buf_in);
 
109
            }
 
110
            else
 
111
            {
 
112
               switch (kw.keyword())
 
113
               {
 
114
               case rtf_keyword::rkw_filetbl:
 
115
               case rtf_keyword::rkw_stylesheet:
 
116
               case rtf_keyword::rkw_header:
 
117
               case rtf_keyword::rkw_footer: case rtf_keyword::rkw_headerf:
 
118
               case rtf_keyword::rkw_footerf: case rtf_keyword::rkw_pict:
 
119
               case rtf_keyword::rkw_object:
 
120
                  // we'll skip such groups
 
121
                  skip_group(buf_in);
 
122
                  break;
 
123
               // document title
 
124
               case rtf_keyword::rkw_info:
 
125
               {
 
126
                  int depth=1;
 
127
                  bool in_title=false;
 
128
                  while (depth>0)
 
129
                  {
 
130
//                     std::cout<<std::string(buf_in).substr(0,20)<<"\t"<<depth<<std::endl;
 
131
                     switch (*buf_in)
 
132
                     {
 
133
                     case '\\':
 
134
                     {
 
135
                        rtf_keyword kw(++buf_in);
 
136
                        if (kw.keyword()==rtf_keyword::rkw_title)
 
137
                           in_title=true;
 
138
                        break;
 
139
                     }
 
140
                     case '{': ++depth; ++buf_in; break;
 
141
                     case '}': --depth; ++buf_in; in_title=false; break;
 
142
                     default: if (in_title) title+=*buf_in; ++buf_in; break;
 
143
                     }
 
144
                  }
 
145
                  break;
 
146
               }
 
147
               // color table
 
148
               case rtf_keyword::rkw_colortbl:
 
149
               {
 
150
                  color clr;
 
151
                  while (*buf_in!='}')
 
152
                  {
 
153
                     switch (*buf_in)
 
154
                     {
 
155
                     case '\\':
 
156
                     {
 
157
                        rtf_keyword kw(++buf_in);
 
158
                        switch (kw.keyword())
 
159
                        {
 
160
                        case rtf_keyword::rkw_red:
 
161
                           clr.r=kw.parameter();
 
162
                           break;
 
163
                        case rtf_keyword::rkw_green:
 
164
                           clr.g=kw.parameter();
 
165
                           break;
 
166
                        case rtf_keyword::rkw_blue:
 
167
                           clr.b=kw.parameter();
 
168
                           break;
 
169
                        default: break;
 
170
                        }
 
171
                        break;
 
172
                     }
 
173
                     case ';':
 
174
                        colortbl.push_back(clr);
 
175
                        ++buf_in;
 
176
                        break;
 
177
                     default:
 
178
                        ++buf_in;
 
179
                        break;
 
180
                     }
 
181
                  }
 
182
                  ++buf_in;
 
183
                  break;
 
184
               }
 
185
               // font table
 
186
               case rtf_keyword::rkw_fonttbl:
 
187
               {
 
188
                  font fnt;
 
189
                  int font_num;
 
190
                  bool full_name=false;
 
191
                  bool in_font=false;
 
192
                  while (! (*buf_in=='}' && !in_font))
 
193
                  {
 
194
                     switch (*buf_in)
 
195
                     {
 
196
                     case '\\':
 
197
                     {
 
198
                        rtf_keyword kw(++buf_in);
 
199
                        if (kw.is_control_char() && kw.control_char()=='*')
 
200
                           skip_group(buf_in);
 
201
                        else
 
202
                           switch (kw.keyword())
 
203
                           {
 
204
                           case rtf_keyword::rkw_f:
 
205
                              font_num=kw.parameter();
 
206
                              break;
 
207
                           case rtf_keyword::rkw_fprq:
 
208
                              fnt.pitch=kw.parameter();
 
209
                              break;
 
210
                           case rtf_keyword::rkw_fcharset:
 
211
                              fnt.charset=kw.parameter();
 
212
                              break;
 
213
                           case rtf_keyword::rkw_fnil:
 
214
                              fnt.family=font::ff_none;
 
215
                              break;
 
216
                           case rtf_keyword::rkw_froman:
 
217
                              fnt.family=font::ff_serif;
 
218
                              break;
 
219
                           case rtf_keyword::rkw_fswiss:
 
220
                              fnt.family=font::ff_sans_serif;
 
221
                              break;
 
222
                           case rtf_keyword::rkw_fmodern:
 
223
                              fnt.family=font::ff_monospace;
 
224
                              break;
 
225
                           case rtf_keyword::rkw_fscript:
 
226
                              fnt.family=font::ff_cursive;
 
227
                              break;
 
228
                           case rtf_keyword::rkw_fdecor:
 
229
                              fnt.family=font::ff_fantasy;
 
230
                              break;
 
231
                           default: break;
 
232
                           }
 
233
                        break;
 
234
                     }
 
235
                     case '{':
 
236
                        in_font=true;
 
237
                        ++buf_in;
 
238
                        break;
 
239
                     case '}':
 
240
                        in_font=false;
 
241
                        fonttbl.insert(std::make_pair(font_num, fnt));
 
242
                        fnt=font();
 
243
                        full_name=false;
 
244
                        ++buf_in;
 
245
                        break;
 
246
                     case ';':
 
247
                        full_name=true;
 
248
                        ++buf_in;
 
249
                        break;
 
250
                     default:
 
251
                        if (!full_name && in_font)
 
252
                           fnt.name+=*buf_in;
 
253
                        ++buf_in;
 
254
                        break;
 
255
                     }
 
256
                  }
 
257
                  ++buf_in;
 
258
                  break;
 
259
               }
 
260
               // special characters
 
261
               case rtf_keyword::rkw_line: case rtf_keyword::rkw_softline:
 
262
                  par_html.write("<br>");
 
263
                  break;
 
264
               case rtf_keyword::rkw_tab:
 
265
                  par_html.write("&nbsp;&nbsp;");  // maybe, this can be done better
 
266
                  break;
 
267
               case rtf_keyword::rkw_enspace: case rtf_keyword::rkw_emspace:
 
268
                  par_html.write("&nbsp;");
 
269
                  break;
 
270
               case rtf_keyword::rkw_qmspace:
 
271
                  par_html.write("&thinsp;");
 
272
                  break;
 
273
               case rtf_keyword::rkw_endash:
 
274
                  par_html.write("&ndash;");
 
275
                  break;
 
276
               case rtf_keyword::rkw_emdash:
 
277
                  par_html.write("&mdash;");
 
278
                  break;
 
279
               case rtf_keyword::rkw_bullet:
 
280
                  par_html.write("&bull;");
 
281
                  break;
 
282
               case rtf_keyword::rkw_lquote:
 
283
                  par_html.write("&lsquo;");
 
284
                  break;
 
285
               case rtf_keyword::rkw_rquote:
 
286
                  par_html.write("&rsquo;");
 
287
                  break;
 
288
               case rtf_keyword::rkw_ldblquote:
 
289
                  par_html.write("&ldquo;");
 
290
                  break;
 
291
               case rtf_keyword::rkw_rdblquote:
 
292
                  par_html.write("&rdquo;");
 
293
                  break;
 
294
               // paragraph formatting
 
295
               case rtf_keyword::rkw_ql:
 
296
                  cur_options.papAlign=formatting_options::align_left;
 
297
                  break;
 
298
               case rtf_keyword::rkw_qr:
 
299
                  cur_options.papAlign=formatting_options::align_right;
 
300
                  break;
 
301
               case rtf_keyword::rkw_qc:
 
302
                  cur_options.papAlign=formatting_options::align_center;
 
303
                  break;
 
304
               case rtf_keyword::rkw_qj:
 
305
                  cur_options.papAlign=formatting_options::align_justify;
 
306
                  break;
 
307
               case rtf_keyword::rkw_fi:
 
308
                  cur_options.papFirst=(int)rint(kw.parameter()/20);
 
309
                  break;
 
310
               case rtf_keyword::rkw_li:
 
311
                  cur_options.papLeft=(int)rint(kw.parameter()/20);
 
312
                  break;
 
313
               case rtf_keyword::rkw_ri:
 
314
                  cur_options.papRight=(int)rint(kw.parameter()/20);
 
315
                  break;
 
316
               case rtf_keyword::rkw_sb:
 
317
                  cur_options.papBefore=(int)rint(kw.parameter()/20);
 
318
                  break;
 
319
               case rtf_keyword::rkw_sa:
 
320
                  cur_options.papAfter=(int)rint(kw.parameter()/20);
 
321
                  break;
 
322
               case rtf_keyword::rkw_pard:
 
323
                  cur_options.papBefore=cur_options.papAfter=0;
 
324
                  cur_options.papLeft=cur_options.papRight=0;
 
325
                  cur_options.papFirst=0;
 
326
                  cur_options.papAlign=formatting_options::align_left;
 
327
                  cur_options.papInTbl=false;
 
328
                  break;
 
329
               case rtf_keyword::rkw_par:
 
330
               case rtf_keyword::rkw_sect:
 
331
                  t_str=cur_options.get_par_str()+par_html.str()
 
332
                        +"&nbsp;"+par_html.close()+"</p>\n";
 
333
                  if (!bInTable)
 
334
                  {
 
335
                     html+=t_str;
 
336
                  }
 
337
                  else
 
338
                  {
 
339
                     if (cur_options.papInTbl)
 
340
                     {
 
341
                        tcCurCell->Text+=t_str;
 
342
                     }
 
343
                     else
 
344
                     {
 
345
                        html+=tblCurTable->make()+t_str;
 
346
                        bInTable=false;
 
347
                        tblCurTable=new table;
 
348
                     }
 
349
                  }
 
350
                  par_html.clear();
 
351
                  break;
 
352
               // character formatting
 
353
               case rtf_keyword::rkw_super:
 
354
                  cur_options.chpVAlign=
 
355
                     kw.parameter()==0?formatting_options::va_normal
 
356
                                      :formatting_options::va_sup;
 
357
                  break;
 
358
               case rtf_keyword::rkw_sub:
 
359
                  cur_options.chpVAlign=
 
360
                     kw.parameter()==0?formatting_options::va_normal
 
361
                                      :formatting_options::va_sub;
 
362
                  break;
 
363
               case rtf_keyword::rkw_b:
 
364
                  cur_options.chpBold=!(kw.parameter()==0);
 
365
                  break;
 
366
               case rtf_keyword::rkw_i:
 
367
                  cur_options.chpItalic=!(kw.parameter()==0);
 
368
                  break;
 
369
               case rtf_keyword::rkw_ul:
 
370
                  cur_options.chpUnderline=!(kw.parameter()==0);
 
371
                  break;
 
372
               case rtf_keyword::rkw_ulnone:
 
373
                  cur_options.chpUnderline=false;
 
374
                  break;
 
375
               case rtf_keyword::rkw_fs:
 
376
                  cur_options.chpFontSize=kw.parameter();
 
377
                  break;
 
378
               case rtf_keyword::rkw_cf:
 
379
                  cur_options.chpFColor=colortbl[kw.parameter()];
 
380
                  break;
 
381
               case rtf_keyword::rkw_cb:
 
382
                  cur_options.chpBColor=colortbl[kw.parameter()];
 
383
                  break;
 
384
               case rtf_keyword::rkw_highlight:
 
385
                  cur_options.chpHighlight=kw.parameter();
 
386
                  break;
 
387
               case rtf_keyword::rkw_f:
 
388
                  cur_options.chpFont=fonttbl[kw.parameter()];
 
389
                  break;
 
390
               case rtf_keyword::rkw_plain:
 
391
                  cur_options.chpBold=cur_options.chpItalic
 
392
                        =cur_options.chpUnderline=false;
 
393
                  cur_options.chpVAlign=formatting_options::va_normal;
 
394
                  cur_options.chpFontSize=cur_options.chpHighlight=0;
 
395
                  cur_options.chpFColor=cur_options.chpBColor=color();
 
396
                  cur_options.chpFont=font();
 
397
                  break;
 
398
               // table formatting
 
399
               case rtf_keyword::rkw_intbl:
 
400
                  cur_options.papInTbl=true;
 
401
                  break;
 
402
               case rtf_keyword::rkw_trowd:
 
403
                  CurCellDefs=CellDefsList.insert(CellDefsList.end(),
 
404
                                                  table_cell_defs());
 
405
               case rtf_keyword::rkw_row:
 
406
                  if (!trCurRow->Cells.empty())
 
407
                  {
 
408
                     trCurRow->CellDefs=CurCellDefs;
 
409
                     if (trCurRow->Left==-1000)
 
410
                        trCurRow->Left=iLastRowLeft;
 
411
                     if (trCurRow->Height==-1000)
 
412
                        trCurRow->Height=iLastRowHeight;
 
413
                     tblCurTable->push_back(trCurRow);
 
414
                     trCurRow=new table_row;
 
415
                  }
 
416
                  bInTable=true;
 
417
                  break;
 
418
               case rtf_keyword::rkw_cell:
 
419
                  t_str=cur_options.get_par_str()+par_html.str()
 
420
                        +"&nbsp;"+par_html.close()+"</p>\n";
 
421
                  tcCurCell->Text+=t_str;
 
422
                  par_html.clear();
 
423
                  trCurRow->Cells.push_back(tcCurCell);
 
424
                  tcCurCell=new table_cell;
 
425
                  break;
 
426
               case rtf_keyword::rkw_cellx:
 
427
                  tcdCurCellDef->Right=kw.parameter();
 
428
                  CurCellDefs->push_back(tcdCurCellDef);
 
429
                  tcdCurCellDef=new table_cell_def;
 
430
                  break;
 
431
               case rtf_keyword::rkw_trleft:
 
432
                  trCurRow->Left=kw.parameter();
 
433
                  iLastRowLeft=kw.parameter();
 
434
                  break;
 
435
               case rtf_keyword::rkw_trrh:
 
436
                  trCurRow->Height=kw.parameter();
 
437
                  iLastRowHeight=kw.parameter();
 
438
                  break;
 
439
               case rtf_keyword::rkw_clvmgf:
 
440
                  tcdCurCellDef->FirstMerged=true;
 
441
                  break;
 
442
               case rtf_keyword::rkw_clvmrg:
 
443
                  tcdCurCellDef->Merged=true;
 
444
                  break;
 
445
               case rtf_keyword::rkw_clbrdrb:
 
446
                  tcdCurCellDef->BorderBottom=true;
 
447
                  tcdCurCellDef->ActiveBorder=&(tcdCurCellDef->BorderBottom);
 
448
                  break;
 
449
               case rtf_keyword::rkw_clbrdrt:
 
450
                  tcdCurCellDef->BorderTop=true;
 
451
                  tcdCurCellDef->ActiveBorder=&(tcdCurCellDef->BorderTop);
 
452
                  break;
 
453
               case rtf_keyword::rkw_clbrdrl:
 
454
                  tcdCurCellDef->BorderLeft=true;
 
455
                  tcdCurCellDef->ActiveBorder=&(tcdCurCellDef->BorderLeft);
 
456
                  break;
 
457
               case rtf_keyword::rkw_clbrdrr:
 
458
                  tcdCurCellDef->BorderRight=true;
 
459
                  tcdCurCellDef->ActiveBorder=&(tcdCurCellDef->BorderRight);
 
460
                  break;
 
461
               case rtf_keyword::rkw_brdrnone:
 
462
                  if (tcdCurCellDef->ActiveBorder!=NULL)
 
463
                  {
 
464
                     *(tcdCurCellDef->ActiveBorder)=false;
 
465
                  }
 
466
                  break;
 
467
               case rtf_keyword::rkw_clvertalt:
 
468
                  tcdCurCellDef->VAlign=table_cell_def::valign_top;
 
469
                  break;
 
470
               case rtf_keyword::rkw_clvertalc:
 
471
                  tcdCurCellDef->VAlign=table_cell_def::valign_center;
 
472
                  break;
 
473
               case rtf_keyword::rkw_clvertalb:
 
474
                  tcdCurCellDef->VAlign=table_cell_def::valign_bottom;
 
475
                  break;
 
476
               // page formatting
 
477
               case rtf_keyword::rkw_paperw:
 
478
                  iDocWidth=kw.parameter();
 
479
                  break;
 
480
               case rtf_keyword::rkw_margl:
 
481
                  iMarginLeft=kw.parameter();
 
482
                  break;
 
483
               default: break;
 
484
               }
 
485
            }
 
486
         break;
 
487
      }
 
488
      case '{':
 
489
         // perform group opening actions here
 
490
         foStack.push(cur_options);
 
491
         ++buf_in;
 
492
         break;
 
493
      case '}':
 
494
         // perform group closing actions here
 
495
         cur_options=foStack.top();
 
496
         foStack.pop();
 
497
         ++buf_in;
 
498
         break;
 
499
      case 13:
 
500
      case 10:
 
501
         ++buf_in;
 
502
         break;
 
503
      case '<':
 
504
         par_html.write("&lt;");
 
505
         ++buf_in;
 
506
         break;
 
507
      case '>':
 
508
         par_html.write("&gt;");
 
509
         ++buf_in;
 
510
         break;
 
511
/*      case ' ':
 
512
         par_html.write("&ensp;");
 
513
         ++buf_in;
 
514
         break;*/
 
515
      default:
 
516
         par_html.write(*buf_in++);
 
517
      }
 
518
   }
 
519
 
 
520
   t_str=cur_options.get_par_str()+par_html.str()
 
521
        +"&nbsp;"+par_html.close()+"</p>\n";
 
522
   html+=t_str;
 
523
 
 
524
   delete tcCurCell;
 
525
   delete trCurRow;
 
526
   delete tblCurTable;
 
527
   delete tcdCurCellDef;
 
528
 
 
529
   return html;
 
530
}
 
531