1
/***************************************************************************
2
* Copyright (C) 2004-2010 by Thomas Fischer *
3
* fischer@unix-ag.uni-kl.de *
5
* This program is free software; you can redistribute it and/or modify *
6
* it under the terms of the GNU General Public License as published by *
7
* the Free Software Foundation; either version 2 of the License, or *
8
* (at your option) any later version. *
10
* This program is distributed in the hope that it will be useful, *
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13
* GNU General Public License for more details. *
15
* You should have received a copy of the GNU General Public License *
16
* along with this program; if not, write to the *
17
* Free Software Foundation, Inc., *
18
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19
***************************************************************************/
22
#include <QStringList>
27
#include "encoderlatex.h"
29
EncoderLaTeX *encoderLaTeX = NULL;
31
/// Table pairing a LaTeX command name (latexCommand) with a Unicode code
/// point; buildCombinedMapping() reads the fields latexCommand and unicode.
/// The commented-out rows use the placeholder "x" as command name.
/// NOTE(review): presumably those are code points without a LaTeX
/// equivalent yet; the active rows are not visible in this listing.
static struct Decomposition {
32
const char *latexCommand;
42
/*{"x", 0x0305}, OVERLINE */
45
/*{"x", 0x0309}, HOOK ABOVE */
49
/*{"x", 0x030d}, VERTICAL LINE ABOVE */
50
/*{"x", 0x030e}, DOUBLE VERTICAL LINE ABOVE */
51
/*{"x", 0x030f}, DOUBLE GRAVE ACCENT */
52
/*{"x", 0x0310}, CANDRABINDU */
53
/*{"x", 0x0311}, INVERTED BREVE */
54
/*{"x", 0x0312}, TURNED COMMA ABOVE */
55
/*{"x", 0x0313}, COMMA ABOVE */
56
/*{"x", 0x0314}, REVERSED COMMA ABOVE */
88
/// Number of entries in decompositions, computed at compile time.
static const int decompositionscount = sizeof(decompositions) / sizeof(decompositions[ 0 ]) ;
90
/// Table pairing the letters of a LaTeX command (without the leading
/// backslash, e.g. "ldots") with the Unicode code point it stands for.
/// buildCharMapping() reads the fields letters and unicode.
/// Row order matters: the rows are turned into replacement rules in table
/// order, so a longer command has to precede a command that is its prefix.
static const struct EncoderLaTeXCommandMapping {
94
commandmappingdatalatex[] = {
102
{"ldots", 0x2026}, /** \ldots must be before \l */
114
/// Number of entries in commandmappingdatalatex, computed at compile time.
static const int commandmappingdatalatexcount = sizeof(commandmappingdatalatex) / sizeof(commandmappingdatalatex[ 0 ]) ;
116
/** Command can be either
119
(3) <space>, line end,
120
(4) \following_command (including \<space>, which must be maintained!),
121
(5) } (end of entry or group)
123
/// Regular-expression templates for the spellings of a LaTeX command;
/// %1 is substituted with the command letters in buildCharMapping().
/// In order: {\cmd}, \cmd{}, \cmd followed by a captured newline, carriage
/// return, backslash or closing brace, and \cmd followed by one whitespace
/// character. Only the third template has a capture group.
const char *expansionsCmd[] = {"\\{\\\\%1\\}", "\\\\%1\\{\\}", "\\\\%1(\\n|\\r|\\\\|\\})", "\\\\%1\\s"};
124
/// Number of templates in expansionsCmd.
static const int expansionscmdcount = sizeof(expansionsCmd) / sizeof(expansionsCmd[0]);
126
static const struct EncoderLaTeXModCharMapping {
127
const char *modifier;
129
unsigned int unicode;
131
modcharmappingdatalatex[] = {
132
{"\\\\`", "A", 0x00C0},
133
{"\\\\'", "A", 0x00C1},
134
{"\\\\\\^", "A", 0x00C2},
135
{"\\\\~", "A", 0x00C3},
136
{"\\\\\"", "A", 0x00C4},
137
{"\\\\r", "A", 0x00C5},
139
{"\\\\c", "C", 0x00C7},
140
{"\\\\`", "E", 0x00C8},
141
{"\\\\'", "E", 0x00C9},
142
{"\\\\\\^", "E", 0x00CA},
143
{"\\\\\"", "E", 0x00CB},
144
{"\\\\`", "I", 0x00CC},
145
{"\\\\'", "I", 0x00CD},
146
{"\\\\\\^", "I", 0x00CE},
147
{"\\\\\"", "I", 0x00CF},
149
{"\\\\~", "N", 0x00D1},
150
{"\\\\`", "O", 0x00D2},
151
{"\\\\'", "O", 0x00D3},
152
{"\\\\\\^", "O", 0x00D4},
154
{"\\\\\"", "O", 0x00D6},
156
{"\\\\", "O", 0x00D8},
157
{"\\\\`", "U", 0x00D9},
158
{"\\\\'", "U", 0x00DA},
159
{"\\\\\\^", "U", 0x00DB},
160
{"\\\\\"", "U", 0x00DC},
161
{"\\\\'", "Y", 0x00DD},
163
{"\\\\\"", "s", 0x00DF},
164
{"\\\\`", "a", 0x00E0},
165
{"\\\\'", "a", 0x00E1},
166
{"\\\\\\^", "a", 0x00E2},
167
{"\\\\~", "a", 0x00E3},
168
{"\\\\\"", "a", 0x00E4},
169
{"\\\\r", "a", 0x00E5},
171
{"\\\\c", "c", 0x00E7},
172
{"\\\\`", "e", 0x00E8},
173
{"\\\\'", "e", 0x00E9},
174
{"\\\\\\^", "e", 0x00EA},
175
{"\\\\\"", "e", 0x00EB},
176
{"\\\\`", "i", 0x00EC},
177
{"\\\\'", "i", 0x00ED},
178
{"\\\\'", "\\\\i", 0x00ED},
179
{"\\\\\\^", "i", 0x00EE},
182
{"\\\\~", "n", 0x00F1},
183
{"\\\\`", "o", 0x00F2},
184
{"\\\\'", "o", 0x00F3},
185
{"\\\\\\^", "o", 0x00F4},
187
{"\\\\\"", "o", 0x00F6},
189
{"\\\\", "o", 0x00F8},
190
{"\\\\`", "u", 0x00F9},
191
{"\\\\'", "u", 0x00FA},
192
{"\\\\\\^", "u", 0x00FB},
193
{"\\\\\"", "u", 0x00FC},
194
{"\\\\'", "y", 0x00FD},
199
{"\\\\u", "A", 0x0102},
200
{"\\\\u", "a", 0x0103},
203
{"\\\\'", "C", 0x0106},
204
{"\\\\'", "c", 0x0107},
209
{"\\\\v", "C", 0x010C},
210
{"\\\\v", "c", 0x010D},
211
{"\\\\v", "D", 0x010E},
221
{"\\\\c", "E", 0x0118},
222
{"\\\\c", "e", 0x0119},
223
{"\\\\v", "E", 0x011A},
224
{"\\\\v", "e", 0x011B},
227
{"\\\\u", "G", 0x011E},
228
{"\\\\u", "g", 0x011F},
241
{"\\\\u", "I", 0x012C},
242
{"\\\\u", "i", 0x012D},
254
{"\\\\'", "L", 0x0139},
255
{"\\\\'", "l", 0x013A},
264
{"\\\\'", "N", 0x0143},
265
{"\\\\'", "n", 0x0144},
268
{"\\\\v", "N", 0x0147},
269
{"\\\\v", "n", 0x0148},
275
{"\\\\u", "O", 0x014E},
276
{"\\\\u", "o", 0x014F},
277
{"\\\\H", "O", 0x0150},
278
{"\\\\H", "o", 0x0151},
281
{"\\\\'", "R", 0x0154},
282
{"\\\\'", "r", 0x0155},
285
{"\\\\v", "R", 0x0158},
286
{"\\\\v", "r", 0x0159},
287
{"\\\\'", "S", 0x015A},
288
{"\\\\'", "s", 0x015B},
291
{"\\\\c", "S", 0x015E},
292
{"\\\\c", "s", 0x015F},
293
{"\\\\v", "S", 0x0160},
294
{"\\\\v", "s", 0x0161},
297
{"\\\\v", "T", 0x0164},
305
{"\\\\u", "U", 0x016C},
306
{"\\\\u", "u", 0x016D},
307
{"\\\\r", "U", 0x016E},
308
{"\\\\r", "u", 0x016F},
317
{"\\\\\"", "Y", 0x0178},
318
{"\\\\'", "Z", 0x0179},
319
{"\\\\'", "z", 0x017A},
322
{"\\\\v", "Z", 0x017D},
323
{"\\\\v", "z", 0x017E},
326
{"\\\\v", "A", 0x01CD},
327
{"\\\\v", "a", 0x01CE},
328
{"\\\\v", "G", 0x01E6},
329
{"\\\\v", "g", 0x01E7}
332
/// Regular-expression templates for a modifier (%1) applied to a letter (%2)
/// where the letter is separated from the modifier: {mod{x}}, {mod x} and
/// mod{x}. Used for every modifier in buildCharMapping().
const char *expansionsMod1[] = {"\\{%1\\{%2\\}\\}", "\\{%1 %2\\}", "%1\\{%2\\}"};
333
/// Number of templates in expansionsMod1.
static const int expansionsmod1count = sizeof(expansionsMod1) / sizeof(expansionsMod1[0]);
334
/// Templates where the letter directly follows the modifier: {modx}, modx{}
/// and modx. buildCharMapping() uses these only when the modifier does not
/// end in a letter.
const char *expansionsMod2[] = {"\\{%1%2\\}", "%1%2\\{\\}", "%1%2"};
335
/// Number of templates in expansionsMod2.
static const int expansionsmod2count = sizeof(expansionsMod2) / sizeof(expansionsMod2[0]);
337
/// Number of entries in modcharmappingdatalatex, computed at compile time.
static const int modcharmappingdatalatexcount = sizeof(modcharmappingdatalatex) / sizeof(modcharmappingdatalatex[ 0 ]) ;
339
/// Table of character sequences handled without command expansion.
/// Each row holds, in order: a regular expression matched when decoding,
/// the Unicode code point it decodes to, and the LaTeX text written when
/// encoding. buildCharMapping() reads them as regexp, unicode and latex.
static const struct EncoderLaTeXCharMapping {
341
unsigned int unicode;
344
charmappingdatalatex[] = {
345
{"\\\\#", 0x0023, "\\#"},
346
{"\\\\&", 0x0026, "\\&"},
347
{"\\\\_", 0x005F, "\\_"},
348
{"!`", 0x00A1, "!`"},
349
{"\"<", 0x00AB, "\"<"},
350
{"\">", 0x00BB, "\">"},
351
{"[?]`", 0x00BF, "?`"},
352
{"---", 0x2014, "---"}, ///< has to be before 0x2013, otherwise it would be interpreted as --{}-
353
{"--", 0x2013, "--"},
354
{"``", 0x201C, "``"},
358
static const int charmappingdatalatexcount = sizeof(charmappingdatalatex) / sizeof(charmappingdatalatex[ 0 ]) ;
361
* Private class to store internal variables that should not be visible
362
* in the interface as defined in the header file.
364
/// Private data of EncoderLaTeX: two lists of replacement rules plus the
/// routines that fill them from the static tables of this file.
class EncoderLaTeX::EncoderLaTeXPrivate
367
struct CombinedMappingItem {
372
struct CharMappingItem {
378
QList<CombinedMappingItem> combinedMapping;
379
QList<CharMappingItem> charMapping;
381
/// Appends one CombinedMappingItem per row of decompositions. The regular
/// expression captures any single character that is directly followed by
/// the row's Unicode code point; latex holds the row's command name.
void buildCombinedMapping() {
382
for (int i = 0; i < decompositionscount; i++) {
383
CombinedMappingItem item;
384
item.regExp = QRegExp("(.)" + QString(QChar(decompositions[i].unicode)));
385
item.latex = decompositions[i].latexCommand;
386
combinedMapping.append(item);
390
/// Fills charMapping in three passes: plain sequences, commands and
/// modifier/letter combinations. Rules are appended in table order, which
/// is the order in which decode() and encode() iterate over them.
void buildCharMapping() {
391
/** encoding and decoding for digraphs such as -- or ?` */
392
for (int i = 0; i < charmappingdatalatexcount; i++) {
393
CharMappingItem charMappingItem;
394
charMappingItem.regExp = QRegExp(charmappingdatalatex[ i ].regexp);
395
charMappingItem.unicode = QChar(charmappingdatalatex[ i ].unicode);
396
charMappingItem.latex = QString(charmappingdatalatex[ i ].latex);
397
charMapping.append(charMappingItem);
400
/** encoding and decoding for commands such as \AA or \ss */
401
for (int i = 0; i < commandmappingdatalatexcount; ++i) {
402
/** different types of writing such as {\AA} or \AA{} possible */
403
for (int j = 0; j < expansionscmdcount; ++j) {
404
CharMappingItem charMappingItem;
405
charMappingItem.regExp = QRegExp(QString(expansionsCmd[j]).arg(commandmappingdatalatex[i].letters));
406
charMappingItem.unicode = QChar(commandmappingdatalatex[i].unicode);
/// A template with a capture group also matches the character after the
/// command; the back-reference puts that character back on replacement.
407
if (charMappingItem.regExp.numCaptures() > 0)
408
charMappingItem.unicode += QString("\\1");
409
charMappingItem.latex = QString("{\\%1}").arg(commandmappingdatalatex[i].letters);
410
charMapping.append(charMappingItem);
414
/** encoding and decoding for letters such as \"a */
415
for (int i = 0; i < modcharmappingdatalatexcount; ++i) {
416
QString modifierRegExp = QString(modcharmappingdatalatex[i].modifier);
417
QString modifier = modifierRegExp;
/// The table stores the modifier in regular-expression form; undo the
/// escaping of ^ and of the backslash to get the text written on encoding.
418
modifier.replace("\\^", "^").replace("\\\\", "\\");
420
/** first batch of replacement rules, where no separator is required between modifier and character (e.g. \"a) */
421
if (!modifierRegExp.at(modifierRegExp.length() - 1).isLetter())
422
for (int j = 0; j < expansionsmod2count; ++j) {
423
CharMappingItem charMappingItem;
424
charMappingItem.regExp = QRegExp(QString(expansionsMod2[j]).arg(modifierRegExp).arg(modcharmappingdatalatex[i].letter));
425
charMappingItem.unicode = QChar(modcharmappingdatalatex[i].unicode);
426
charMappingItem.latex = QString("{%1%2}").arg(modifier).arg(modcharmappingdatalatex[i].letter);
427
charMapping.append(charMappingItem);
430
/** second batch of replacement rules, where a separator is required between modifier and character (e.g. \v{g}) */
431
for (int j = 0; j < expansionsmod1count; ++j) {
432
CharMappingItem charMappingItem;
433
charMappingItem.regExp = QRegExp(QString(expansionsMod1[j]).arg(modifierRegExp).arg(modcharmappingdatalatex[i].letter));
434
charMappingItem.unicode = QChar(modcharmappingdatalatex[i].unicode);
435
charMappingItem.latex = QString("%1{%2}").arg(modifier).arg(modcharmappingdatalatex[i].letter);
436
charMapping.append(charMappingItem);
442
/// Creates the encoder: allocates the private data object and builds both
/// replacement-rule lists once, so decode() and encode() can reuse them.
EncoderLaTeX::EncoderLaTeX()
443
: Encoder(), d(new EncoderLaTeX::EncoderLaTeXPrivate)
445
d->buildCharMapping();
446
d->buildCombinedMapping();
449
EncoderLaTeX::~EncoderLaTeX()
454
/// Converts LaTeX notation in @p text to Unicode characters.
///
/// Steps visible in this function:
///  - the text is wrapped in start-stop markers, which are removed again
///    from the returned string;
///  - URLs matching httpRegExp are located before any replacement and are
///    written back from urls at the end, so replacements cannot damage them;
///  - decomposedUTF8toLaTeX() is applied to the text;
///  - the text is split at '$'; a piece ending in a backslash is rejoined
///    with the '$' and the piece after it, because that '$' was escaped;
///  - every rule in d->charMapping replaces matches of its regular
///    expression with its Unicode text;
///  - the result is split at the split marker and reassembled, with pieces
///    taken from intermediate re-inserted between '$' characters.
/// NOTE(review): presumably only the non-math pieces are joined into the
/// string the rules are applied to, leaving math regions untouched; the
/// lines selecting the pieces are not visible here -- confirm.
///
/// @param text text in LaTeX notation
/// @return the converted text without the start-stop markers
QString EncoderLaTeX::decode(const QString & text)
456
const QString splitMarker = "|KBIBTEX|";
458
/** start-stop marker ensures that each text starts and stops
459
* with plain text and not with an inline math environment.
460
* This invariant is exploited implicitly in the code below. */
461
const QString startStopMarker = "|STARTSTOP|";
462
QString result = startStopMarker + text + startStopMarker;
464
/** Collect (all?) urls from the BibTeX file and store them in urls */
465
/** Problem is that the replace function below will replace
466
* character sequences in the URL rendering the URL invalid.
467
* Later, all URLs will be replaced back to their original
468
* in the hope nothing breaks ... */
470
QRegExp httpRegExp("(ht|f)tps?://[^\"} ]+");
471
httpRegExp.setMinimal(false);
474
pos = result.indexOf(httpRegExp, pos);
477
QString url = httpRegExp.cap(0);
482
decomposedUTF8toLaTeX(result);
484
/** split text into math and non-math regions */
485
QStringList intermediate = result.split('$', QString::SkipEmptyParts);
486
QStringList::Iterator it = intermediate.begin();
487
while (it != intermediate.end()) {
489
* Sometimes we split strings like "\$", which is not intended.
490
* So, we have to manually fix things by checking for strings
491
* ending with "\" and append both the removed dollar sign and
492
* the following string (which was never supposed to be an
493
* independent string). Finally, we remove the unnecessary
494
* string and continue.
496
if ((*it).endsWith("\\")) {
497
QStringList::Iterator cur = it;
499
(*cur).append('$').append(*it);
500
it = intermediate.erase(it);
506
for (QStringList::Iterator it = intermediate.begin(); it != intermediate.end(); ++it) {
507
if (!result.isEmpty()) result.append(splitMarker);
513
if (it == intermediate.end())
516
if ((*it).length() > 256)
517
kWarning() << "Very long math equation using $ found, maybe due to broken inline math: " << (*it).left(48);
520
for (QList<EncoderLaTeXPrivate::CharMappingItem>::ConstIterator cmit = d->charMapping.begin(); cmit != d->charMapping.end(); ++cmit)
521
result.replace((*cmit).regExp, (*cmit).unicode);
522
QStringList transformed = result.split(splitMarker, QString::SkipEmptyParts);
525
for (QStringList::Iterator itt = transformed.begin(), iti = intermediate.begin(); itt != transformed.end() && iti != intermediate.end(); ++itt, ++iti) {
529
if (iti == intermediate.end())
532
result.append("$").append(*iti).append("$");
535
/** Reinserting original URLs as explained above */
539
pos = result.indexOf(httpRegExp, pos);
542
int len = httpRegExp.cap(0).length();
543
result = result.left(pos - 1).append(urls[idx++]).append(result.mid(pos + len - 1));
547
return result.replace(startStopMarker, "");
550
/// Converts Unicode characters in @p text to LaTeX notation; the counterpart
/// of decode().
///
/// Steps visible in this function:
///  - the text is wrapped in start-stop markers, which are removed again
///    from the returned string;
///  - URLs matching httpRegExp are located before any replacement and are
///    written back from urls at the end;
///  - the text is split at '$'; a piece ending in a backslash is rejoined
///    with the '$' and the piece after it, because that '$' was escaped;
///  - every rule in d->charMapping replaces its Unicode text with its LaTeX
///    text;
///  - the result is split at the split marker (empty parts are kept here)
///    and reassembled, with pieces taken from intermediate re-inserted
///    between '$' characters;
///  - if the result contains "\url{", the escaping of &, _ and # is undone
///    and U+2013 becomes "--" throughout the whole result;
///  - decomposedUTF8toLaTeX() is applied last.
/// NOTE(review): presumably only the non-math pieces are joined into the
/// string the rules are applied to; the lines selecting the pieces are not
/// visible here -- confirm.
///
/// @param text text containing Unicode characters
/// @return the text in LaTeX notation without the start-stop markers
QString EncoderLaTeX::encode(const QString & text)
552
const QString splitMarker = "|KBIBTEX|";
554
/** start-stop marker ensures that each text starts and stops
555
* with plain text and not with an inline math environment.
556
* This invariant is exploited implicitly in the code below. */
557
const QString startStopMarker = "|STARTSTOP|";
558
QString result = startStopMarker + text + startStopMarker;
560
/** Collect (all?) urls from the BibTeX file and store them in urls */
561
/** Problem is that the replace function below will replace
562
* character sequences in the URL rendering the URL invalid.
563
* Later, all URLs will be replaced back to their original
564
* in the hope nothing breaks ... */
566
QRegExp httpRegExp("(ht|f)tps?://[^\"} ]+");
567
httpRegExp.setMinimal(false);
570
pos = result.indexOf(httpRegExp, pos);
573
QString url = httpRegExp.cap(0);
578
/** split text into math and non-math regions */
579
QStringList intermediate = result.split('$', QString::SkipEmptyParts);
580
QStringList::Iterator it = intermediate.begin();
581
while (it != intermediate.end()) {
583
* Sometimes we split strings like "\$", which is not intended.
584
* So, we have to manually fix things by checking for strings
585
* ending with "\" and append both the removed dollar sign and
586
* the following string (which was never supposed to be an
587
* independent string). Finally, we remove the unnecessary
588
* string and continue.
590
if ((*it).endsWith("\\")) {
591
QStringList::Iterator cur = it;
593
(*cur).append('$').append(*it);
594
it = intermediate.erase(it);
600
for (QStringList::Iterator it = intermediate.begin(); it != intermediate.end(); ++it) {
601
if (!result.isEmpty()) result.append(splitMarker);
604
if (it == intermediate.end())
606
if ((*it).length() > 256)
607
qDebug() << "Very long math equation using $ found, maybe due to broken inline math:" << (*it).left(48) << endl;
610
for (QList<EncoderLaTeXPrivate::CharMappingItem>::ConstIterator cmit = d->charMapping.begin(); cmit != d->charMapping.end(); ++cmit)
611
result.replace((*cmit).unicode, (*cmit).latex);
613
QStringList transformed = result.split(splitMarker, QString::KeepEmptyParts, Qt::CaseSensitive);
616
for (QStringList::Iterator itt = transformed.begin(), iti = intermediate.begin(); itt != transformed.end() && iti != intermediate.end(); ++itt, ++iti) {
620
if (iti == intermediate.end())
623
result.append("$").append(*iti).append("$");
626
/** \url accepts unquoted & and _
627
May introduce new problems, though */
628
if (result.contains("\\url{"))
629
result.replace("\\&", "&").replace("\\_", "_").replace(QChar(0x2013), "--").replace("\\#", "#");
631
decomposedUTF8toLaTeX(result);
633
/** Reinserting original URLs as explained above */
637
pos = result.indexOf(httpRegExp, pos);
640
int len = httpRegExp.cap(0).length();
641
result = result.left(pos - 1).append(urls[idx++]).append(result.mid(pos + len - 1));
645
return result.replace(startStopMarker, "");
648
/// Encodes a single character only: works on a copy of @p text and applies
/// just those rules of d->charMapping whose Unicode text equals @p replace,
/// substituting the rule's LaTeX text. All other characters stay unchanged.
/// Unlike the one-argument encode(), no URL or math handling is visible here.
/// NOTE(review): the return statement is not visible in this listing;
/// presumably the modified copy is returned -- confirm.
///
/// @param text text containing Unicode characters
/// @param replace the one character to be converted to LaTeX notation
QString EncoderLaTeX::encode(const QString &text, const QChar &replace)
650
QString result = text;
651
for (QList<EncoderLaTeXPrivate::CharMappingItem>::ConstIterator it = d->charMapping.begin(); it != d->charMapping.end(); ++it)
652
if ((*it).unicode == replace)
653
result.replace((*it).unicode, (*it).latex);
657
/// Rewrites, in place, each character that is followed by one of the code
/// points from the combined mapping as a LaTeX command applied to that
/// character: backslash, the command name, then the character in braces.
/// NOTE(review): the loop header around the replacement and the return
/// statement are not visible in this listing; presumably the search is
/// repeated until no match is left and @p text is returned -- confirm.
///
/// @param text string that is modified in place
QString& EncoderLaTeX::decomposedUTF8toLaTeX(QString &text)
659
for (QList<EncoderLaTeXPrivate::CombinedMappingItem>::Iterator it = d->combinedMapping.begin(); it != d->combinedMapping.end(); ++it) {
660
int i = (*it).regExp.indexIn(text);
/// The captured group is the base character in front of the code point.
662
QString a = (*it).regExp.cap(1);
/// Skip two characters of the old text: the base character and the code
/// point that followed it.
663
text = text.left(i) + "\\" + (*it).latex + "{" + a + "}" + text.mid(i + 2);
/// Continue searching behind the start of the text just inserted.
664
i = (*it).regExp.indexIn(text, i + 1);
672
/// Creates the file-level encoderLaTeX instance on first use; later calls
/// find the pointer set and create nothing.
/// NOTE(review): the return statement is not visible in this listing;
/// presumably the shared instance is returned -- confirm.
EncoderLaTeX* EncoderLaTeX::currentEncoderLaTeX()
674
if (encoderLaTeX == NULL)
675
encoderLaTeX = new EncoderLaTeX();
680
/// Acts only when the file-level encoderLaTeX pointer is set.
/// NOTE(review): the guarded statements are not visible in this listing;
/// presumably the instance is deleted and the pointer reset to NULL so that
/// currentEncoderLaTeX() can create a fresh one -- confirm.
void EncoderLaTeX::deleteCurrentEncoderLaTeX()
682
if (encoderLaTeX != NULL) {
688
/// Replaces, in place, special characters by plain text taken from the
/// static tables: a character from the modifier table becomes its base
/// letter, a character from the command table becomes the letters of its
/// command name.
/// NOTE(review): the return statement is not visible in this listing;
/// presumably @p text is returned -- confirm.
///
/// @param text string that is modified in place
QString& EncoderLaTeX::convertToPlainAscii(QString &text)
690
for (int i = 0; i < modcharmappingdatalatexcount; ++i) {
691
QChar c = QChar(modcharmappingdatalatex[i].unicode);
/// Replace only when the character occurs in the text.
692
if (text.indexOf(c) >= 0)
693
text = text.replace(c, QString(modcharmappingdatalatex[i].letter));
695
for (int i = 0; i < commandmappingdatalatexcount; ++i) {
696
QChar c = QChar(commandmappingdatalatex[i].unicode);
697
if (text.indexOf(c) >= 0)
698
text = text.replace(c, QString(commandmappingdatalatex[i].letters));