2
#include "util/text/utf8.h"
3
#include "util/text/wrap_text.h"
5
// Tests whether code point `c` falls inside one of the CJK-related ranges listed
// below. Each statement ORs one more inclusive [low, high] range into `result`.
// NOTE(review): the statement that consumes `result` is not visible in this excerpt.
bool WordWrapper::IsCJK(uint32_t c) {
10
// CJK characters can be wrapped more freely.
11
bool result = (c >= 0x1100 && c <= 0x11FF); // Hangul Jamo.
12
result = result || (c >= 0x2E80 && c <= 0x2FFF); // Kangxi Radicals etc.
14
result = result || (c >= 0x3040 && c <= 0x31FF); // Hiragana, Katakana, Hangul Compatibility Jamo etc.
15
result = result || (c >= 0x3200 && c <= 0x32FF); // CJK Enclosed
16
result = result || (c >= 0x3300 && c <= 0x33FF); // CJK Compatibility
17
result = result || (c >= 0x3400 && c <= 0x4DB5); // CJK Unified Ideographs Extension A
19
// 0x3040-0x4DB5 is exactly the union of the four contiguous ranges tested just above.
// NOTE(review): as shown here both forms are present, which makes the four separate
// tests redundant; presumably only one variant is meant to be active - confirm.
result = result || (c >= 0x3040 && c <= 0x4DB5); // Above collapsed
21
result = result || (c >= 0x4E00 && c <= 0x9FBB); // CJK Unified Ideographs
22
result = result || (c >= 0xAC00 && c <= 0xD7AF); // Hangul Syllables
23
result = result || (c >= 0xF900 && c <= 0xFAD9); // CJK Compatibility Ideographs
24
// The remaining ranges lie above 0xFFFF, so `c` must be a full code point, not a UTF-16 unit.
result = result || (c >= 0x20000 && c <= 0x2A6D6); // CJK Unified Ideographs Extension B
25
result = result || (c >= 0x2F800 && c <= 0x2FA1D); // CJK Compatibility Supplement
29
// Classifies code point `c` as punctuation by matching it against the case labels below.
// Wrap() uses this to decide where a word may end without a space.
// NOTE(review): the enclosing switch and the return statements are not visible in this
// excerpt, and there may be further case labels that are not shown.
bool WordWrapper::IsPunctuation(uint32_t c) {
31
// TODO: This list of punctuation is very incomplete.
38
// Same code point that IsShy() tests for.
case 0x00AD: // SOFT HYPHEN
39
case 0x3001: // IDEOGRAPHIC COMMA
40
case 0x3002: // IDEOGRAPHIC FULL STOP
41
case 0x06D4: // ARABIC FULL STOP
42
case 0xFF01: // FULLWIDTH EXCLAMATION MARK
43
case 0xFF09: // FULLWIDTH RIGHT PARENTHESIS
44
case 0xFF1F: // FULLWIDTH QUESTION MARK
52
// Classifies code point `c` as a space by matching it against the case labels below.
// Wrap() treats a space as the end of the current word.
// NOTE(review): the enclosing switch, the return statements and any further case
// labels are not visible in this excerpt.
bool WordWrapper::IsSpace(uint32_t c) {
56
case 0x2002: // EN SPACE
57
case 0x2003: // EM SPACE
58
case 0x3000: // IDEOGRAPHIC SPACE
66
// Returns true only when `c` is exactly U+00AD, the soft hyphen ("shy") code point.
bool WordWrapper::IsShy(uint32_t c) {
67
return c == 0x00AD; // SOFT HYPHEN
70
// Returns a std::string by value; takes no arguments.
// NOTE(review): the body is not visible in this excerpt - presumably it yields the
// wrapped output accumulated in out_; confirm against the full file.
std::string WordWrapper::Wrapped() {
77
// Handles the case where the pending word does not fit on the current line, i.e. the
// current position x_ plus the pending word width wordWidth_ exceeds maxW_.
// NOTE(review): the statements that actually emit the line break are not visible in
// this excerpt; only the soft-hyphen handling and the flag reset are shown.
void WordWrapper::WrapBeforeWord() {
78
if (x_ + wordWidth_ > maxW_) {
79
// Inspects the last element of out_.
// NOTE(review): out_.size() - 1 wraps around if out_ is empty; no guard is visible here.
// NOTE(review): out_ is built from byte ranges of str_, so this hands a single char to
// IsShy(), which compares against the code point 0x00AD - confirm this matches as intended.
if (IsShy(out_[out_.size() - 1])) {
80
// Soft hyphen, replace it with a real hyphen since we wrapped at it.
81
// TODO: There's an edge case here where the hyphen might not fit.
82
out_[out_.size() - 1] = '-';
86
// Clears the forced-wrap request that Wrap() sets for words too wide for one line.
forceEarlyWrap_ = false;
90
// Appends the bytes of str_ in the half-open range [lastIndex_, endIndex) to out_ and
// then advances lastIndex_ to endIndex, so the next call continues from there.
//   endIndex   - byte offset into str_, one past the last byte to append.
//   addNewline - NOTE(review): its use is not visible in this excerpt.
void WordWrapper::AppendWord(int endIndex, bool addNewline) {
92
// This will include the newline.
93
out_ += std::string(str_ + lastIndex_, endIndex - lastIndex_);
97
lastIndex_ = endIndex;
100
// Wraps str_ so that lines stay within maxW_, emitting the result through AppendWord().
// It first measures the whole string; if that does not fit, it walks str_ one code point
// at a time, tracking the byte offset before and after each code point and the width of
// the word accumulated since lastIndex_.
// NOTE(review): several branch conditions, closing braces and statements are missing
// from this excerpt, so the comments below describe only what the visible lines show.
void WordWrapper::Wrap() {
103
// First, let's check if it fits as-is.
104
size_t len = strlen(str_);
105
if (MeasureWidth(str_, len) <= maxW_) {
106
// If it fits, we don't need to go through each character.
111
// The loop advances only through utf.next() in the body; there is no increment clause.
for (UTF8 utf(str_); !utf.end(); ) {
112
// Byte offsets of the current code point: [beforeIndex, afterIndex).
int beforeIndex = utf.byteIndex();
113
uint32_t c = utf.next();
114
int afterIndex = utf.byteIndex();
116
// Is this a newline character, hard wrapping?
118
// This will include the newline character.
119
AppendWord(afterIndex, false);
122
// We wrapped once, so stop forcing.
123
forceEarlyWrap_ = false;
127
// Candidate width of the current word including this code point. The conditions that
// choose between the two assignments below are not visible in this excerpt.
float newWordWidth = 0.0f;
129
newWordWidth = wordWidth_;
131
// Measure the entire word for kerning purposes. May not be 100% perfect.
132
newWordWidth = MeasureWidth(str_ + lastIndex_, afterIndex - lastIndex_);
135
// Is this the end of a word (space)?
136
// Only counts when a word is actually pending (wordWidth_ > 0).
if (wordWidth_ > 0.0f && IsSpace(c)) {
137
AppendWord(afterIndex, false);
138
// We include the space in the x increase.
139
// If the space takes it over, we'll wrap on the next word.
145
// Can the word fit on a line even all by itself so far?
146
if (wordWidth_ > 0.0f && newWordWidth > maxW_) {
147
// Nope. Let's drop what's there so far onto its own line.
148
// Taken only when the line already has content (x_ > 0), the pending word overflows it,
// and at least one byte of the word precedes the current code point.
if (x_ > 0.0f && x_ + wordWidth_ > maxW_ && beforeIndex > lastIndex_) {
149
// Let's put as many characters as will fit on the previous line.
150
// This word can't fit on one line even, so it's going to be cut into pieces anyway.
151
// Better to avoid huge gaps, in that case.
152
forceEarlyWrap_ = true;
154
// Now rewind back to where the word started so we can wrap at the opportune moment.
156
// Loop body is not visible in this excerpt; it runs until utf is back at lastIndex_.
while (utf.byteIndex() > lastIndex_) {
161
// Now, add the word so far (without this latest character) and break.
162
AppendWord(beforeIndex, true);
165
forceEarlyWrap_ = false;
166
// The current character will be handled as part of the next word.
170
wordWidth_ = newWordWidth;
172
// Is this the end of a word via punctuation / CJK?
173
// forceEarlyWrap_ makes every code point a break opportunity while it is set.
if (wordWidth_ > 0.0f && (IsCJK(c) || IsPunctuation(c) || forceEarlyWrap_)) {
174
// CJK doesn't require spaces, so we treat each letter as its own word.
175
AppendWord(afterIndex, false);
181
// Now insert the rest of the string - the last word.
182
// len is the strlen() of str_ taken at the top, so this flushes everything after lastIndex_.
AppendWord((int)len, false);