~macslow/nux/nux.fix-839476

« back to all changes in this revision

Viewing changes to Nux/TextView/Unicode.cpp

Committer: Neil Jagdish Patel
Date: 2010-09-01 22:11:16 UTC
Revision ID: neil.patel@canonical.com-20100901221116-4hb351fcg6s5nka0

Initial Nux integration

files added:
Nux

Nux/AbstractBackground.cpp

Nux/AbstractBackground.h

Nux/AbstractButton.cpp

Nux/AbstractButton.h

Nux/AbstractComboBox.cpp

Nux/AbstractComboBox.h

Nux/AbstractPaintLayer.cpp

Nux/AbstractPaintLayer.h

Nux/AbstractSeparator.cpp

Nux/AbstractSeparator.h

Nux/AbstractThread.cpp

Nux/AbstractThread.h

Nux/ActionItem.cpp

Nux/ActionItem.h

Nux/AnimatedTextureArea.cpp

Nux/AnimatedTextureArea.h

Nux/BaseArea.cpp

Nux/BaseArea.h

Nux/BaseObject.cpp

Nux/BaseObject.h

Nux/BaseWindow.cpp

Nux/BaseWindow.h

Nux/BezierCurveControl.cpp

Nux/BezierCurveControl.h

Nux/BezierCurveControl2.cpp

Nux/BezierCurveControl2.h

Nux/Button.cpp

Nux/Button.h

Nux/CMakeLists.txt

Nux/Camera.cpp

Nux/Camera.h

Nux/CheckBox.cpp

Nux/CheckBox.h

Nux/ClientArea.cpp

Nux/ClientArea.h

Nux/ColorArea.cpp

Nux/ColorArea.h

Nux/ColorEditor.cpp

Nux/ColorEditor.h

Nux/ColorGradient.cpp

Nux/ColorGradient.h

Nux/ColorPickerDialog.cpp

Nux/ColorPickerDialog.h

Nux/ColorPreview.cpp

Nux/ColorPreview.h

Nux/ColorRangeValuator.cpp

Nux/ColorRangeValuator.h

Nux/ComboBoxComplex.cpp

Nux/ComboBoxComplex.h

Nux/ComboBoxSimple.cpp

Nux/ComboBoxSimple.h

Nux/CurveControl.cpp

Nux/CurveControl.h

Nux/Dialog.cpp

Nux/Dialog.h

Nux/DoubleValidator.cpp

Nux/DoubleValidator.h

Nux/EditTextBox.cpp

Nux/EditTextBox.h

Nux/FileSelector.cpp

Nux/FileSelector.h

Nux/FloatingWindow.cpp

Nux/FloatingWindow.h

Nux/FolderTreeItem.cpp

Nux/FolderTreeItem.h

Nux/FunctionGraph.cpp

Nux/FunctionGraph.h

Nux/GfxServer.cpp

Nux/GfxServer.h

Nux/GfxSimulation.cpp

Nux/GfxSimulation.h

Nux/GroupBox.cpp

Nux/GroupBox.h

Nux/GroupBox2.cpp

Nux/GroupBox2.h

Nux/HLayout.cpp

Nux/HLayout.h

Nux/HScrollBar.cpp

Nux/HScrollBar.h

Nux/HSeparator.cpp

Nux/HSeparator.h

Nux/HSplitter.cpp

Nux/HSplitter.h

Nux/HToolBar.cpp

Nux/HToolBar.h

Nux/HexRegExpValidator.cpp

Nux/HexRegExpValidator.h

Nux/Histogram.cpp

Nux/Histogram.h

Nux/HueRangeValuator.cpp

Nux/HueRangeValuator.h

Nux/IntegerValidator.cpp

Nux/IntegerValidator.h

Nux/InterfaceControl.cpp

Nux/InterfaceControl.h

Nux/KeyboardHandler.cpp

Nux/KeyboardHandler.h

Nux/Layout.cpp

Nux/Layout.h

Nux/ListControl.cpp

Nux/ListControl.h

Nux/Matrix3Editor.cpp

Nux/Matrix3Editor.h

Nux/Matrix3Preview.cpp

Nux/Matrix3Preview.h

Nux/Matrix4Editor.cpp

Nux/Matrix4Editor.h

Nux/Matrix4Preview.cpp

Nux/Matrix4Preview.h

Nux/MenuBar.cpp

Nux/MenuBar.h

Nux/MenuPage.cpp

Nux/MenuPage.h

Nux/MouseAreaCtrl.cpp

Nux/MouseAreaCtrl.h

Nux/MouseHandler.cpp

Nux/MouseHandler.h

Nux/NodeItem.cpp

Nux/NodeItem.h

Nux/NodeNetProtocol.cpp

Nux/NodeNetProtocol.h

Nux/NumericValuator.cpp

Nux/NumericValuator.h

Nux/Nux.cpp

Nux/Nux.h

Nux/NuxGlobalInitializer.cpp

Nux/NuxGlobalInitializer.h

Nux/PaintLayer.cpp

Nux/PaintLayer.h

Nux/Painter.cpp

Nux/Painter.h

Nux/Panel.cpp

Nux/Panel.h

Nux/PopUpWindow.cpp

Nux/PopUpWindow.h

Nux/PropertyItem

Nux/PropertyItem/CheckBoxProperty.cpp

Nux/PropertyItem/CheckBoxProperty.h

Nux/PropertyItem/ColorGradientPropertyItem.cpp

Nux/PropertyItem/ColorGradientPropertyItem.h

Nux/PropertyItem/ColorPreviewPropertyItem.cpp

Nux/PropertyItem/ColorPreviewPropertyItem.h

Nux/PropertyItem/ComboBoxListPropertyItem.cpp

Nux/PropertyItem/ComboBoxListPropertyItem.h

Nux/PropertyItem/ComboBoxPropertyItem.cpp

Nux/PropertyItem/ComboBoxPropertyItem.h

Nux/PropertyItem/DoubleValuatorPropertyItem.cpp

Nux/PropertyItem/DoubleValuatorPropertyItem.h

Nux/PropertyItem/EditTextLinePropertyItem.cpp

Nux/PropertyItem/EditTextLinePropertyItem.h

Nux/PropertyItem/FilePropertyItem.cpp

Nux/PropertyItem/FilePropertyItem.h

Nux/PropertyItem/IntegerValuatorPropertyItem.cpp

Nux/PropertyItem/IntegerValuatorPropertyItem.h

Nux/PropertyItem/Matrix3PreviewPropertyItem.cpp

Nux/PropertyItem/Matrix3PreviewPropertyItem.h

Nux/PropertyItem/Matrix4PreviewPropertyItem.cpp

Nux/PropertyItem/Matrix4PreviewPropertyItem.h

Nux/PropertyItem/RGBAProperty.cpp

Nux/PropertyItem/RGBAProperty.h

Nux/PropertyItem/RGBProperty.cpp

Nux/PropertyItem/RGBProperty.h

Nux/PropertyItem/RangeValueIntegerPropertyItem.cpp

Nux/PropertyItem/RangeValueIntegerPropertyItem.h

Nux/PropertyItem/RangeValuePropertyItem.cpp

Nux/PropertyItem/RangeValuePropertyItem.h

Nux/PropertyItem/SpinBoxDoublePropertyItem.cpp

Nux/PropertyItem/SpinBoxDoublePropertyItem.h

Nux/PropertyItem/SpinBoxPropertyItem.cpp

Nux/PropertyItem/SpinBoxPropertyItem.h

Nux/PropertyItem/SplineCurvePropertyItem.cpp

Nux/PropertyItem/SplineCurvePropertyItem.h

Nux/PropertyItem/Vector3Property.cpp

Nux/PropertyItem/Vector3Property.h

Nux/PropertyItem/Vector3ValuatorPropertyItem.cpp

Nux/PropertyItem/Vector3ValuatorPropertyItem.h

Nux/PropertyItem/Vector4Property.cpp

Nux/PropertyItem/Vector4Property.h

Nux/PropertyList.cpp

Nux/PropertyList.h

Nux/RGBValuator.cpp

Nux/RGBValuator.h

Nux/RadioButton.cpp

Nux/RadioButton.h

Nux/RadioButtonGroup.cpp

Nux/RadioButtonGroup.h

Nux/RangeValue.cpp

Nux/RangeValue.h

Nux/RangeValueInteger.cpp

Nux/RangeValueInteger.h

Nux/Readme.txt

Nux/RegExpValidators

Nux/ScrollBar.cpp

Nux/ScrollBar.h

Nux/ScrollView.cpp

Nux/ScrollView.h

Nux/SpinBox.cpp

Nux/SpinBox.h

Nux/SpinBoxDouble.cpp

Nux/SpinBoxDouble.h

Nux/SpinBox_Logic.cpp

Nux/SpinBox_Logic.h

Nux/SplineCurveDialog.cpp

Nux/SplineCurveDialog.h

Nux/SplineCurveEditor.cpp

Nux/SplineCurveEditor.h

Nux/SplineCurvePreview.cpp

Nux/SplineCurvePreview.h

Nux/StackManager.cpp

Nux/StackManager.h

Nux/StaticTextBox.cpp

Nux/StaticTextBox.h

Nux/SystemThread.cpp

Nux/SystemThread.h

Nux/TabView.cpp

Nux/TabView.h

Nux/TableCtrl.cpp

Nux/TableCtrl.h

Nux/TableItem.cpp

Nux/TableItem.h

Nux/TextView

Nux/TextView/Sequence.cpp

Nux/TextView/Sequence.h

Nux/TextView/TextDocument.cpp

Nux/TextView/TextDocument.h

Nux/TextView/TextView.cpp

Nux/TextView/TextView.h

Nux/TextView/TextViewFile.cpp

Nux/TextView/TextViewFont.cpp

Nux/TextView/TextViewInternal.h

Nux/TextView/TextViewKeyNav.cpp

Nux/TextView/TextViewMouse.cpp

Nux/TextView/TextViewPaint.cpp

Nux/TextView/TextViewScroll.cpp

Nux/TextView/TextViewSyntax.cpp

Nux/TextView/TextViewUsp.cpp

Nux/TextView/TextViewWidget.cpp

Nux/TextView/TextViewWidget.h

Nux/TextView/Unicode.c

Nux/TextView/Unicode.cpp

Nux/TextView/Unicode.h

Nux/TextView/codepages.h

Nux/TextView/racursor.h

Nux/TextViewGCC

Nux/TextViewGCC/Sequence.cpp

Nux/TextViewGCC/Sequence.h

Nux/TextViewGCC/TextDocument.cpp

Nux/TextViewGCC/TextDocument.h

Nux/TextViewGCC/TextView.cpp

Nux/TextViewGCC/TextView.h

Nux/TextViewGCC/TextViewFile.cpp

Nux/TextViewGCC/TextViewFont.cpp

Nux/TextViewGCC/TextViewInternal.h

Nux/TextViewGCC/TextViewKeyNav.cpp

Nux/TextViewGCC/TextViewMouse.cpp

Nux/TextViewGCC/TextViewPaint.cpp

Nux/TextViewGCC/TextViewScroll.cpp

Nux/TextViewGCC/TextViewSyntax.cpp

Nux/TextViewGCC/TextViewUsp.cpp

Nux/TextViewGCC/TextViewWidget.cpp

Nux/TextViewGCC/TextViewWidget.h

Nux/TextViewGCC/Unicode.c

Nux/TextViewGCC/Unicode.cpp

Nux/TextViewGCC/Unicode.h

Nux/TextViewGCC/codepages.h

Nux/TextViewGCC/racursor.h

Nux/TextureArea.cpp

Nux/TextureArea.h

Nux/Theme.cpp

Nux/Theme.h

Nux/TimeGraph.cpp

Nux/TimeGraph.h

Nux/TimerManager.cpp

Nux/TimerManager.h

Nux/TimerProc.cpp

Nux/TimerProc.h

Nux/ToolButton.cpp

Nux/ToolButton.h

Nux/TreeControl.cpp

Nux/TreeControl.h

Nux/Utils.cpp

Nux/Utils.h

Nux/VLayout.cpp

Nux/VLayout.h

Nux/VScrollBar.cpp

Nux/VScrollBar.h

Nux/VSeparator.cpp

Nux/VSeparator.h

Nux/VSplitter.cpp

Nux/VSplitter.h

Nux/VToolBar.cpp

Nux/VToolBar.h

Nux/Validator.cpp

Nux/Validator.h

Nux/Valuator.cpp

Nux/Valuator.h

Nux/ValuatorDouble.cpp

Nux/ValuatorDouble.h

Nux/ValuatorInt.cpp

Nux/ValuatorInt.h

Nux/Vector3Valuator.cpp

Nux/Vector3Valuator.h

Nux/Vector3ValuatorDouble.cpp

Nux/Vector3ValuatorDouble.h

Nux/WidgetMetrics.cpp

Nux/WidgetMetrics.h

Nux/WidgetSmartPointer.cpp

Nux/WidgetSmartPointer.h

Nux/WindowCompositor.cpp

Nux/WindowCompositor.h

Nux/WindowThread.cpp

Nux/WindowThread.h

Show diffs side-by-side

added added

removed removed

Nux/TextView/Unicode.cpp

* This program is free software: you can redistribute it and/or modify it

* under the terms of the GNU Lesser General Public License version 3, as

* published by the Free Software Foundation.

* This program is distributed in the hope that it will be useful, but

* WITHOUT ANY WARRANTY; without even the implied warranties of

* MERCHANTABILITY, SATISFACTORY QUALITY or FITNESS FOR A PARTICULAR

* PURPOSE. See the applicable version of the GNU Lesser General Public

* License for more details.

* You should have received a copy of both the GNU Lesser General Public

* License version 3 along with this program. If not, see

* <http://www.gnu.org/licenses/>

* Authored by: Jay Taoko <jay.taoko_AT_gmail_DOT_com>

#include "Nux.h"

#include "Unicode.h"

NAMESPACE_BEGIN_GUI

//
//  utf8_to_utf32
//
//  Decodes a single codepoint from the start of a UTF-8 buffer into one
//  UTF-32 value. Over-long, truncated and otherwise malformed sequences
//  yield the Unicode replacement character (UNI_REPLACEMENT_CHAR).
//
//  utf8str - [in]  buffer containing UTF-8 text
//  utf8len - [in]  number of code-units (bytes) available in buffer
//  pch32   - [out] receives the single UTF-32 value
//
//  Returns the number of bytes consumed from utf8str, or 0 when a
//  parameter is invalid (null pointer or empty buffer).
//
//  NOTE(review): a 4-byte sequence that decodes above 0x10FFFF, and a
//  3-byte sequence that decodes into the surrogate range, are passed
//  through unchanged by this function - confirm that callers filter them.
//
size_t utf8_to_utf32(t_UTF8 *utf8str, size_t utf8len, t_UTF32 *pch32)
{
    // nonshortest[n] is the smallest value that genuinely needs n trailing
    // bytes; anything below it is an over-long encoding. The 0xffffffff
    // entries are larger than any value a 5/6-byte form can decode to, so
    // those forms are always rejected by the final check.
    static t_UTF32 nonshortest[] =
    {
        0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff
    };

    t_UTF8  ch;
    t_UTF32 val32    = 0;
    size_t  trailing = 0;
    size_t  len      = 1;
    size_t  i;

    // validate parameters BEFORE reading from the buffer
    if(utf8str == 0 || utf8len == 0 || pch32 == 0)
        return 0;

    ch = *utf8str++;

    // look for plain ASCII first as this is most likely
    if(ch < 0x80)
    {
        *pch32 = (t_UTF32)ch;
        return 1;
    }
    // LEAD-byte of 2-byte seq: 110xxxxx 10xxxxxx
    else if((ch & 0xE0) == 0xC0)
    {
        trailing = 1;
        val32    = ch & 0x1F;
    }
    // LEAD-byte of 3-byte seq: 1110xxxx 10xxxxxx 10xxxxxx
    else if((ch & 0xF0) == 0xE0)
    {
        trailing = 2;
        val32    = ch & 0x0F;
    }
    // LEAD-byte of 4-byte seq: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    else if((ch & 0xF8) == 0xF0)
    {
        trailing = 3;
        val32    = ch & 0x07;
    }
    // ILLEGAL 5-byte seq: 111110xx + 4 trail bytes
    // (consumed as a unit, then rejected by the nonshortest check)
    else if((ch & 0xFC) == 0xF8)
    {
        trailing = 4;
        val32    = ch & 0x03;
    }
    // ILLEGAL 6-byte seq: 1111110x + 5 trail bytes
    // (consumed as a unit, then rejected by the nonshortest check)
    else if((ch & 0xFE) == 0xFC)
    {
        trailing = 5;
        val32    = ch & 0x01;
    }
    // ILLEGAL: a continuation byte (10xxxxxx) on its own, or 0xFE / 0xFF
    else
    {
        *pch32 = UNI_REPLACEMENT_CHAR;
        return 1;
    }

    // process trailing bytes, never reading past utf8len
    for(i = 0; i < trailing && len < utf8len; i++)
    {
        ch = *utf8str++;

        // Valid trail-byte: 10xxxxxx
        if((ch & 0xC0) == 0x80)
        {
            val32 = (val32 << 6) + (ch & 0x3f);
            len++;
        }
        // Anything else is an error; the offending byte is NOT consumed
        else
        {
            *pch32 = UNI_REPLACEMENT_CHAR;
            return len;
        }
    }

    // did we decode a full, shortest-form utf-8 sequence?
    if(val32 < nonshortest[trailing] || i != trailing)
        *pch32 = UNI_REPLACEMENT_CHAR;
    else
        *pch32 = val32;

    return len;
}

137

138

139

// utf32_to_utf8

140

141

// Converts the specified UTF-32 value to UTF-8

142

143

// ch32 - [in] single utf-32 value

144

// utf8str - [out] buffer to receive UTF-8 text

145

// utf8len - [in] size of utf8 buffer in bytes

146

147

// Returns number of bytes stored in utf8str

148

149

size_t utf32_to_utf8(t_UTF8 *utf8str, size_t utf8len, t_UTF32 ch32)

150

{

151

size_t len = 0;

152

153

// validate parameters

154

if(utf8str == 0 || utf8len == 0)

155

return 0;

156

157

// ASCII is the easiest

158

if(ch32 < 0x80)

159

{

160

*utf8str = (t_UTF8)ch32;

161

return 1;

162

}

163

164

// make sure we have a legal utf32 char

165

if(ch32 > UNI_MAX_LEGAL_UTF32)

166

ch32 = UNI_REPLACEMENT_CHAR;

167

168

// cannot encode the surrogate range

169

if(ch32 >= UNI_SUR_HIGH_START && ch32 <= UNI_SUR_LOW_END)

170

ch32 = UNI_REPLACEMENT_CHAR;

171

172

// 2-byte sequence

173

if(ch32 < 0x800 && utf8len >= 2)

174

{

175

*utf8str++ = (t_UTF8)((ch32 >> 6) | 0xC0);

176

*utf8str++ = (t_UTF8)((ch32 & 0x3f) | 0x80);

177

len = 2;

178

}

179

// 3-byte sequence

180

else if(ch32 < 0x10000 && utf8len >= 3)

181

{

182

*utf8str++ = (t_UTF8)((ch32 >> 12) | 0xE0);

183

*utf8str++ = (t_UTF8)((ch32 >> 6) & 0x3f | 0x80);

184

*utf8str++ = (t_UTF8)((ch32 & 0x3f) | 0x80);

185

len = 3;

186

}

187

// 4-byte sequence

188

else if(ch32 <= UNI_MAX_LEGAL_UTF32 && utf8len >= 4)

189

{

190

*utf8str++ = (t_UTF8)((ch32 >> 18) | 0xF0);

191

*utf8str++ = (t_UTF8)((ch32 >> 12) & 0x3f | 0x80);

192

*utf8str++ = (t_UTF8)((ch32 >> 6) & 0x3f | 0x80);

193

*utf8str++ = (t_UTF8)((ch32 & 0x3f) | 0x80);

194

len = 4;

195

}

196

197

// 5/6 byte sequences never occur because we limit using UNI_MAX_LEGAL_UTF32

198

199

return len;

200

}

201

202

203

// utf8_to_utf16

204

205

// Convert the specified UTF-8 stream of text to UTF-16

206

207

// 1. The maximum number possible of whole UTF-16 characters are stored in wstr

208

// 2. Illegal sequences are converted to the unicode replacement character

209

// 3. Returns the number of bytes processeed from utf8str

210

211

// utf8str - [in] buffer containing utf-8 text

212

// utf8len - [in] number of code-units (bytes) in buffer

213

// utf16str - [out] receives resulting utf-16 text

214

// utf16len - [in/out] on input, specifies the size (in UTF16s) of utf16str

215

// on output, holds actual number of UTF16s stored in utf16str

216

217

// Returns the number of bytes processed from utf8str

218

219

size_t utf8_to_utf16(t_UTF8 *utf8str, size_t utf8len, t_UTF16 *utf16str, size_t *utf16len)

220

{

221

t_UTF16 *utf16start = utf16str;

222

t_UTF8 *utf8start = utf8str;

223

224

size_t len;

225

size_t tmp16len;

226

t_UTF32 ch32;

227

228

while(utf8len > 0 && *utf16len > 0)

229

{

230

// convert to utf-32

231

len = utf8_to_utf32(utf8str, utf8len, &ch32);

232

utf8str += len;

233

utf8len -= len;

234

235

// convert to utf-16

236

tmp16len = *utf16len;

237

len = utf32_to_utf16(&ch32, 1, utf16str, &tmp16len);

238

utf16str += len;

239

(*utf16len) -= len;

240

}

241

242

*utf16len = utf16str - utf16start;

243

return utf8str - utf8start;

244

}

245

246

247

// utf16_to_utf8

248

249

// Convert the specified UTF-16 stream of text to UTF-8

250

251

// 1. As many whole codepoints as possible are stored in utf8str

252

// 2. Illegal sequences are converted to the unicode replacement character

253

254

// utf16str - [in] buffer containing utf-16 text

255

// utf16len - [in] number of code-units (UTF16s) in buffer

256

// utf8str - [out] receives resulting utf-8 text

257

// utf8len - [in/out] on input, specifies the size (in bytes) of utf8str

258

// on output, holds actual number of bytes stored in utf8str

259

260

// Returns the number of characters (UTF16s) processed from utf16str

261

262

size_t utf16_to_utf8(t_UTF16 *utf16str, size_t utf16len, t_UTF8 *utf8str, size_t *utf8len)

263

{

264

t_UTF16 * utf16start = utf16str;

265

t_UTF8 * utf8start = utf8str;

266

size_t len;

267

t_UTF32 ch32;

268

size_t ch32len;

269

270

while(utf16len > 0 && *utf8len > 0)

271

{

272

// convert to utf-32

273

ch32len = 1;

274

len = utf16_to_utf32(utf16str, utf16len, &ch32, &ch32len);

275

utf16str += len;

276

utf16len -= len;

277

278

// convert to utf-8

279

len = utf32_to_utf8(utf8str, *utf8len, ch32);

280

utf8str += len;

281

(*utf8len) -= len;

282

}

283

284

*utf8len = utf8str - utf8start;

285

return utf16str - utf16start;

286

}

287

288

289

// ascii_to_utf16

290

291

// Converts plain ASCII string to UTF-16

292

293

// asciistr - [in] buffer containing ASCII characters

294

// asciilen - [in] number of characters in buffer

295

// utf16str - [out] receives the resulting UTF-16 text

296

// utf16len - [in/out] on input, specifies length of utf16 buffer,

297

// on output, holds number of chars stored in utf16str

298

299

// Returns number of characters processed from asciistr

300

301

size_t ascii_to_utf16(t_UTF8 *asciistr, size_t asciilen, t_UTF16 *utf16str, size_t *utf16len)

302

{

303

size_t len = Min(*utf16len, asciilen);

304

305

MultiByteToWideChar(CP_ACP, 0, (CCHAR*)asciistr, len, (WCHAR *)utf16str, len);

306

*utf16len = len;

307

return len;

308

}

309

310

311

// utf16_to_ascii

312

313

// Converts UTF-16 to plain ASCII (lossy)

314

315

// utf16str - [in] buffer containing t_UTF16 characters

316

// utf16len - [in] number of WCHARs in buffer

317

// asciistr - [out] receives the resulting UTF-16 text

318

// asciilen - [in/out] on input, specifies length of ascii buffer,

319

// on output, holds number of chars stored in asciistr

320

321

// Returns number of characters processed from utf16str

322

323

size_t utf16_to_ascii(t_UTF16 *utf16str, size_t utf16len, t_UTF8 *asciistr, size_t *asciilen)

324

{

325

size_t len = Min(utf16len, *asciilen);

326

327

WideCharToMultiByte(CP_ACP, 0, INL_REINTERPRET_CAST(LPCWSTR, utf16str), len, (LPSTR)asciistr, *asciilen, 0, 0);

328

*asciilen = len;

329

return len;

330

}

331

332

333

// copy_utf8

334

335

// Copies UTF-8 string from src to dest

336

337

// src - [in] buffer containing utf-8 text

338

// srclen - [in] number of code-units in src

339

// dest - [out] receives resulting string

340

// destlen - [in/out] on input, specifies length of dest buffer

341

// on output, holds number of UTF8s stored in dest

342

343

// returns number of CHARs processed from src

344

345

size_t copy_utf8(t_UTF8 *src, size_t srclen, t_UTF8 *dest, size_t *destlen)

346

{

347

size_t len = Min(*destlen, srclen);

348

memcpy(dest, src, len * sizeof(t_UTF8));

349

350

*destlen = len;

351

return len;

352

}

353

354

355

// copy_utf16

356

357

// Copies UTF-16 string from src to dest

358

359

// src - [in] buffer containing utf-16 text

360

// srclen - [in] number of code-units in src

361

// dest - [out] receives resulting string

362

// destlen - [in/out] on input, specifies length of dest buffer

363

// on output, holds number of UTF16s stored in dest

364

365

// returns number of WCHARs processed from src

366

367

size_t copy_utf16(t_UTF16 *src, size_t srclen, t_UTF16 *dest, size_t *destlen)

368

{

369

size_t len = Min(*destlen, srclen);

370

memcpy(dest, src, len * sizeof(t_UTF16));

371

372

*destlen = len;

373

return len;

374

}

375

376

377

// swap_utf16

378

379

// Copies UTF-16 string from src to dest, performing endianess swap

380

// for each code-unit

381

382

// src - [in] buffer containing utf-16 text

383

// srclen - [in] number of code-units in src

384

// dest - [out] receives resulting word-swapped string

385

// destlen - [in/out] on input, specifies length of dest buffer

386

// on output, holds number of UTF16s stored in dest

387

388

// Returns number of WCHARs processed from src

389

390

size_t swap_utf16(t_UTF16 *src, size_t srclen, t_UTF16 *dest, size_t *destlen)

391

{

392

size_t len = Min(*destlen, srclen);

393

size_t i;

394

395

for(i = 0; i < len; i++)

396

dest[i] = SWAPWORD(src[i]);

397

398

*destlen = len;

399

return len;

400

}

401

402

403

// utf32_to_utf16

404

405

// Converts the specified UTF-32 stream of text to UTF-16

406

407

// utf32str - [in] buffer containing utf-32 text

408

// utf32len - [in] number of characters (UTF32s) in utf32str

409

// utf16str - [out] receives resulting utf-16 text

410

// utf16len - [in/out] on input, specifies the size (in UTF16s) of utf16str

411

// on output, holds actual number of t_UTF16 values stored in utf16str

412

413

// returns number of UTF32s processed from utf32str

414

415

size_t utf32_to_utf16(t_UTF32 *utf32str, size_t utf32len, t_UTF16 *utf16str, size_t *utf16len)

416

{

417

t_UTF16 *utf16start = utf16str;

418

t_UTF32 *utf32start = utf32str;

419

420

while(utf32len > 0 && *utf16len > 0)

421

{

422

t_UTF32 ch32 = *utf32str++;

423

utf32len--;

424

425

// target is a character <= 0xffff

426

if(ch32 < 0xfffe)

427

{

428

// make sure we don't represent anything in t_UTF16 surrogate range

429

// (this helps protect against non-shortest forms)

430

if(ch32 >= UNI_SUR_HIGH_START && ch32 <= UNI_SUR_LOW_END)

431

{

432

*utf16str++ = UNI_REPLACEMENT_CHAR;

433

(*utf16len)--;

434

}

435

else

436

{

437

*utf16str++ = (WORD)ch32;

438

(*utf16len)--;

439

}

440

}

441

// FFFE and FFFF are illegal mid-stream

442

else if(ch32 == 0xfffe || ch32 == 0xffff)

443

{

444

*utf16str++ = UNI_REPLACEMENT_CHAR;

445

(*utf16len)--;

446

}

447

// target is illegal Unicode value

448

else if(ch32 > UNI_MAX_UTF16)

449

{

450

*utf16str++ = UNI_REPLACEMENT_CHAR;

451

(*utf16len)--;

452

}

453

// target is in range 0xffff - 0x10ffff

454

else if(*utf16len >= 2)

455

{

456

ch32 -= 0x0010000;

457

458

*utf16str++ = (WORD)((ch32 >> 10) + UNI_SUR_HIGH_START);

459

*utf16str++ = (WORD)((ch32 & 0x3ff) + UNI_SUR_LOW_START);

460

461

(*utf16len)-=2;

462

}

463

else

464

{

465

// no room to store result

466

break;

467

}

468

}

469

470

*utf16len = utf16str - utf16start;

471

return utf32str - utf32start;

472

}

473

474

475

// utf16_to_utf32

476

477

// Converts the specified UTF-16 stream of text to UTF-32

478

479

// utf16str - [in] buffer containing utf-16 text

480

// utf16len - [in] number of code-units (UTF16s) in utf16str

481

// utf32str - [out] receives resulting utf-32 text

482

// utf32len - [in/out] on input, specifies the size (in UTF32s) of utf32str

483

// on output, holds actual number of t_UTF32 values stored in utf32str

484

485

// returns number of UTF16s processed from utf16str

486

487

size_t utf16_to_utf32(t_UTF16 *utf16str, size_t utf16len, t_UTF32 *utf32str, size_t *utf32len)

488

{

489

t_UTF16 *utf16start = utf16str;

490

t_UTF32 *utf32start = utf32str;

491

492

while(utf16len > 0 && *utf32len > 0)

493

{

494

t_UTF32 ch = *utf16str;

495

496

// first of a surrogate pair?

497

if(ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END && utf16len >= 2)

498

{

499

// get the second half of the pair

500

t_UTF32 ch2 = *(utf16str + 1);

501

502

// valid trailing surrogate unit?

503

if(ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)

504

{

505

ch = ((ch - UNI_SUR_HIGH_START) << 10) +

506

((ch2 - UNI_SUR_LOW_START) + 0x00010000);

507

508

utf16str++;

509

utf16len--;

510

}

511

// illegal character

512

else

513

{

514

ch = UNI_REPLACEMENT_CHAR;

515

}

516

}

517

518

*utf32str++ = ch;

519

(*utf32len)--;

520

521

utf16str++;

522

utf16len--;

523

}

524

525

*utf32len = utf32str - utf32start;

526

return utf16str - utf16start;

527

}

528

529

530

// utf16be_to_utf32

531

532

// Converts the specified big-endian UTF-16 stream of text to UTF-32

533

534

// utf16str - [in] buffer containing utf-16 big-endian text

535

// utf16len - [in] number of code-units (UTF16s) in utf16str

536

// utf32str - [out] receives resulting utf-32 text

537

// utf32len - [in/out] on input, specifies the size (in UTF32s) of utf32str

538

// on output, holds actual number of t_UTF32 values stored in utf32str

539

540

// returns number of UTF16s processed from utf16str

541

542

size_t utf16be_to_utf32(t_UTF16 *utf16str, size_t utf16len, t_UTF32 *utf32str, size_t *utf32len)

543

{

544

t_UTF16 *utf16start = utf16str;

545

t_UTF32 *utf32start = utf32str;

546

547

while(utf16len > 0 && *utf32len > 0)

548

{

549

t_UTF32 ch = SWAPWORD(*utf16str);

550

551

// first of a surrogate pair?

552

if(ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END && utf16len >= 2)

553

{

554

t_UTF32 ch2 = SWAPWORD(*(utf16str + 1));

555

556

// valid trailing surrogate unit?

557

if(ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)

558

{

559

ch = ((ch - UNI_SUR_HIGH_START) << 10) +

560

((ch2 - UNI_SUR_LOW_START) + 0x00010000);

561

562

utf16str++;

563

utf16len--;

564

}

565

// illegal character

566

else

567

{

568

ch = UNI_REPLACEMENT_CHAR;

569

}

570

}

571

572

*utf32str++ = ch;

573

(*utf32len)--;

574

575

utf16str++;

576

utf16len--;

577

}

578

579

*utf32len = utf32str - utf32start;

580

return utf16str - utf16start;

581

}

582

583

NAMESPACE_END_GUI

Older »