~ubuntu-branches/ubuntu/hardy/ntfs-3g/hardy

« back to all changes in this revision

Viewing changes to libntfs-3g/unistr.c

Committer: Bazaar Package Importer
Author(s): Florent Mertens
Date: 2006-09-27 12:00:49 UTC
Revision ID: james.westby@ubuntu.com-20060927120049-1hk9p6a42k58cv55

Tags: upstream-20060920

Import upstream version 20060920

files added:

AUTHORS

COPYING

CREDITS

ChangeLog

INSTALL

Makefile.am

Makefile.in

NEWS

README

aclocal.m4

autogen.sh

compile

config.guess

config.h.in

config.sub

configure

configure.ac

depcomp

include

include/Makefile.am

include/Makefile.in

include/ntfs-3g

include/ntfs-3g/Makefile.am

include/ntfs-3g/Makefile.in

include/ntfs-3g/attrib.h

include/ntfs-3g/attrlist.h

include/ntfs-3g/bitmap.h

include/ntfs-3g/bootsect.h

include/ntfs-3g/collate.h

include/ntfs-3g/compat.h

include/ntfs-3g/compress.h

include/ntfs-3g/debug.h

include/ntfs-3g/device.h

include/ntfs-3g/device_io.h

include/ntfs-3g/dir.h

include/ntfs-3g/endians.h

include/ntfs-3g/index.h

include/ntfs-3g/inode.h

include/ntfs-3g/layout.h

include/ntfs-3g/lcnalloc.h

include/ntfs-3g/list.h

include/ntfs-3g/logfile.h

include/ntfs-3g/logging.h

include/ntfs-3g/mft.h

include/ntfs-3g/misc.h

include/ntfs-3g/mst.h

include/ntfs-3g/ntfstime.h

include/ntfs-3g/runlist.h

include/ntfs-3g/security.h

include/ntfs-3g/support.h

include/ntfs-3g/types.h

include/ntfs-3g/unistr.h

include/ntfs-3g/version.h

include/ntfs-3g/volume.h

install-sh

libntfs-3g

libntfs-3g/Makefile.am

libntfs-3g/Makefile.in

libntfs-3g/attrib.c

libntfs-3g/attrlist.c

libntfs-3g/bitmap.c

libntfs-3g/bootsect.c

libntfs-3g/collate.c

libntfs-3g/compat.c

libntfs-3g/compress.c

libntfs-3g/debug.c

libntfs-3g/device.c

libntfs-3g/device_io.c

libntfs-3g/dir.c

libntfs-3g/index.c

libntfs-3g/inode.c

libntfs-3g/lcnalloc.c

libntfs-3g/logfile.c

libntfs-3g/logging.c

libntfs-3g/mft.c

libntfs-3g/misc.c

libntfs-3g/mst.c

libntfs-3g/runlist.c

libntfs-3g/security.c

libntfs-3g/unistr.c

libntfs-3g/unix_io.c

libntfs-3g/version.c

libntfs-3g/volume.c

libntfs-3g/win32_io.c

ltmain.sh

missing

src/Makefile.am

src/Makefile.in

src/ntfs-3g.8.in

src/ntfs-3g.c

src/utils.c

src/utils.h

Show diffs side-by-side

added added

removed removed

libntfs-3g/unistr.c

/**

* unistr.c - Unicode string handling. Part of the Linux-NTFS project.

* This program/include file is free software; you can redistribute it and/or

* modify it under the terms of the GNU General Public License as published

* by the Free Software Foundation; either version 2 of the License, or

* (at your option) any later version.

* This program/include file is distributed in the hope that it will be

* useful, but WITHOUT ANY WARRANTY; without even the implied warranty

* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

* GNU General Public License for more details.

* You should have received a copy of the GNU General Public License

* along with this program (in the main directory of the Linux-NTFS

* distribution in the file COPYING); if not, write to the Free Software

* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/

#ifdef HAVE_CONFIG_H

#include "config.h"

#endif

#ifdef HAVE_STDIO_H

#include <stdio.h>

#endif

#ifdef HAVE_STDLIB_H

#include <stdlib.h>

#endif

#ifdef HAVE_WCHAR_H

#include <wchar.h>

#endif

#ifdef HAVE_STRING_H

#include <string.h>

#endif

#ifdef HAVE_ERRNO_H

#include <errno.h>

#endif

#include "attrib.h"

#include "types.h"

#include "unistr.h"

#include "debug.h"

#include "logging.h"

#include "misc.h"

/*
 * IMPORTANT
 * =========
 *
 * All these routines assume that the Unicode characters are in little endian
 * encoding inside the strings!!!
 */

/*
 * This is used by the name collation functions to quickly determine what
 * characters are (in)valid.
 *
 * One flag byte per character code 0x00-0x3f.  The (currently disabled)
 * checks in ntfs_names_collate() test bit 0x08, which is set for exactly the
 * characters documented there as invalid: '"' (0x22), '*' (0x2a), '<' (0x3c),
 * '>' (0x3e) and '?' (0x3f).  All 0x40 entries must be listed for those
 * positions to line up; codes 0x10-0x1f carry the same flags as 0x01-0x0f.
 *
 * The table is compiled out together with its users.
 */
#if 0
static const u8 legal_ansi_char_array[0x40] = {
	0x00, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
	0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,

	0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
	0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,

	0x17, 0x07, 0x18, 0x17, 0x17, 0x17, 0x17, 0x17,
	0x17, 0x17, 0x18, 0x16, 0x16, 0x17, 0x07, 0x00,

	0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17,
	0x17, 0x17, 0x04, 0x16, 0x18, 0x16, 0x18, 0x18,
};
#endif

/**

* ntfs_names_are_equal - compare two Unicode names for equality

* @s1: name to compare to @s2

* @s1_len: length in Unicode characters of @s1

* @s2: name to compare to @s1

* @s2_len: length in Unicode characters of @s2

* @ic: ignore case bool

* @upcase: upcase table (only if @ic == IGNORE_CASE)

* @upcase_size: length in Unicode characters of @upcase (if present)

* Compare the names @s1 and @s2 and return TRUE (1) if the names are

* identical, or FALSE (0) if they are not identical. If @ic is IGNORE_CASE,

* the @upcase table is used to perform a case insensitive comparison.

BOOL ntfs_names_are_equal(const ntfschar *s1, size_t s1_len,

const ntfschar *s2, size_t s2_len,

const IGNORE_CASE_BOOL ic,

const ntfschar *upcase, const u32 upcase_size)

{

if (s1_len != s2_len)

return FALSE;

if (!s1_len)

100

return TRUE;

101

if (ic == CASE_SENSITIVE)

102

return ntfs_ucsncmp(s1, s2, s1_len) ? FALSE: TRUE;

103

return ntfs_ucsncasecmp(s1, s2, s1_len, upcase, upcase_size) ? FALSE:

104

TRUE;

105

}

106

107

/**

108

* ntfs_names_collate - collate two Unicode names

109

* @name1: first Unicode name to compare

110

* @name1_len: length of first Unicode name to compare

111

* @name2: second Unicode name to compare

112

* @name2_len: length of second Unicode name to compare

113

* @err_val: if @name1 contains an invalid character return this value

114

* @ic: either CASE_SENSITIVE or IGNORE_CASE

115

* @upcase: upcase table (ignored if @ic is CASE_SENSITIVE)

116

* @upcase_len: upcase table size (ignored if @ic is CASE_SENSITIVE)

117

118

* ntfs_names_collate() collates two Unicode names and returns:

119

120

* -1 if the first name collates before the second one,

121

* 0 if the names match,

122

* 1 if the second name collates before the first one, or

123

* @err_val if an invalid character is found in @name1 during the comparison.

124

125

* The following characters are considered invalid: '"', '*', '<', '>' and '?'.

126

127

int ntfs_names_collate(const ntfschar *name1, const u32 name1_len,

128

const ntfschar *name2, const u32 name2_len,

129

const int err_val __attribute__((unused)),

130

const IGNORE_CASE_BOOL ic, const ntfschar *upcase,

131

const u32 upcase_len)

132

{

133

u32 cnt;

134

ntfschar c1, c2;

135

136

#ifdef DEBUG

137

if (!name1 || !name2 || (ic && (!upcase || !upcase_len))) {

138

ntfs_log_debug("ntfs_names_collate received NULL pointer!\n");

139

exit(1);

140

}

141

#endif

142

for (cnt = 0; cnt < min(name1_len, name2_len); ++cnt) {

143

c1 = le16_to_cpu(*name1);

144

name1++;

145

c2 = le16_to_cpu(*name2);

146

name2++;

147

if (ic) {

148

if (c1 < upcase_len)

149

c1 = le16_to_cpu(upcase[c1]);

150

if (c2 < upcase_len)

151

c2 = le16_to_cpu(upcase[c2]);

152

}

153

#if 0

154

if (c1 < 64 && legal_ansi_char_array[c1] & 8)

155

return err_val;

156

#endif

157

if (c1 < c2)

158

return -1;

159

if (c1 > c2)

160

return 1;

161

}

162

if (name1_len < name2_len)

163

return -1;

164

if (name1_len == name2_len)

165

return 0;

166

/* name1_len > name2_len */

167

#if 0

168

c1 = le16_to_cpu(*name1);

169

if (c1 < 64 && legal_ansi_char_array[c1] & 8)

170

return err_val;

171

#endif

172

return 1;

173

}

174

175

/**

176

* ntfs_ucsncmp - compare two little endian Unicode strings

177

* @s1: first string

178

* @s2: second string

179

* @n: maximum unicode characters to compare

180

181

* Compare the first @n characters of the Unicode strings @s1 and @s2,

182

* The strings in little endian format and appropriate le16_to_cpu()

183

* conversion is performed on non-little endian machines.

184

185

* The function returns an integer less than, equal to, or greater than zero

186

* if @s1 (or the first @n Unicode characters thereof) is found, respectively,

187

* to be less than, to match, or be greater than @s2.

188

189

int ntfs_ucsncmp(const ntfschar *s1, const ntfschar *s2, size_t n)

190

{

191

ntfschar c1, c2;

192

size_t i;

193

194

#ifdef DEBUG

195

if (!s1 || !s2) {

196

ntfs_log_debug("ntfs_wcsncmp() received NULL pointer!\n");

197

exit(1);

198

}

199

#endif

200

for (i = 0; i < n; ++i) {

201

c1 = le16_to_cpu(s1[i]);

202

c2 = le16_to_cpu(s2[i]);

203

if (c1 < c2)

204

return -1;

205

if (c1 > c2)

206

return 1;

207

if (!c1)

208

break;

209

}

210

return 0;

211

}

212

213

/**

214

* ntfs_ucsncasecmp - compare two little endian Unicode strings, ignoring case

215

* @s1: first string

216

* @s2: second string

217

* @n: maximum unicode characters to compare

218

* @upcase: upcase table

219

* @upcase_size: upcase table size in Unicode characters

220

221

* Compare the first @n characters of the Unicode strings @s1 and @s2,

222

* ignoring case. The strings in little endian format and appropriate

223

* le16_to_cpu() conversion is performed on non-little endian machines.

224

225

* Each character is uppercased using the @upcase table before the comparison.

226

227

* The function returns an integer less than, equal to, or greater than zero

228

* if @s1 (or the first @n Unicode characters thereof) is found, respectively,

229

* to be less than, to match, or be greater than @s2.

230

231

int ntfs_ucsncasecmp(const ntfschar *s1, const ntfschar *s2, size_t n,

232

const ntfschar *upcase, const u32 upcase_size)

233

{

234

ntfschar c1, c2;

235

size_t i;

236

237

#ifdef DEBUG

238

if (!s1 || !s2 || !upcase) {

239

ntfs_log_debug("ntfs_wcsncasecmp() received NULL pointer!\n");

240

exit(1);

241

}

242

#endif

243

for (i = 0; i < n; ++i) {

244

if ((c1 = le16_to_cpu(s1[i])) < upcase_size)

245

c1 = le16_to_cpu(upcase[c1]);

246

if ((c2 = le16_to_cpu(s2[i])) < upcase_size)

247

c2 = le16_to_cpu(upcase[c2]);

248

if (c1 < c2)

249

return -1;

250

if (c1 > c2)

251

return 1;

252

if (!c1)

253

break;

254

}

255

return 0;

256

}

257

258

/**

259

* ntfs_ucsnlen - determine the length of a little endian Unicode string

260

* @s: pointer to Unicode string

261

* @maxlen: maximum length of string @s

262

263

* Return the number of Unicode characters in the little endian Unicode

264

* string @s up to a maximum of maxlen Unicode characters, not including

265

* the terminating (ntfschar)'\0'. If there is no (ntfschar)'\0' between @s

266

* and @s + @maxlen, @maxlen is returned.

267

268

* This function never looks beyond @s + @maxlen.

269

270

u32 ntfs_ucsnlen(const ntfschar *s, u32 maxlen)

271

{

272

u32 i;

273

274

for (i = 0; i < maxlen; i++) {

275

if (!le16_to_cpu(s[i]))

276

break;

277

}

278

return i;

279

}

280

281

/**

282

* ntfs_ucsndup - duplicate little endian Unicode string

283

* @s: pointer to Unicode string

284

* @maxlen: maximum length of string @s

285

286

* Return a pointer to a new little endian Unicode string which is a duplicate

287

* of the string s. Memory for the new string is obtained with ntfs_malloc(3),

288

* and can be freed with free(3).

289

290

* A maximum of @maxlen Unicode characters are copied and a terminating

291

* (ntfschar)'\0' little endian Unicode character is added.

292

293

* This function never looks beyond @s + @maxlen.

294

295

* Return a pointer to the new little endian Unicode string on success and NULL

296

* on failure with errno set to the error code.

297

298

ntfschar *ntfs_ucsndup(const ntfschar *s, u32 maxlen)

299

{

300

ntfschar *dst;

301

u32 len;

302

303

len = ntfs_ucsnlen(s, maxlen);

304

dst = ntfs_malloc((len + 1) * sizeof(ntfschar));

305

if (dst) {

306

memcpy(dst, s, len * sizeof(ntfschar));

307

dst[len] = cpu_to_le16(L'\0');

308

}

309

return dst;

310

}

311

312

/**

313

* ntfs_name_upcase - Map an Unicode name to its uppercase equivalent

314

* @name:

315

* @name_len:

316

* @upcase:

317

* @upcase_len:

318

319

* Description...

320

321

* Returns:

322

323

void ntfs_name_upcase(ntfschar *name, u32 name_len, const ntfschar *upcase,

324

const u32 upcase_len)

325

{

326

u32 i;

327

ntfschar u;

328

329

for (i = 0; i < name_len; i++)

330

if ((u = le16_to_cpu(name[i])) < upcase_len)

331

name[i] = upcase[u];

332

}

333

334

/**

335

* ntfs_file_value_upcase - Convert a filename to upper case

336

* @file_name_attr:

337

* @upcase:

338

* @upcase_len:

339

340

* Description...

341

342

* Returns:

343

344

void ntfs_file_value_upcase(FILE_NAME_ATTR *file_name_attr,

345

const ntfschar *upcase, const u32 upcase_len)

346

{

347

ntfs_name_upcase((ntfschar*)&file_name_attr->file_name,

348

file_name_attr->file_name_length, upcase, upcase_len);

349

}

350

351

/**

352

* ntfs_file_values_compare - Which of two filenames should be listed first

353

* @file_name_attr1:

354

* @file_name_attr2:

355

* @err_val:

356

* @ic:

357

* @upcase:

358

* @upcase_len:

359

360

* Description...

361

362

* Returns:

363

364

int ntfs_file_values_compare(const FILE_NAME_ATTR *file_name_attr1,

365

const FILE_NAME_ATTR *file_name_attr2,

366

const int err_val, const IGNORE_CASE_BOOL ic,

367

const ntfschar *upcase, const u32 upcase_len)

368

{

369

return ntfs_names_collate((ntfschar*)&file_name_attr1->file_name,

370

file_name_attr1->file_name_length,

371

(ntfschar*)&file_name_attr2->file_name,

372

file_name_attr2->file_name_length,

373

err_val, ic, upcase, upcase_len);

374

}

375

376

/**

377

* ntfs_ucstombs - convert a little endian Unicode string to a multibyte string

378

* @ins: input Unicode string buffer

379

* @ins_len: length of input string in Unicode characters

380

* @outs: on return contains the (allocated) output multibyte string

381

* @outs_len: length of output buffer in bytes

382

383

* Convert the input little endian, 2-byte Unicode string @ins, of length

384

* @ins_len into the multibyte string format dictated by the current locale.

385

386

* If *@outs is NULL, the function allocates the string and the caller is

387

* responsible for calling free(*@outs); when finished with it.

388

389

* On success the function returns the number of bytes written to the output

390

* string *@outs (>= 0), not counting the terminating NULL byte. If the output

391

* string buffer was allocated, *@outs is set to it.

392

393

* On error, -1 is returned, and errno is set to the error code. The following

394

* error codes can be expected:

395

* EINVAL Invalid arguments (e.g. @ins or @outs is NULL).

396

* EILSEQ The input string cannot be represented as a multibyte

397

* sequence according to the current locale.

398

* ENAMETOOLONG Destination buffer is too small for input string.

399

* ENOMEM Not enough memory to allocate destination buffer.

400

401

int ntfs_ucstombs(const ntfschar *ins, const int ins_len, char **outs,

402

int outs_len)

403

{

404

char *mbs;

405

wchar_t wc;

406

int i, o, mbs_len;

407

int cnt = 0;

408

#ifdef HAVE_MBSINIT

409

mbstate_t mbstate;

410

#endif

411

412

if (!ins || !outs) {

413

errno = EINVAL;

414

return -1;

415

}

416

mbs = *outs;

417

mbs_len = outs_len;

418

if (mbs && !mbs_len) {

419

errno = ENAMETOOLONG;

420

return -1;

421

}

422

if (!mbs) {

423

mbs_len = (ins_len + 1) * MB_CUR_MAX;

424

mbs = ntfs_malloc(mbs_len);

425

if (!mbs)

426

return -1;

427

}

428

#ifdef HAVE_MBSINIT

429

memset(&mbstate, 0, sizeof(mbstate));

430

#else

431

wctomb(NULL, 0);

432

#endif

433

for (i = o = 0; i < ins_len; i++) {

434

/* Reallocate memory if necessary or abort. */

435

if ((int)(o + MB_CUR_MAX) > mbs_len) {

436

char *tc;

437

if (mbs == *outs) {

438

errno = ENAMETOOLONG;

439

return -1;

440

}

441

tc = ntfs_malloc((mbs_len + 64) & ~63);

442

if (!tc)

443

goto err_out;

444

memcpy(tc, mbs, mbs_len);

445

mbs_len = (mbs_len + 64) & ~63;

446

free(mbs);

447

mbs = tc;

448

}

449

/* Convert the LE Unicode character to a CPU wide character. */

450

wc = (wchar_t)le16_to_cpu(ins[i]);

451

if (!wc)

452

break;

453

/* Convert the CPU endian wide character to multibyte. */

454

#ifdef HAVE_MBSINIT

455

cnt = wcrtomb(mbs + o, wc, &mbstate);

456

#else

457

cnt = wctomb(mbs + o, wc);

458

#endif

459

if (cnt == -1)

460

goto err_out;

461

if (cnt <= 0) {

462

ntfs_log_debug("Eeek. cnt <= 0, cnt = %i\n", cnt);

463

errno = EINVAL;

464

goto err_out;

465

}

466

o += cnt;

467

}

468

#ifdef HAVE_MBSINIT

469

/* Make sure we are back in the initial state. */

470

if (!mbsinit(&mbstate)) {

471

ntfs_log_debug("Eeek. mbstate not in initial state!\n");

472

errno = EILSEQ;

473

goto err_out;

474

}

475

#endif

476

/* Now write the NULL character. */

477

mbs[o] = '\0';

478

if (*outs != mbs)

479

*outs = mbs;

480

return o;

481

err_out:

482

if (mbs != *outs) {

483

int eo = errno;

484

free(mbs);

485

errno = eo;

486

}

487

return -1;

488

}

489

490

/**

491

* ntfs_mbstoucs - convert a multibyte string to a little endian Unicode string

492

* @ins: input multibyte string buffer

493

* @outs: on return contains the (allocated) output Unicode string

494

* @outs_len: length of output buffer in Unicode characters

495

496

* Convert the input multibyte string @ins, from the current locale into the

497

* corresponding little endian, 2-byte Unicode string.

498

499

* If *@outs is NULL, the function allocates the string and the caller is

500

* responsible for calling free(*@outs); when finished with it.

501

502

* On success the function returns the number of Unicode characters written to

503

* the output string *@outs (>= 0), not counting the terminating Unicode NULL

504

* character. If the output string buffer was allocated, *@outs is set to it.

505

506

* On error, -1 is returned, and errno is set to the error code. The following

507

* error codes can be expected:

508

* EINVAL Invalid arguments (e.g. @ins or @outs is NULL).

509

* EILSEQ The input string cannot be represented as a Unicode

510

* string according to the current locale.

511

* ENAMETOOLONG Destination buffer is too small for input string.

512

* ENOMEM Not enough memory to allocate destination buffer.

513

514

int ntfs_mbstoucs(const char *ins, ntfschar **outs, int outs_len)

515

{

516

ntfschar *ucs;

517

const char *s;

518

wchar_t wc;

519

int i, o, cnt, ins_len, ucs_len, ins_size;

520

#ifdef HAVE_MBSINIT

521

mbstate_t mbstate;

522

#endif

523

524

if (!ins || !outs) {

525

errno = EINVAL;

526

return -1;

527

}

528

ucs = *outs;

529

ucs_len = outs_len;

530

if (ucs && !ucs_len) {

531

errno = ENAMETOOLONG;

532

return -1;

533

}

534

/* Determine the size of the multi-byte string in bytes. */

535

ins_size = strlen(ins);

536

/* Determine the length of the multi-byte string. */

537

s = ins;

538

#if defined(HAVE_MBSINIT)

539

memset(&mbstate, 0, sizeof(mbstate));

540

ins_len = mbsrtowcs(NULL, (const char **)&s, 0, &mbstate);

541

#ifdef __CYGWIN32__

542

if (!ins_len && *ins) {

543

/* Older Cygwin had broken mbsrtowcs() implementation. */

544

ins_len = strlen(ins);

545

}

546

#endif

547

#elif !defined(DJGPP)

548

ins_len = mbstowcs(NULL, s, 0);

549

#else

550

/* Eeek!!! DJGPP has broken mbstowcs() implementation!!! */

551

ins_len = strlen(ins);

552

#endif

553

if (ins_len == -1)

554

return ins_len;

555

#ifdef HAVE_MBSINIT

556

if ((s != ins) || !mbsinit(&mbstate)) {

557

#else

558

if (s != ins) {

559

#endif

560

errno = EILSEQ;

561

return -1;

562

}

563

/* Add the NULL terminator. */

564

ins_len++;

565

if (!ucs) {

566

ucs_len = ins_len;

567

ucs = ntfs_malloc(ucs_len * sizeof(ntfschar));

568

if (!ucs)

569

return -1;

570

}

571

#ifdef HAVE_MBSINIT

572

memset(&mbstate, 0, sizeof(mbstate));

573

#else

574

mbtowc(NULL, NULL, 0);

575

#endif

576

for (i = o = cnt = 0; i < ins_size; i += cnt, o++) {

577

/* Reallocate memory if necessary or abort. */

578

if (o >= ucs_len) {

579

ntfschar *tc;

580

if (ucs == *outs) {

581

errno = ENAMETOOLONG;

582

return -1;

583

}

584

585

* We will never get here but hey, it's only a bit of

586

* extra code...

587

588

ucs_len = (ucs_len * sizeof(ntfschar) + 64) & ~63;

589

tc = (ntfschar*)realloc(ucs, ucs_len);

590

if (!tc)

591

goto err_out;

592

ucs = tc;

593

ucs_len /= sizeof(ntfschar);

594

}

595

/* Convert the multibyte character to a wide character. */

596

#ifdef HAVE_MBSINIT

597

cnt = mbrtowc(&wc, ins + i, ins_size - i, &mbstate);

598

#else

599

cnt = mbtowc(&wc, ins + i, ins_size - i);

600

#endif

601

if (!cnt)

602

break;

603

if (cnt == -1)

604

goto err_out;

605

if (cnt < -1) {

606

ntfs_log_trace("Eeek. cnt = %i\n", cnt);

607

errno = EINVAL;

608

goto err_out;

609

}

610

/* Make sure we are not overflowing the NTFS Unicode set. */

611

if ((unsigned long)wc >= (unsigned long)(1 <<

612

(8 * sizeof(ntfschar)))) {

613

errno = EILSEQ;

614

goto err_out;

615

}

616

/* Convert the CPU wide character to a LE Unicode character. */

617

ucs[o] = cpu_to_le16(wc);

618

}

619

#ifdef HAVE_MBSINIT

620

/* Make sure we are back in the initial state. */

621

if (!mbsinit(&mbstate)) {

622

ntfs_log_trace("Eeek. mbstate not in initial state!\n");

623

errno = EILSEQ;

624

goto err_out;

625

}

626

#endif

627

/* Now write the NULL character. */

628

ucs[o] = cpu_to_le16(L'\0');

629

if (*outs != ucs)

630

*outs = ucs;

631

return o;

632

err_out:

633

if (ucs != *outs) {

634

int eo = errno;

635

free(ucs);

636

errno = eo;

637

}

638

return -1;

639

}

640

641

/**

642

* ntfs_upcase_table_build - build the default upcase table for NTFS

643

* @uc: destination buffer where to store the built table

644

* @uc_len: size of destination buffer in bytes

645

646

* ntfs_upcase_table_build() builds the default upcase table for NTFS and

647

* stores it in the caller supplied buffer @uc of size @uc_len.

648

649

* Note, @uc_len must be at least 128kiB in size or bad things will happen!

650

651

void ntfs_upcase_table_build(ntfschar *uc, u32 uc_len)

652

{

653

static int uc_run_table[][3] = { /* Start, End, Add */

654

{0x0061, 0x007B, -32}, {0x0451, 0x045D, -80}, {0x1F70, 0x1F72, 74},

655

{0x00E0, 0x00F7, -32}, {0x045E, 0x0460, -80}, {0x1F72, 0x1F76, 86},

656

{0x00F8, 0x00FF, -32}, {0x0561, 0x0587, -48}, {0x1F76, 0x1F78, 100},

657

{0x0256, 0x0258, -205}, {0x1F00, 0x1F08, 8}, {0x1F78, 0x1F7A, 128},

658

{0x028A, 0x028C, -217}, {0x1F10, 0x1F16, 8}, {0x1F7A, 0x1F7C, 112},

659

{0x03AC, 0x03AD, -38}, {0x1F20, 0x1F28, 8}, {0x1F7C, 0x1F7E, 126},

660

{0x03AD, 0x03B0, -37}, {0x1F30, 0x1F38, 8}, {0x1FB0, 0x1FB2, 8},

661

{0x03B1, 0x03C2, -32}, {0x1F40, 0x1F46, 8}, {0x1FD0, 0x1FD2, 8},

662

{0x03C2, 0x03C3, -31}, {0x1F51, 0x1F52, 8}, {0x1FE0, 0x1FE2, 8},

663

{0x03C3, 0x03CC, -32}, {0x1F53, 0x1F54, 8}, {0x1FE5, 0x1FE6, 7},

664

{0x03CC, 0x03CD, -64}, {0x1F55, 0x1F56, 8}, {0x2170, 0x2180, -16},

665

{0x03CD, 0x03CF, -63}, {0x1F57, 0x1F58, 8}, {0x24D0, 0x24EA, -26},

666

{0x0430, 0x0450, -32}, {0x1F60, 0x1F68, 8}, {0xFF41, 0xFF5B, -32},

667

{0}

668

};

669

static int uc_dup_table[][2] = { /* Start, End */

670

{0x0100, 0x012F}, {0x01A0, 0x01A6}, {0x03E2, 0x03EF}, {0x04CB, 0x04CC},

671

{0x0132, 0x0137}, {0x01B3, 0x01B7}, {0x0460, 0x0481}, {0x04D0, 0x04EB},

672

{0x0139, 0x0149}, {0x01CD, 0x01DD}, {0x0490, 0x04BF}, {0x04EE, 0x04F5},

673

{0x014A, 0x0178}, {0x01DE, 0x01EF}, {0x04BF, 0x04BF}, {0x04F8, 0x04F9},

674

{0x0179, 0x017E}, {0x01F4, 0x01F5}, {0x04C1, 0x04C4}, {0x1E00, 0x1E95},

675

{0x018B, 0x018B}, {0x01FA, 0x0218}, {0x04C7, 0x04C8}, {0x1EA0, 0x1EF9},

676

{0}

677

};

678

static int uc_byte_table[][2] = { /* Offset, Value */

679

{0x00FF, 0x0178}, {0x01AD, 0x01AC}, {0x01F3, 0x01F1}, {0x0269, 0x0196},

680

{0x0183, 0x0182}, {0x01B0, 0x01AF}, {0x0253, 0x0181}, {0x026F, 0x019C},

681

{0x0185, 0x0184}, {0x01B9, 0x01B8}, {0x0254, 0x0186}, {0x0272, 0x019D},

682

{0x0188, 0x0187}, {0x01BD, 0x01BC}, {0x0259, 0x018F}, {0x0275, 0x019F},

683

{0x018C, 0x018B}, {0x01C6, 0x01C4}, {0x025B, 0x0190}, {0x0283, 0x01A9},

684

{0x0192, 0x0191}, {0x01C9, 0x01C7}, {0x0260, 0x0193}, {0x0288, 0x01AE},

685

{0x0199, 0x0198}, {0x01CC, 0x01CA}, {0x0263, 0x0194}, {0x0292, 0x01B7},

686

{0x01A8, 0x01A7}, {0x01DD, 0x018E}, {0x0268, 0x0197},

687

{0}

688

};

689

int i, r;

690

691

memset((char*)uc, 0, uc_len);

692

uc_len >>= 1;

693

if (uc_len > 65536)

694

uc_len = 65536;

695

for (i = 0; (u32)i < uc_len; i++)

696

uc[i] = i;

697

for (r = 0; uc_run_table[r][0]; r++)

698

for (i = uc_run_table[r][0]; i < uc_run_table[r][1]; i++)

699

uc[i] += uc_run_table[r][2];

700

for (r = 0; uc_dup_table[r][0]; r++)

701

for (i = uc_dup_table[r][0]; i < uc_dup_table[r][1]; i += 2)

702

uc[i + 1]--;

703

for (r = 0; uc_byte_table[r][0]; r++)

704

uc[uc_byte_table[r][0]] = uc_byte_table[r][1];

705

}

706

707

/**

708

* ntfs_str2ucs - convert a string to a valid NTFS file name

709

* @s: input string

710

* @len: length of output buffer in Unicode characters

711

712

* Convert the input @s string into the corresponding little endian,

713

* 2-byte Unicode string. The length of the converted string is less

714

* or equal to the maximum length allowed by the NTFS format (255).

715

716

* If @s is NULL then return AT_UNNAMED.

717

718

* On success the function returns the Unicode string in an allocated

719

* buffer and the caller is responsible to free it when it's not needed

720

* anymore.

721

722

* On error NULL is returned and errno is set to the error code.

723

724

ntfschar *ntfs_str2ucs(const char *s, int *len)

725

{

726

ntfschar *ucs = NULL;

727

728

if (s && ((*len = ntfs_mbstoucs(s, &ucs, 0)) == -1)) {

729

ntfs_log_perror("Couldn't convert '%s' to Unicode", s);

730

return NULL;

731

}

732

if (*len > NTFS_MAX_NAME_LEN) {

733

free(ucs);

734

errno = ENAMETOOLONG;

735

return NULL;

736

}

737

if (!ucs || !*len) {

738

ucs = AT_UNNAMED;

739

*len = 0;

740

}

741

return ucs;

742

}

743

744

/**

745

* ntfs_ucsfree - free memory allocated by ntfs_str2ucs()

746

* @ucs input string to be freed

747

748

* Free memory at @ucs and which was allocated by ntfs_str2ucs.

749

750

* Return value: none.

751

752

void ntfs_ucsfree(ntfschar *ucs)

753

{

754

if (ucs && (ucs != AT_UNNAMED))

755

free(ucs);

756

}

757

Older »