~alinuxninja/nginx-edge/trunk

/**
 * Does this scalar Unicode value need multiple code units for storage?
 * True for every value above 0x7f. The comparison is done on (uint32_t)(c),
 * so a negative c also yields true.
 * @deprecated ICU 2.4. Use U8_LENGTH or test ((uint32_t)(c)>0x7f) instead, see utf_old.h.
 */
#define UTF8_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0x7f)

293

294

/**
 * How many bytes are taken by the code point c in UTF-8?
 * ICU does not deal with code points >0x10ffff
 * unless necessary for advancing in the byte stream.
 *
 * These length macros take into account that for values >0x10ffff
 * the UTF8_APPEND_CHAR_SAFE macros would write the error code point 0xffff
 * with 3 bytes.
 * Code point comparisons need to be in uint32_t because UChar32
 * may be a signed type, and negative values must be recognized.
 *
 * The active (#if 1) variant yields 1..4, and 3 for anything outside
 * 0..0x10ffff. The inactive variant would additionally report 5 and 6
 * for values up to 0x3ffffff and 0x7fffffff.
 *
 * @deprecated ICU 2.4. Use U8_LENGTH instead, see utf_old.h.
 */
#if 1
#   define UTF8_CHAR_LENGTH(c) \
        ((uint32_t)(c)<=0x7f ? 1 : \
            ((uint32_t)(c)<=0x7ff ? 2 : \
                ((uint32_t)((c)-0x10000)>0xfffff ? 3 : 4) \
            ) \
        )
#else
#   define UTF8_CHAR_LENGTH(c) \
        ((uint32_t)(c)<=0x7f ? 1 : \
            ((uint32_t)(c)<=0x7ff ? 2 : \
                ((uint32_t)(c)<=0xffff ? 3 : \
                    ((uint32_t)(c)<=0x10ffff ? 4 : \
                        ((uint32_t)(c)<=0x3ffffff ? 5 : \
                            ((uint32_t)(c)<=0x7fffffff ? 6 : 3) \
                        ) \
                    ) \
                ) \
            ) \
        )
#endif

328

329

/** The maximum number of bytes per code point. @deprecated ICU 2.4. Renamed to U8_MAX_LENGTH, see utf_old.h. */
#define UTF8_MAX_CHAR_LENGTH 4

/**
 * Average number of code units compared to UTF-16.
 * Evaluates to (5*size)/2 using the arithmetic of the argument's type.
 * @deprecated ICU 2.4. Obsolete, see utf_old.h.
 */
#define UTF8_ARRAY_SIZE(size) ((5*(size))/2)

334

335

/**
 * Set c to the code point that contains the byte at index i; i itself is
 * not modified. The macro works on an int32_t copy of i: it first moves the
 * copy back with UTF8_SET_CHAR_START_UNSAFE and then decodes forward with
 * UTF8_NEXT_CHAR_UNSAFE.
 * @deprecated ICU 2.4. Renamed to U8_GET_UNSAFE, see utf_old.h.
 */
#define UTF8_GET_CHAR_UNSAFE(s, i, c) { \
    int32_t _utf8_get_char_unsafe_index=(int32_t)(i); \
    UTF8_SET_CHAR_START_UNSAFE(s, _utf8_get_char_unsafe_index); \
    UTF8_NEXT_CHAR_UNSAFE(s, _utf8_get_char_unsafe_index, c); \
}

/**
 * Counterpart of UTF8_GET_CHAR_UNSAFE built on the _SAFE helpers:
 * an int32_t copy of i is passed to UTF8_SET_CHAR_START_SAFE (with start)
 * and then to UTF8_NEXT_CHAR_SAFE (with length and strict). i is not modified.
 * @deprecated ICU 2.4. Use U8_GET instead, see utf_old.h.
 */
#define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    int32_t _utf8_get_char_safe_index=(int32_t)(i); \
    UTF8_SET_CHAR_START_SAFE(s, start, _utf8_get_char_safe_index); \
    UTF8_NEXT_CHAR_SAFE(s, _utf8_get_char_safe_index, length, c, strict); \
}

348

349

/**
 * Read the byte at s[i] into c and post-increment i. If that byte lies in
 * 0xc0..0xf4 (the test (uint8_t)(c-0xc0)<0x35), the number of trail bytes is
 * taken from UTF8_COUNT_TRAIL_BYTES, the lead byte is masked with
 * UTF8_MASK_LEAD_BYTE, and up to three following bytes are folded into c
 * (6 bits each), advancing i past them. No bounds or well-formedness checks.
 * @deprecated ICU 2.4. Renamed to U8_NEXT_UNSAFE, see utf_old.h.
 */
#define UTF8_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[(i)++]; \
    if((uint8_t)((c)-0xc0)<0x35) { \
        uint8_t __count=UTF8_COUNT_TRAIL_BYTES(c); \
        UTF8_MASK_LEAD_BYTE(c, __count); \
        switch(__count) { \
        /* each following branch falls through to the next one */ \
        case 3: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
        case 2: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
        case 1: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
        /* no other branches to optimize switch() */ \
            break; \
        } \
    } \
}

368

369

/**
 * Write the UTF-8 encoding of c into s starting at index i and advance i
 * past the bytes written (1 byte up to 0x7f, 2 up to 0x7ff, 3 up to 0xffff,
 * otherwise 4). Neither the buffer capacity nor the validity of c is checked.
 * The nested else-branches share the trailing stores: each level emits its
 * own lead byte and then falls out to the common trail-byte writes.
 * @deprecated ICU 2.4. Renamed to U8_APPEND_UNSAFE, see utf_old.h.
 */
#define UTF8_APPEND_CHAR_UNSAFE(s, i, c) { \
    if((uint32_t)(c)<=0x7f) { \
        (s)[(i)++]=(uint8_t)(c); \
    } else { \
        if((uint32_t)(c)<=0x7ff) { \
            (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
        } else { \
            if((uint32_t)(c)<=0xffff) { \
                (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
            } else { \
                (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
                (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
            } \
            (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
        } \
        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
    } \
}

388

389

/**
 * Advance i by one code point: 1 plus the trail-byte count that
 * UTF8_COUNT_TRAIL_BYTES reports for the byte at s[i]. No bounds checks.
 * @deprecated ICU 2.4. Renamed to U8_FWD_1_UNSAFE, see utf_old.h.
 */
#define UTF8_FWD_1_UNSAFE(s, i) { \
    (i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \
}

/**
 * Advance i by n code points by applying UTF8_FWD_1_UNSAFE n times.
 * n is evaluated once into an int32_t counter; n<=0 leaves i unchanged.
 * @deprecated ICU 2.4. Renamed to U8_FWD_N_UNSAFE, see utf_old.h.
 */
#define UTF8_FWD_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF8_FWD_1_UNSAFE(s, i); \
        --__N; \
    } \
}

/**
 * Move i backward while s[i] is a trail byte according to UTF8_IS_TRAIL.
 * There is no lower bound check on i.
 * @deprecated ICU 2.4. Renamed to U8_SET_CP_START_UNSAFE, see utf_old.h.
 */
#define UTF8_SET_CHAR_START_UNSAFE(s, i) { \
    while(UTF8_IS_TRAIL((s)[i])) { --(i); } \
}

407

408

/**
 * Read the byte at s[i] into c and post-increment i. Values below 0x80 are
 * returned as is. For a lead byte (UTF8_IS_LEAD) the rest of the sequence is
 * decoded by utf8_nextCharSafeBody(), which receives &i, length and strict;
 * any other byte >=0x80 sets c to UTF8_ERROR_VALUE_1.
 * @deprecated ICU 2.4. Use U8_NEXT instead, see utf_old.h.
 */
#define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c)=(s)[(i)++]; \
    if((c)>=0x80) { \
        if(UTF8_IS_LEAD(c)) { \
            (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict); \
        } else { \
            (c)=UTF8_ERROR_VALUE_1; \
        } \
    } \
}

419

420

/**
 * Append code point c to s at index i. Values up to 0x7f are stored directly
 * as one byte and i is post-incremented (length is not consulted on this
 * path). All other values are delegated to utf8_appendCharSafeBody(), whose
 * return value becomes the new i; NULL is passed as its last argument.
 * @deprecated ICU 2.4. Use U8_APPEND instead, see utf_old.h.
 */
#define UTF8_APPEND_CHAR_SAFE(s, i, length, c) { \
    if((uint32_t)(c)<=0x7f) { \
        (s)[(i)++]=(uint8_t)(c); \
    } else { \
        (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, NULL); \
    } \
}

428

429

/** Alias for U8_FWD_1(s, i, length). @deprecated ICU 2.4. Renamed to U8_FWD_1, see utf_old.h. */
#define UTF8_FWD_1_SAFE(s, i, length) U8_FWD_1(s, i, length)

/** Alias for U8_FWD_N(s, i, length, n). @deprecated ICU 2.4. Renamed to U8_FWD_N, see utf_old.h. */
#define UTF8_FWD_N_SAFE(s, i, length, n) U8_FWD_N(s, i, length, n)

/** Alias for U8_SET_CP_START(s, start, i). @deprecated ICU 2.4. Renamed to U8_SET_CP_START, see utf_old.h. */
#define UTF8_SET_CHAR_START_SAFE(s, start, i) U8_SET_CP_START(s, start, i)

437

438

/**
 * Pre-decrement i and read s[i] into c. If that byte is a trail byte
 * (UTF8_IS_TRAIL), keep stepping backward: each further byte below 0xc0
 * contributes its low 6 bits at the next higher 6-bit position, and the first
 * byte >=0xc0 is treated as the lead byte, masked with UTF8_MASK_LEAD_BYTE
 * for the number of trail bytes seen, merged into c, and ends the loop.
 * No bounds or well-formedness checks.
 * @deprecated ICU 2.4. Renamed to U8_PREV_UNSAFE, see utf_old.h.
 */
#define UTF8_PREV_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[--(i)]; \
    if(UTF8_IS_TRAIL(c)) { \
        uint8_t __b, __count=1, __shift=6; \
\
        /* c is a trail byte */ \
        (c)&=0x3f; \
        for(;;) { \
            __b=(s)[--(i)]; \
            if(__b>=0xc0) { \
                UTF8_MASK_LEAD_BYTE(__b, __count); \
                (c)|=(UChar32)__b<<__shift; \
                break; \
            } else { \
                (c)|=(UChar32)(__b&0x3f)<<__shift; \
                ++__count; \
                __shift+=6; \
            } \
        } \
    } \
}

460

461

/**
 * Move i backward by one code point: pre-decrement i at least once and keep
 * decrementing while the byte at the new index is a trail byte
 * (UTF8_IS_TRAIL). No lower bound check.
 * @deprecated ICU 2.4. Renamed to U8_BACK_1_UNSAFE, see utf_old.h.
 */
#define UTF8_BACK_1_UNSAFE(s, i) { \
    while(UTF8_IS_TRAIL((s)[--(i)])) {} \
}

/**
 * Move i backward by n code points by applying UTF8_BACK_1_UNSAFE n times.
 * n is evaluated once into an int32_t counter; n<=0 leaves i unchanged.
 * @deprecated ICU 2.4. Renamed to U8_BACK_N_UNSAFE, see utf_old.h.
 */
#define UTF8_BACK_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF8_BACK_1_UNSAFE(s, i); \
        --__N; \
    } \
}

/**
 * Adjust i to a code point limit by stepping back one code point
 * (UTF8_BACK_1_UNSAFE) and then forward one code point (UTF8_FWD_1_UNSAFE).
 * @deprecated ICU 2.4. Renamed to U8_SET_CP_LIMIT_UNSAFE, see utf_old.h.
 */
#define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) { \
    UTF8_BACK_1_UNSAFE(s, i); \
    UTF8_FWD_1_UNSAFE(s, i); \
}

480

481

/**
 * Pre-decrement i and read s[i] into c. Values below 0x80 are returned as is.
 * A byte in 0x80..0xbf is handed to utf8_prevCharSafeBody() together with
 * start, &i and strict; a byte above 0xbf sets c to UTF8_ERROR_VALUE_1.
 * @deprecated ICU 2.4. Use U8_PREV instead, see utf_old.h.
 */
#define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c)=(s)[--(i)]; \
    if((c)>=0x80) { \
        if((c)<=0xbf) { \
            (c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \
        } else { \
            (c)=UTF8_ERROR_VALUE_1; \
        } \
    } \
}

492

493

/** Alias for U8_BACK_1(s, start, i). @deprecated ICU 2.4. Renamed to U8_BACK_1, see utf_old.h. */
#define UTF8_BACK_1_SAFE(s, start, i) U8_BACK_1(s, start, i)

/** Alias for U8_BACK_N(s, start, i, n). @deprecated ICU 2.4. Renamed to U8_BACK_N, see utf_old.h. */
#define UTF8_BACK_N_SAFE(s, start, i, n) U8_BACK_N(s, start, i, n)

/** Alias for U8_SET_CP_LIMIT(s, start, i, length). @deprecated ICU 2.4. Renamed to U8_SET_CP_LIMIT, see utf_old.h. */
#define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) U8_SET_CP_LIMIT(s, start, i, length)

501

502

/* Formerly utf16.h --------------------------------------------------------- */

503

504

/**
 * Is uchar a first/lead surrogate?
 * True when the value with its low 10 bits cleared equals 0xd800 (0xd800..0xdbff).
 * @deprecated ICU 2.4. Renamed to U_IS_LEAD and U16_IS_LEAD, see utf_old.h.
 */
#define UTF_IS_FIRST_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xd800)

/**
 * Is uchar a second/trail surrogate?
 * True when the value with its low 10 bits cleared equals 0xdc00 (0xdc00..0xdfff).
 * @deprecated ICU 2.4. Renamed to U_IS_TRAIL and U16_IS_TRAIL, see utf_old.h.
 */
#define UTF_IS_SECOND_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xdc00)

/**
 * Assuming c is a surrogate, is it a first/lead surrogate?
 * Only bit 0x400 is inspected, so the result is meaningless for non-surrogates.
 * @deprecated ICU 2.4. Renamed to U_IS_SURROGATE_LEAD and U16_IS_SURROGATE_LEAD, see utf_old.h.
 */
#define UTF_IS_SURROGATE_FIRST(c) (((c)&0x400)==0)

512

513

/**
 * Helper constant for UTF16_GET_PAIR_VALUE: the amount that must be
 * subtracted from (lead<<10)+trail to obtain the supplementary code point.
 * @deprecated ICU 2.4. Renamed to U16_SURROGATE_OFFSET, see utf_old.h.
 */
#define UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)

/**
 * Get the UTF-32 value from the surrogate code units.
 * The arguments are not checked for actually being a lead/trail surrogate.
 * @deprecated ICU 2.4. Renamed to U16_GET_SUPPLEMENTARY, see utf_old.h.
 */
#define UTF16_GET_PAIR_VALUE(first, second) \
    (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET)

519

520

/**
 * Lead surrogate for a supplementary code point: (supplementary>>10)+0xd7c0,
 * cast to UChar. The argument is not range-checked.
 * The expansion is fully parenthesized so it can be used inside any expression.
 * @deprecated ICU 2.4. Renamed to U16_LEAD, see utf_old.h.
 */
#define UTF_FIRST_SURROGATE(supplementary) ((UChar)(((supplementary)>>10)+0xd7c0))

/**
 * Trail surrogate for a supplementary code point: the low 10 bits of the
 * argument combined with 0xdc00, cast to UChar.
 * @deprecated ICU 2.4. Renamed to U16_TRAIL, see utf_old.h.
 */
#define UTF_SECOND_SURROGATE(supplementary) ((UChar)(((supplementary)&0x3ff)|0xdc00))

/** Same as UTF_FIRST_SURROGATE. @deprecated ICU 2.4. Renamed to U16_LEAD, see utf_old.h. */
#define UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary)

/** Same as UTF_SECOND_SURROGATE. @deprecated ICU 2.4. Renamed to U16_TRAIL, see utf_old.h. */
#define UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary)

531

532

/**
 * Negation of UTF_IS_SURROGATE(uchar).
 * The expansion is parenthesized so it composes safely with other operators.
 * @deprecated ICU 2.4. Renamed to U16_IS_SINGLE, see utf_old.h.
 */
#define UTF16_IS_SINGLE(uchar) (!UTF_IS_SURROGATE(uchar))

/** Same as UTF_IS_FIRST_SURROGATE. @deprecated ICU 2.4. Renamed to U16_IS_LEAD, see utf_old.h. */
#define UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar)

/** Same as UTF_IS_SECOND_SURROGATE. @deprecated ICU 2.4. Renamed to U16_IS_TRAIL, see utf_old.h. */
#define UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar)

540

541

/**
 * Does this scalar Unicode value need multiple code units for storage?
 * True for every value above 0xffff (compared as uint32_t).
 * @deprecated ICU 2.4. Use U16_LENGTH or test ((uint32_t)(c)>0xffff) instead, see utf_old.h.
 */
#define UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0xffff)

/**
 * Number of UTF-16 code units for c: 1 up to 0xffff, otherwise 2.
 * @deprecated ICU 2.4. Renamed to U16_LENGTH, see utf_old.h.
 */
#define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)

/** The maximum number of code units per code point. @deprecated ICU 2.4. Renamed to U16_MAX_LENGTH, see utf_old.h. */
#define UTF16_MAX_CHAR_LENGTH 2

/** Average number of code units compared to UTF-16 (identity). @deprecated ICU 2.4. Obsolete, see utf_old.h. */
#define UTF16_ARRAY_SIZE(size) (size)

552

553

/**
 * Get a single code point from an offset that points to any
 * of the code units that belong to that code point.
 * Assume 0<=i<length.
 *
 * This could be used for iteration together with
 * UTF16_CHAR_LENGTH() and UTF_IS_ERROR(),
 * but the use of UTF16_NEXT_CHAR[_UNSAFE]() and
 * UTF16_PREV_CHAR[_UNSAFE]() is more efficient for that.
 *
 * If s[i] is a surrogate, it is combined with s[i+1] (when it is a lead
 * surrogate) or with s[i-1] (otherwise) without checking the neighbor.
 * i is not modified.
 * @deprecated ICU 2.4. Renamed to U16_GET_UNSAFE, see utf_old.h.
 */
#define UTF16_GET_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[i]; \
    if(UTF_IS_SURROGATE(c)) { \
        if(UTF_IS_SURROGATE_FIRST(c)) { \
            (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \
        } else { \
            (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \
        } \
    } \
}

574

575

/**
 * Set c to the code point that contains the code unit s[i]; i is not modified.
 * A lead surrogate is paired with s[i+1] only if i+1<length and that unit is
 * a trail surrogate; a trail surrogate is paired with s[i-1] only if
 * i-1>=start and that unit is a lead surrogate. An unpaired surrogate is left
 * in c unless strict is set, in which case c becomes UTF_ERROR_VALUE.
 * With strict set, a non-surrogate that fails UTF_IS_UNICODE_CHAR() also
 * yields UTF_ERROR_VALUE.
 * @deprecated ICU 2.4. Use U16_GET instead, see utf_old.h.
 */
#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    (c)=(s)[i]; \
    if(UTF_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(UTF_IS_SURROGATE_FIRST(c)) { \
            if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \
                (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
                /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
            } else if(strict) {\
                /* unmatched first surrogate */ \
                (c)=UTF_ERROR_VALUE; \
            } \
        } else { \
            if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
                (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
                /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
            } else if(strict) {\
                /* unmatched second surrogate */ \
                (c)=UTF_ERROR_VALUE; \
            } \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}

601

602

/**
 * Read the code unit at s[i] into c and post-increment i. If it is a lead
 * surrogate, combine it with the next code unit (not checked) and advance i
 * once more.
 * @deprecated ICU 2.4. Renamed to U16_NEXT_UNSAFE, see utf_old.h.
 */
#define UTF16_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[(i)++]; \
    if(UTF_IS_FIRST_SURROGATE(c)) { \
        (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \
    } \
}

609

610

/**
 * Write code point c to s at index i as UTF-16 and advance i: one code unit
 * for values up to 0xffff, otherwise a lead/trail surrogate pair.
 * Neither the buffer capacity nor the validity of c is checked.
 * @deprecated ICU 2.4. Renamed to U16_APPEND_UNSAFE, see utf_old.h.
 */
#define UTF16_APPEND_CHAR_UNSAFE(s, i, c) { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else { \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } \
}

619

620

/**
 * Advance i by one code point: always by one code unit, and by a second one
 * when the unit just passed is a lead surrogate. No bounds checks.
 * @deprecated ICU 2.4. Renamed to U16_FWD_1_UNSAFE, see utf_old.h.
 */
#define UTF16_FWD_1_UNSAFE(s, i) { \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \
        ++(i); \
    } \
}

/**
 * Advance i by n code points by applying UTF16_FWD_1_UNSAFE n times.
 * n is evaluated once into an int32_t counter; n<=0 leaves i unchanged.
 * @deprecated ICU 2.4. Renamed to U16_FWD_N_UNSAFE, see utf_old.h.
 */
#define UTF16_FWD_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF16_FWD_1_UNSAFE(s, i); \
        --__N; \
    } \
}

/**
 * If s[i] is a trail surrogate, decrement i by one; otherwise leave it.
 * The preceding unit is not inspected.
 * @deprecated ICU 2.4. Renamed to U16_SET_CP_START_UNSAFE, see utf_old.h.
 */
#define UTF16_SET_CHAR_START_UNSAFE(s, i) { \
    if(UTF_IS_SECOND_SURROGATE((s)[i])) { \
        --(i); \
    } \
}

642

643

/**
 * Read the code unit at s[i] into c and post-increment i. A lead surrogate is
 * combined with s[i] only if i<length and that unit is a trail surrogate, in
 * which case i is advanced past it. An unmatched lead surrogate is left in c
 * unless strict is set, in which case c becomes UTF_ERROR_VALUE. With strict
 * set, any other unit that fails UTF_IS_UNICODE_CHAR() also yields
 * UTF_ERROR_VALUE.
 * @deprecated ICU 2.4. Use U16_NEXT instead, see utf_old.h.
 */
#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c)=(s)[(i)++]; \
    if(UTF_IS_FIRST_SURROGATE(c)) { \
        uint16_t __c2; \
        if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \
            ++(i); \
            (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
            /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
        } else if(strict) {\
            /* unmatched first surrogate */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        /* unmatched second surrogate or other non-character */ \
        (c)=UTF_ERROR_VALUE; \
    } \
}

661

662

/**
 * Append code point c to s at index i and advance i.
 * - c<=0xffff: stored as a single unit (length is not consulted on this path).
 * - c<=0x10ffff: stored as a surrogate pair if i+1<length, otherwise a single
 *   UTF_ERROR_VALUE unit is written.
 * - anything else: a single UTF_ERROR_VALUE unit is written.
 * @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h.
 */
#define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else if((uint32_t)(c)<=0x10ffff) { \
        if((i)+1<(length)) { \
            (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
            (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
        } else /* not enough space */ { \
            (s)[(i)++]=UTF_ERROR_VALUE; \
        } \
    } else /* c>0x10ffff, write error value */ { \
        (s)[(i)++]=UTF_ERROR_VALUE; \
    } \
}

677

678

/** Alias for U16_FWD_1(s, i, length). @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h. */
#define UTF16_FWD_1_SAFE(s, i, length) U16_FWD_1(s, i, length)

/** Alias for U16_FWD_N(s, i, length, n). @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h. */
#define UTF16_FWD_N_SAFE(s, i, length, n) U16_FWD_N(s, i, length, n)

/** Alias for U16_SET_CP_START(s, start, i). @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h. */
#define UTF16_SET_CHAR_START_SAFE(s, start, i) U16_SET_CP_START(s, start, i)

686

687

/**
 * Pre-decrement i and read s[i] into c. If it is a trail surrogate, decrement
 * i again and combine with that preceding unit (not checked).
 * @deprecated ICU 2.4. Renamed to U16_PREV_UNSAFE, see utf_old.h.
 */
#define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[--(i)]; \
    if(UTF_IS_SECOND_SURROGATE(c)) { \
        (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \
    } \
}

/**
 * Move i backward by one code point: always by one code unit, and by a second
 * one when the unit now at s[i] is a trail surrogate. No bounds checks.
 * @deprecated ICU 2.4. Renamed to U16_BACK_1_UNSAFE, see utf_old.h.
 */
#define UTF16_BACK_1_UNSAFE(s, i) { \
    if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \
        --(i); \
    } \
}

/**
 * Move i backward by n code points by applying UTF16_BACK_1_UNSAFE n times.
 * n is evaluated once into an int32_t counter; n<=0 leaves i unchanged.
 * @deprecated ICU 2.4. Renamed to U16_BACK_N_UNSAFE, see utf_old.h.
 */
#define UTF16_BACK_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF16_BACK_1_UNSAFE(s, i); \
        --__N; \
    } \
}

/**
 * If the unit before i (s[i-1]) is a lead surrogate, increment i by one;
 * otherwise leave it. The unit at s[i] is not inspected.
 * @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT_UNSAFE, see utf_old.h.
 */
#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) { \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
        ++(i); \
    } \
}

717

718

/**
 * Pre-decrement i and read s[i] into c. A trail surrogate is combined with
 * s[i-1] only if i>start and that unit is a lead surrogate, in which case i
 * is decremented past it. An unmatched trail surrogate is left in c unless
 * strict is set, in which case c becomes UTF_ERROR_VALUE. With strict set,
 * any other unit that fails UTF_IS_UNICODE_CHAR() also yields UTF_ERROR_VALUE.
 * @deprecated ICU 2.4. Use U16_PREV instead, see utf_old.h.
 */
#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c)=(s)[--(i)]; \
    if(UTF_IS_SECOND_SURROGATE(c)) { \
        uint16_t __c2; \
        if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
            --(i); \
            (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
            /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
        } else if(strict) {\
            /* unmatched second surrogate */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        /* unmatched first surrogate or other non-character */ \
        (c)=UTF_ERROR_VALUE; \
    } \
}

736

737

/** Alias for U16_BACK_1(s, start, i). @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h. */
#define UTF16_BACK_1_SAFE(s, start, i) U16_BACK_1(s, start, i)

/** Alias for U16_BACK_N(s, start, i, n). @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h. */
#define UTF16_BACK_N_SAFE(s, start, i, n) U16_BACK_N(s, start, i, n)

/** Alias for U16_SET_CP_LIMIT(s, start, i, length). @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h. */
#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)

745

746

/* Formerly utf32.h --------------------------------------------------------- */

747

748

749

/*
 * Old documentation:
 *
 * This file defines macros to deal with UTF-32 code units and code points.
 * Signatures and semantics are the same as for the similarly named macros
 * in utf16.h.
 * utf32.h is included by utf.h after unicode/umachine.h
 * and some common definitions.
 * Usage: ICU coding guidelines for if() statements should be followed when using these macros.
 * Compound statements (curly braces {}) must be used for if-else-while...
 * bodies and all macro statements should be terminated with semicolon.
 */

759

760

761

/* internal definitions ----------------------------------------------------- */

762

763

/**
 * Is c acceptable as a UTF-32 code unit?
 * Without strict, any value in 0..0x10ffff (compared as uint32_t) passes;
 * with strict, the decision is delegated to UTF_IS_UNICODE_CHAR(c).
 * @deprecated ICU 2.4. Obsolete, see utf_old.h.
 */
#define UTF32_IS_SAFE(c, strict) \
    (!(strict) ? \
        (uint32_t)(c)<=0x10ffff : \
        UTF_IS_UNICODE_CHAR(c))

768

769

770

/*
 * For the semantics of all of these macros, see utf16.h.
 * The UTF-32 versions are trivial because any code point is
 * encoded using exactly one code unit.
 */

773

774

775

/* single-code point definitions -------------------------------------------- */

776

777

/* classes of code unit values */

778

779

/** Every UTF-32 code unit is a complete code point: always 1. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
#define UTF32_IS_SINGLE(uchar) 1
/** UTF-32 has no lead units: always 0. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
#define UTF32_IS_LEAD(uchar) 0
/** UTF-32 has no trail units: always 0. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
#define UTF32_IS_TRAIL(uchar) 0

/* number of code units per code point */

/** Always 0: no code point needs more than one UTF-32 unit. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
#define UTF32_NEED_MULTIPLE_UCHAR(c) 0
/** Always 1. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
#define UTF32_CHAR_LENGTH(c) 1
/** Always 1. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
#define UTF32_MAX_CHAR_LENGTH 1

/* average number of code units compared to UTF-16 */

/** Identity: evaluates to (size). @deprecated ICU 2.4. Obsolete, see utf_old.h. */
#define UTF32_ARRAY_SIZE(size) (size)

799

800

/**
 * Set c to s[i]; i is not modified and nothing is validated.
 * @deprecated ICU 2.4. Obsolete, see utf_old.h.
 */
#define UTF32_GET_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[i]; \
}

/**
 * Set c to s[i], replacing it with UTF_ERROR_VALUE when
 * UTF32_IS_SAFE(c, strict) is false. start and length are not used.
 * @deprecated ICU 2.4. Obsolete, see utf_old.h.
 */
#define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    (c)=(s)[i]; \
    if(!UTF32_IS_SAFE(c, strict)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}

812

813

/* definitions with forward iteration --------------------------------------- */

814

815

/** Set c to s[i] and post-increment i. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
#define UTF32_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[(i)++]; \
}

/** Store c at s[i] and post-increment i; c is not validated. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
#define UTF32_APPEND_CHAR_UNSAFE(s, i, c) { \
    (s)[(i)++]=(c); \
}

/** Increment i by one; s is not used. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
#define UTF32_FWD_1_UNSAFE(s, i) { \
    ++(i); \
}

/** Add n to i; s is not used. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
#define UTF32_FWD_N_UNSAFE(s, i, n) { \
    (i)+=(n); \
}

/** No-op: every UTF-32 index is already a code point start. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
#define UTF32_SET_CHAR_START_UNSAFE(s, i) { \
}

838

839

/**
 * Set c to s[i] and post-increment i; c is replaced by UTF_ERROR_VALUE when
 * UTF32_IS_SAFE(c, strict) is false. length is not used.
 * @deprecated ICU 2.4. Obsolete, see utf_old.h.
 */
#define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c)=(s)[(i)++]; \
    if(!UTF32_IS_SAFE(c, strict)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/**
 * Store c at s[i] and post-increment i; values above 0x10ffff (compared as
 * uint32_t) are replaced by 0xfffd. length is not used.
 * @deprecated ICU 2.4. Obsolete, see utf_old.h.
 */
#define UTF32_APPEND_CHAR_SAFE(s, i, length, c) { \
    if((uint32_t)(c)<=0x10ffff) { \
        (s)[(i)++]=(c); \
    } else /* c>0x10ffff, write 0xfffd */ { \
        (s)[(i)++]=0xfffd; \
    } \
}

/** Increment i by one; s and length are not used. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
#define UTF32_FWD_1_SAFE(s, i, length) { \
    ++(i); \
}

/** Add n to i and clamp the result to length. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
#define UTF32_FWD_N_SAFE(s, i, length, n) { \
    if(((i)+=(n))>(length)) { \
        (i)=(length); \
    } \
}

/** No-op: every UTF-32 index is already a code point start. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
#define UTF32_SET_CHAR_START_SAFE(s, start, i) { \
}

871

872

/* definitions with backward iteration -------------------------------------- */

873

874

/** Pre-decrement i and set c to s[i]. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
#define UTF32_PREV_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[--(i)]; \
}

/** Decrement i by one; s is not used. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
#define UTF32_BACK_1_UNSAFE(s, i) { \
    --(i); \
}

/** Subtract n from i; s is not used. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
#define UTF32_BACK_N_UNSAFE(s, i, n) { \
    (i)-=(n); \
}

/** No-op: every UTF-32 index is already a code point limit. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
#define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) { \
}

892

893

/**
 * Pre-decrement i and set c to s[i]; c is replaced by UTF_ERROR_VALUE when
 * UTF32_IS_SAFE(c, strict) is false. start is not used.
 * @deprecated ICU 2.4. Obsolete, see utf_old.h.
 */
#define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c)=(s)[--(i)]; \
    if(!UTF32_IS_SAFE(c, strict)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/** Decrement i by one; s and start are not used. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
#define UTF32_BACK_1_SAFE(s, start, i) { \
    --(i); \
}

/** Subtract n from i and clamp the result to start. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
#define UTF32_BACK_N_SAFE(s, start, i, n) { \
    (i)-=(n); \
    if((i)<(start)) { \
        (i)=(start); \
    } \
}

/**
 * No-op: every UTF-32 index is already a code point limit.
 * Note that this macro takes (s, i, length), without a start parameter.
 * @deprecated ICU 2.4. Obsolete, see utf_old.h.
 */
#define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) { \
}

917

918

/* Formerly utf.h, part 2 --------------------------------------------------- */

919

920

/**
 * Estimate the number of code units for a string based on the number of UTF-16 code units.
 *
 * @deprecated ICU 2.4. Obsolete, see utf_old.h.
 */
#define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size)

/** Alias for UTF16_GET_CHAR_UNSAFE. @deprecated ICU 2.4. Renamed to U16_GET_UNSAFE, see utf_old.h. */
#define UTF_GET_CHAR_UNSAFE(s, i, c)                 UTF16_GET_CHAR_UNSAFE(s, i, c)

/** Alias for UTF16_GET_CHAR_SAFE. @deprecated ICU 2.4. Use U16_GET instead, see utf_old.h. */
#define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)


/** Alias for UTF16_NEXT_CHAR_UNSAFE. @deprecated ICU 2.4. Renamed to U16_NEXT_UNSAFE, see utf_old.h. */
#define UTF_NEXT_CHAR_UNSAFE(s, i, c)                UTF16_NEXT_CHAR_UNSAFE(s, i, c)

/** Alias for UTF16_NEXT_CHAR_SAFE. @deprecated ICU 2.4. Use U16_NEXT instead, see utf_old.h. */
#define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict)  UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)


/** Alias for UTF16_APPEND_CHAR_UNSAFE. @deprecated ICU 2.4. Renamed to U16_APPEND_UNSAFE, see utf_old.h. */
#define UTF_APPEND_CHAR_UNSAFE(s, i, c)              UTF16_APPEND_CHAR_UNSAFE(s, i, c)

/** Alias for UTF16_APPEND_CHAR_SAFE. @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h. */
#define UTF_APPEND_CHAR_SAFE(s, i, length, c)        UTF16_APPEND_CHAR_SAFE(s, i, length, c)


/** Alias for UTF16_FWD_1_UNSAFE. @deprecated ICU 2.4. Renamed to U16_FWD_1_UNSAFE, see utf_old.h. */
#define UTF_FWD_1_UNSAFE(s, i)                       UTF16_FWD_1_UNSAFE(s, i)

/** Alias for UTF16_FWD_1_SAFE. @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h. */
#define UTF_FWD_1_SAFE(s, i, length)                 UTF16_FWD_1_SAFE(s, i, length)


/** Alias for UTF16_FWD_N_UNSAFE. @deprecated ICU 2.4. Renamed to U16_FWD_N_UNSAFE, see utf_old.h. */
#define UTF_FWD_N_UNSAFE(s, i, n)                    UTF16_FWD_N_UNSAFE(s, i, n)

/** Alias for UTF16_FWD_N_SAFE. @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h. */
#define UTF_FWD_N_SAFE(s, i, length, n)              UTF16_FWD_N_SAFE(s, i, length, n)


/** Alias for UTF16_SET_CHAR_START_UNSAFE. @deprecated ICU 2.4. Renamed to U16_SET_CP_START_UNSAFE, see utf_old.h. */
#define UTF_SET_CHAR_START_UNSAFE(s, i)              UTF16_SET_CHAR_START_UNSAFE(s, i)

/** Alias for UTF16_SET_CHAR_START_SAFE. @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h. */
#define UTF_SET_CHAR_START_SAFE(s, start, i)         UTF16_SET_CHAR_START_SAFE(s, start, i)

967

968

969

/** Alias for UTF16_PREV_CHAR_UNSAFE. @deprecated ICU 2.4. Renamed to U16_PREV_UNSAFE, see utf_old.h. */
#define UTF_PREV_CHAR_UNSAFE(s, i, c)                UTF16_PREV_CHAR_UNSAFE(s, i, c)

/** Alias for UTF16_PREV_CHAR_SAFE. @deprecated ICU 2.4. Use U16_PREV instead, see utf_old.h. */
#define UTF_PREV_CHAR_SAFE(s, start, i, c, strict)   UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)


/** Alias for UTF16_BACK_1_UNSAFE. @deprecated ICU 2.4. Renamed to U16_BACK_1_UNSAFE, see utf_old.h. */
#define UTF_BACK_1_UNSAFE(s, i)                      UTF16_BACK_1_UNSAFE(s, i)

/** Alias for UTF16_BACK_1_SAFE. @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h. */
#define UTF_BACK_1_SAFE(s, start, i)                 UTF16_BACK_1_SAFE(s, start, i)


/** Alias for UTF16_BACK_N_UNSAFE. @deprecated ICU 2.4. Renamed to U16_BACK_N_UNSAFE, see utf_old.h. */
#define UTF_BACK_N_UNSAFE(s, i, n)                   UTF16_BACK_N_UNSAFE(s, i, n)

/** Alias for UTF16_BACK_N_SAFE. @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h. */
#define UTF_BACK_N_SAFE(s, start, i, n)              UTF16_BACK_N_SAFE(s, start, i, n)


/** Alias for UTF16_SET_CHAR_LIMIT_UNSAFE. @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT_UNSAFE, see utf_old.h. */
#define UTF_SET_CHAR_LIMIT_UNSAFE(s, i)              UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)

/** Alias for UTF16_SET_CHAR_LIMIT_SAFE. @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h. */
#define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)

995

996

/* Define default macros (UTF-16 "safe") ------------------------------------ */

997

998

/**
 * Does this code unit alone encode a code point (BMP, not a surrogate)?
 * Same as UTF16_IS_SINGLE.
 * @deprecated ICU 2.4. Renamed to U_IS_SINGLE and U16_IS_SINGLE, see utf_old.h.
 */
#define UTF_IS_SINGLE(uchar) U16_IS_SINGLE(uchar)

/**
 * Is this code unit the first one of several (a lead surrogate)?
 * Same as UTF16_IS_LEAD.
 * @deprecated ICU 2.4. Renamed to U_IS_LEAD and U16_IS_LEAD, see utf_old.h.
 */
#define UTF_IS_LEAD(uchar) U16_IS_LEAD(uchar)

/**
 * Is this code unit one of several but not the first one (a trail surrogate)?
 * Same as UTF16_IS_TRAIL.
 * @deprecated ICU 2.4. Renamed to U_IS_TRAIL and U16_IS_TRAIL, see utf_old.h.
 */
#define UTF_IS_TRAIL(uchar) U16_IS_TRAIL(uchar)

/**
 * Does this code point require multiple code units (is it a supplementary code point)?
 * Same as UTF16_NEED_MULTIPLE_UCHAR.
 * @deprecated ICU 2.4. Use U16_LENGTH or test ((uint32_t)(c)>0xffff) instead.
 */
#define UTF_NEED_MULTIPLE_UCHAR(c) UTF16_NEED_MULTIPLE_UCHAR(c)

/**
 * How many code units are used to encode this code point (1 or 2)?
 * Same as UTF16_CHAR_LENGTH.
 * @deprecated ICU 2.4. Renamed to U16_LENGTH, see utf_old.h.
 */
#define UTF_CHAR_LENGTH(c) U16_LENGTH(c)

/**
 * How many code units are used at most for any Unicode code point (2)?
 * Same as UTF16_MAX_CHAR_LENGTH.
 * @deprecated ICU 2.4. Renamed to U16_MAX_LENGTH, see utf_old.h.
 */
#define UTF_MAX_CHAR_LENGTH U16_MAX_LENGTH

1039

1040

/**
 * Set c to the code point that contains the code unit i.
 * i could point to the lead or the trail surrogate for the code point.
 * i is not modified.
 * Same as UTF16_GET_CHAR.
 * \pre 0<=i<length
 *
 * @deprecated ICU 2.4. Renamed to U16_GET, see utf_old.h.
 */
#define UTF_GET_CHAR(s, start, i, length, c) U16_GET(s, start, i, length, c)

/**
 * Set c to the code point that starts at code unit i
 * and advance i to beyond the code units of this code point (post-increment).
 * i must point to the first code unit of a code point.
 * Otherwise c is set to the trail unit (surrogate) itself.
 * Same as UTF16_NEXT_CHAR.
 * \pre 0<=i<length
 * \post 0<i<=length
 *
 * @deprecated ICU 2.4. Renamed to U16_NEXT, see utf_old.h.
 */
#define UTF_NEXT_CHAR(s, i, length, c) U16_NEXT(s, i, length, c)

/**
 * Append the code units of code point c to the string at index i
 * and advance i to beyond the new code units (post-increment).
 * The code units beginning at index i will be overwritten.
 * Same as UTF16_APPEND_CHAR.
 * \pre 0<=c<=0x10ffff
 * \pre 0<=i<length
 * \post 0<i<=length
 *
 * @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h.
 */
#define UTF_APPEND_CHAR(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)

/**
 * Advance i to beyond the code units of the code point that begins at i.
 * I.e., advance i by one code point.
 * Same as UTF16_FWD_1.
 * \pre 0<=i<length
 * \post 0<i<=length
 *
 * @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h.
 */
#define UTF_FWD_1(s, i, length) U16_FWD_1(s, i, length)

/**
 * Advance i to beyond the code units of the n code points where the first one begins at i.
 * I.e., advance i by n code points.
 * Same as UTF16_FWD_N.
 * \pre 0<=i<length
 * \post 0<i<=length
 *
 * @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h.
 */
#define UTF_FWD_N(s, i, length, n) U16_FWD_N(s, i, length, n)

/**
 * Take the random-access index i and adjust it so that it points to the beginning
 * of a code point.
 * The input index points to any code unit of a code point and is moved to point to
 * the first code unit of the same code point. i is never incremented.
 * In other words, if i points to a trail surrogate that is preceded by a matching
 * lead surrogate, then i is decremented. Otherwise it is not modified.
 * This can be used to start an iteration with UTF_NEXT_CHAR() from a random index.
 * Same as UTF16_SET_CHAR_START.
 * \pre start<=i<length
 * \post start<=i<length
 *
 * @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h.
 */
#define UTF_SET_CHAR_START(s, start, i) U16_SET_CP_START(s, start, i)

1114

1115

/**
 * Set c to the code point that has code units before i
 * and move i backward (towards the beginning of the string)
 * to the first code unit of this code point (pre-decrement).
 * i must point to the first code unit after the last unit of a code point (i==length is allowed).
 * Same as UTF16_PREV_CHAR.
 * \pre start<i<=length
 * \post start<=i<length
 *
 * @deprecated ICU 2.4. Renamed to U16_PREV, see utf_old.h.
 */
#define UTF_PREV_CHAR(s, start, i, c) U16_PREV(s, start, i, c)

/**
 * Move i backward (towards the beginning of the string)
 * to the first code unit of the code point that has code units before i.
 * I.e., move i backward by one code point.
 * i must point to the first code unit after the last unit of a code point (i==length is allowed).
 * Same as UTF16_BACK_1.
 * \pre start<i<=length
 * \post start<=i<length
 *
 * @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h.
 */
#define UTF_BACK_1(s, start, i) U16_BACK_1(s, start, i)

/**
 * Move i backward (towards the beginning of the string)
 * to the first code unit of the n code points that have code units before i.
 * I.e., move i backward by n code points.
 * i must point to the first code unit after the last unit of a code point (i==length is allowed).
 * Same as UTF16_BACK_N.
 * \pre start<i<=length
 * \post start<=i<length
 *
 * @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h.
 */
#define UTF_BACK_N(s, start, i, n) U16_BACK_N(s, start, i, n)

/**
 * Take the random-access index i and adjust it so that it points beyond
 * a code point. The input index points beyond any code unit
 * of a code point and is moved to point beyond the last code unit of the same
 * code point. i is never decremented.
 * In other words, if i points to a trail surrogate that is preceded by a matching
 * lead surrogate, then i is incremented. Otherwise it is not modified.
 * This can be used to start an iteration with UTF_PREV_CHAR() from a random index.
 * Same as UTF16_SET_CHAR_LIMIT.
 * \pre start<i<=length
 * \post start<i<=length
 *
 * @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h.
 */
#define UTF_SET_CHAR_LIMIT(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)

1169

1170

#endif /* U_HIDE_DEPRECATED_API */

1171

1172

#endif

1173

Older »