~ubuntu-branches/ubuntu/trusty/hyphen/trusty-proposed

« back to all changes in this revision

Viewing changes to hyphen.c

Committer: Package Import Robot
Author(s): Rene Engelhard
Date: 2013-04-19 12:44:40 UTC
mfrom: (3.1.8 sid)
Revision ID: package-import@ubuntu.com-20130419124440-fnrajxs2a224h61i

Tags: 2.8.6-2

upload to unstable

files modified:
ChangeLog

Makefile.in

NEWS

aclocal.m4

configure

configure.in

debian/changelog

doc/Makefile.in

example.c

hyphen.c

ltmain.sh

substrings.c

tests/Makefile.in

tests/rhmin.hyph

tests/rhmin.pat

tests/rhmin.word

tests/test.sh

Show diffs side-by-side

added added

removed removed

hyphen.c

422

}

423

dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0);

424

} else {

425

strcpy(dict[k]->cset, dict[0]->cset);

425

strncpy(dict[k]->cset, dict[0]->cset, sizeof(dict[k]->cset)-1);

426

dict[k]->cset[sizeof(dict[k]->cset)-1] = '\0';

426

427

dict[k]->utf8 = dict[0]->utf8;

427

428

}

428

429

435

436

}

436

437

} else if (k == 1) {

437

438

/* default first level: hyphen and ASCII apostrophe */

438

if (!dict[0]->utf8) hnj_hyphen_load_line("NOHYPHEN '\n", dict[k], hashtab);

439

else hnj_hyphen_load_line("NOHYPHEN ',\xe2\x80\x93,\xe2\x80\x99\n", dict[k], hashtab);

440

strcpy(buf, "1-1/=,1,1\n"); // buf rewritten by hnj_hyphen_load here

439

if (!dict[0]->utf8) hnj_hyphen_load_line("NOHYPHEN ',-\n", dict[k], hashtab);

440

else hnj_hyphen_load_line("NOHYPHEN ',\xe2\x80\x93,\xe2\x80\x99,-\n", dict[k], hashtab);

441

strncpy(buf, "1-1\n", MAX_CHARS-1); // buf rewritten by hnj_hyphen_load here

442

buf[MAX_CHARS-1] = '\0';

441

443

hnj_hyphen_load_line(buf, dict[k], hashtab); /* remove hyphen */

442

444

hnj_hyphen_load_line("1'1\n", dict[k], hashtab); /* ASCII apostrophe */

443

445

if (dict[0]->utf8) {

543

545

const char *word, int word_size,

544

546

char *hyphens)

545

547

{

546

char prep_word_buf[MAX_WORD];

547

548

char *prep_word;

548

549

int i, j, k;

549

550

int state;

552

553

char *match;

553

554

int offset;

554

555

if (word_size + 3 < MAX_WORD)

556

prep_word = prep_word_buf;

557

else

558

prep_word = hnj_malloc (word_size + 3);

556

prep_word = hnj_malloc (word_size + 3);

559

557

560

558

j = 0;

561

559

prep_word[j++] = '.';

662

660

hyphens[i] = '0';

663

661

hyphens[word_size] = '\0';

664

662

665

if (prep_word != prep_word_buf)

666

hnj_free (prep_word);

663

hnj_free (prep_word);

667

664

668

665

return 0;

669

666

}

737

734

int hnj_hyphen_rhmin(int utf8, const char *word, int word_size, char * hyphens,

738

735

char *** rep, int ** pos, int ** cut, int rhmin)

739

736

{

740

int i = 1;

737

int i = 0;

741

738

int j;

742

739

743

740

// ignore numbers

744

741

for (j = word_size - 1; j > 0 && word[j] <= '9' && word[j] >= '0'; j--) i--;

745

742

746

for (j = word_size - 2; i < rhmin && j > 0; j--) {

743

for (j = word_size - 1; i < rhmin && j > 0; j--) {

747

744

// check length of the non-standard part

748

745

if (*rep && *pos && *cut && (*rep)[j]) {

749

746

char * rh = strchr((*rep)[j], '=');

756

753

} else {

757

754

hyphens[j] = '0';

758

755

}

759

if (!utf8 || (word[j] & 0xc0) != 0xc0) i++;

756

if (!utf8 || (word[j] & 0xc0) == 0xc0 || (word[j] & 0x80) != 0x80) i++;

760

757

}

761

758

return 0;

762

759

}

766

763

char * hyphens, char *** rep, int ** pos, int ** cut,

767

764

int clhmin, int crhmin, int lend, int rend)

768

765

{

769

char prep_word_buf[MAX_WORD];

770

766

char *prep_word;

771

767

int i, j, k;

772

768

int state;

777

773

signed char replindex;

778

774

signed char replcut;

779

775

int offset;

780

int matchlen_buf[MAX_CHARS];

781

int matchindex_buf[MAX_CHARS];

782

char * matchrepl_buf[MAX_CHARS];

783

776

int * matchlen;

784

777

int * matchindex;

785

778

char ** matchrepl;

786

779

int isrepl = 0;

787

780

int nHyphCount;

788

781

789

if (word_size + 3 < MAX_CHARS) {

790

prep_word = prep_word_buf;

791

matchlen = matchlen_buf;

792

matchindex = matchindex_buf;

793

matchrepl = matchrepl_buf;

794

} else {

795

prep_word = hnj_malloc (word_size + 3);

796

matchlen = hnj_malloc ((word_size + 3) * sizeof(int));

797

matchindex = hnj_malloc ((word_size + 3) * sizeof(int));

798

matchrepl = hnj_malloc ((word_size + 3) * sizeof(char *));

799

}

782

size_t prep_word_size = word_size + 3;

783

prep_word = hnj_malloc (prep_word_size);

784

matchlen = hnj_malloc ((word_size + 3) * sizeof(int));

785

matchindex = hnj_malloc ((word_size + 3) * sizeof(int));

786

matchrepl = hnj_malloc ((word_size + 3) * sizeof(char *));

800

787

801

788

j = 0;

802

789

prep_word[j++] = '.';

933

920

for (i = 0; i < word_size; i++) {

934

921

if (isrepl && (matchindex[i] >= 0) && matchrepl[matchindex[i]]) {

935

922

if (rep && pos && cut) {

936

if (!*rep && !*pos && !*cut) {

937

int k;

938

*rep = (char **) malloc(sizeof(char *) * word_size);

939

*pos = (int *) malloc(sizeof(int) * word_size);

940

*cut = (int *) malloc(sizeof(int) * word_size);

941

for (k = 0; k < word_size; k++) {

942

(*rep)[k] = NULL;

943

(*pos)[k] = 0;

944

(*cut)[k] = 0;

945

}

923

if (!*rep)

924

*rep = (char **) calloc(word_size, sizeof(char *));

925

if (!*pos)

926

*pos = (int *) calloc(word_size, sizeof(int));

927

if (!*cut) {

928

*cut = (int *) calloc(word_size, sizeof(int));

946

929

}

947

930

(*rep)[matchindex[i] - 1] = hnj_strdup(matchrepl[matchindex[i]]);

948

931

(*pos)[matchindex[i] - 1] = matchindex[i] - i;

953

936

}

954

937

}

955

938

956

if (matchrepl != matchrepl_buf) {

957

hnj_free (matchrepl);

958

hnj_free (matchlen);

959

hnj_free (matchindex);

960

}

939

hnj_free (matchrepl);

940

hnj_free (matchlen);

941

hnj_free (matchindex);

961

942

962

943

// recursive hyphenation of the first (compound) level segments

963

944

if (dict->nextlevel) {

964

char * rep2_buf[MAX_WORD];

965

int pos2_buf[MAX_WORD];

966

int cut2_buf[MAX_WORD];

967

char hyphens2_buf[MAX_WORD];

968

945

char ** rep2;

969

946

int * pos2;

970

947

int * cut2;

971

948

char * hyphens2;

972

949

int begin = 0;

973

if (word_size < MAX_CHARS) {

974

rep2 = rep2_buf;

975

pos2 = pos2_buf;

976

cut2 = cut2_buf;

977

hyphens2 = hyphens2_buf;

978

} else {

979

rep2 = hnj_malloc (word_size * sizeof(char *));

980

pos2 = hnj_malloc (word_size * sizeof(int));

981

cut2 = hnj_malloc (word_size * sizeof(int));

982

hyphens2 = hnj_malloc (word_size);

983

}

950

951

rep2 = hnj_malloc (word_size * sizeof(char *));

952

pos2 = hnj_malloc (word_size * sizeof(int));

953

cut2 = hnj_malloc (word_size * sizeof(int));

954

hyphens2 = hnj_malloc (word_size + 3);

984

955

for (i = 0; i < word_size; i++) rep2[i] = NULL;

985

956

for (i = 0; i < word_size; i++) if

986

957

(hyphens[i]&1 || (begin > 0 && i + 1 == word_size)) {

988

959

int hyph = 0;

989

960

prep_word[i + 2] = '\0';

990

961

/* non-standard hyphenation at compound boundary (Schiffahrt) */

991

if (*rep && *pos && *cut && (*rep)[i]) {

962

if (rep && *rep && *pos && *cut && (*rep)[i]) {

992

963

char * l = strchr((*rep)[i], '=');

993

strcpy(prep_word + 2 + i - (*pos)[i], (*rep)[i]);

964

size_t offset = 2 + i - (*pos)[i];

965

strncpy(prep_word + offset, (*rep)[i], prep_word_size - offset - 1);

966

prep_word[prep_word_size - 1] = '\0';

994

967

if (l) {

995

968

hyph = (l - (*rep)[i]) - (*pos)[i];

996

969

prep_word[2 + i + hyph] = '\0';

1020

993

}

1021

994

prep_word[i + 2] = word[i + 1];

1022

995

if (*rep && *pos && *cut && (*rep)[i]) {

1023

strcpy(prep_word + 1, word);

996

size_t offset = 1;

997

strncpy(prep_word + offset, word, prep_word_size - offset - 1);

998

prep_word[prep_word_size - 1] = '\0';

1024

999

}

1025

1000

}

1026

1001

begin = i + 1;

1037

1012

rep, pos, cut, crhmin);

1038

1013

}

1039

1014

1040

if (rep2 != rep2_buf) {

1041

free(rep2);

1042

free(cut2);

1043

free(pos2);

1044

free(hyphens2);

1045

}

1015

free(rep2);

1016

free(cut2);

1017

free(pos2);

1018

free(hyphens2);

1046

1019

}

1047

1020

1048

if (prep_word != prep_word_buf) hnj_free (prep_word);

1021

hnj_free (prep_word);

1049

1022

return 0;

1050

1023

}

1051

1024

1095

1068

void hnj_hyphen_hyphword(const char * word, int l, const char * hyphens,

1096

1069

char * hyphword, char *** rep, int ** pos, int ** cut)

1097

1070

{

1071

int hyphenslen = l + 5;

1072

1098

1073

int i, j;

1099

1074

for (i = 0, j = 0; i < l; i++, j++) {

1100

1075

if (hyphens[i]&1) {

1101

1076

hyphword[j] = word[i];

1102

1077

if (*rep && *pos && *cut && (*rep)[i]) {

1103

strcpy(hyphword + j - (*pos)[i] + 1, (*rep)[i]);

1078

size_t offset = j - (*pos)[i] + 1;

1079

strncpy(hyphword + offset, (*rep)[i], hyphenslen - offset - 1);

1080

hyphword[hyphenslen-1] = '\0';

1104

1081

j += strlen((*rep)[i]) - (*pos)[i];

1105

1082

i += (*cut)[i] - (*pos)[i];

1106

1083

} else hyphword[++j] = '=';

Older »