~ubuntu-branches/ubuntu/trusty/postgresql-9.3/trusty-proposed

« back to all changes in this revision

Viewing changes to src/backend/tsearch/spell.c

  • Committer: Package Import Robot
  • Author(s): Martin Pitt
  • Date: 2016-03-31 11:04:53 UTC
  • mfrom: (1.1.11) (18.1.4 trusty-security)
  • Revision ID: package-import@ubuntu.com-20160331110453-h6xfs9f11suj3mze
Tags: 9.3.12-0ubuntu0.14.04
* New upstream bug fix release. (LP: #1564268)
  - See http://www.postgresql.org/about/news/1656/ for details.

Show diffs side-by-side

added added

removed removed

Lines of Context:
169
169
        return NULL;
170
170
}
171
171
 
 
172
static char *
 
173
findchar2(char *str, int c1, int c2)
 
174
{
 
175
        while (*str)
 
176
        {
 
177
                if (t_iseq(str, c1) || t_iseq(str, c2))
 
178
                        return str;
 
179
                str += pg_mblen(str);
 
180
        }
 
181
 
 
182
        return NULL;
 
183
}
 
184
 
172
185
 
173
186
/* backward string compare for suffix tree operations */
174
187
static int
457
470
        Conf->naffixes++;
458
471
}
459
472
 
 
473
 
 
474
/* Parsing states for parse_affentry() and friends */
460
475
#define PAE_WAIT_MASK   0
461
 
#define PAE_INMASK      1
 
476
#define PAE_INMASK              1
462
477
#define PAE_WAIT_FIND   2
463
 
#define PAE_INFIND      3
 
478
#define PAE_INFIND              3
464
479
#define PAE_WAIT_REPL   4
465
 
#define PAE_INREPL      5
466
 
 
 
480
#define PAE_INREPL              5
 
481
#define PAE_WAIT_TYPE   6
 
482
#define PAE_WAIT_FLAG   7
 
483
 
 
484
/*
 * Extract the next whitespace-delimited field from an .affix file line.
 *
 * *str is the read position; it is advanced as input is consumed.  When a
 * field is terminated by whitespace, *str is left pointing at that
 * whitespace character.
 * next is the output buffer; the field value is copied there and
 * null-terminated.
 *
 * The buffer at "next" must be of size BUFSIZ.  Characters that would not
 * fit (leaving room for the terminator) are not copied, so an overlong
 * field is truncated rather than overrunning the buffer.
 *
 * Returns true if a nonempty field was found.  Returns false if a '#' is
 * seen before any field character, or if only whitespace remains; note
 * that in the '#' case "next" is left unmodified.
 */
static bool
get_nextfield(char **str, char *next)
{
        bool            in_field = false;
        int             room = BUFSIZ;
        int             clen;

        for (; **str; *str += pg_mblen(*str))
        {
                if (in_field)
                {
                        /* whitespace ends the field we are collecting */
                        if (t_isspace(*str))
                        {
                                *next = '\0';
                                return true;
                        }
                }
                else
                {
                        /* '#' ahead of any field character means no field here */
                        if (t_iseq(*str, '#'))
                                return false;
                        /* skip leading whitespace */
                        if (t_isspace(*str))
                                continue;
                        in_field = true;
                }

                /* copy the current character, but only if it fits */
                clen = pg_mblen(*str);
                if (clen < room)
                {
                        COPYCHAR(next, *str);
                        next += clen;
                        room -= clen;
                }
        }

        *next = '\0';

        /* reaching end of input is OK if we collected a nonempty field */
        return in_field;
}
 
545
 
 
546
/*
 * Split one entry of a MySpell/Hunspell-format .affix file into fields.
 *
 * An .affix file entry has the following format:
 * - header
 *       <type>  <flag>  <cross_flag>  <flag_count>
 * - fields after header:
 *       <type>  <flag>  <find>  <replace>      <mask>
 *
 * str is the input line.
 * type, flag, find, repl and mask receive the successive field values, in
 * that order; each must be a buffer of size BUFSIZ.
 *
 * Returns the number of fields found (0..5).  All five buffers are reset
 * to empty strings first, so any field not present reads as "".
 */
static int
parse_ooaffentry(char *str, char *type, char *flag, char *find,
                                 char *repl, char *mask)
{
        char       *dest[5];
        int                     fields_read = 0;

        /* destination buffers, in the order the fields appear on the line */
        dest[0] = type;
        dest[1] = flag;
        dest[2] = find;
        dest[3] = repl;
        dest[4] = mask;

        *type = *flag = *find = *repl = *mask = '\0';

        /* stop at end of line, or once all five fields have been read */
        while (*str && fields_read < 5)
        {
                if (!get_nextfield(&str, dest[fields_read]))
                        break;                          /* early EOL */
                fields_read++;
        }

        return fields_read;
}
 
609
 
 
610
/*
 
611
 * Parses entry of an .affix file of Ispell format
 
612
 *
 
613
 * An .affix file entry has the following format:
 
614
 * <mask>  >  [-<find>,]<replace>
 
615
 */
467
616
static bool
468
617
parse_affentry(char *str, char *mask, char *find, char *repl)
469
618
{
618
767
        int                     flag = 0;
619
768
        char            flagflags = 0;
620
769
        tsearch_readline_state trst;
621
 
        int                     scanread = 0;
622
 
        char            scanbuf[BUFSIZ];
623
770
        char       *recoded;
624
771
 
625
772
        /* read file to find any flag */
682
829
        }
683
830
        tsearch_readline_end(&trst);
684
831
 
685
 
        sprintf(scanbuf, "%%6s %%%ds %%%ds %%%ds %%%ds", BUFSIZ / 5, BUFSIZ / 5, BUFSIZ / 5, BUFSIZ / 5);
686
 
 
687
832
        if (!tsearch_readline_begin(&trst, filename))
688
833
                ereport(ERROR,
689
834
                                (errcode(ERRCODE_CONFIG_FILE_ERROR),
692
837
 
693
838
        while ((recoded = tsearch_readline(&trst)) != NULL)
694
839
        {
 
840
                int                     fields_read;
 
841
 
695
842
                if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
696
843
                        goto nextline;
697
844
 
698
 
                scanread = sscanf(recoded, scanbuf, type, sflag, find, repl, mask);
 
845
                fields_read = parse_ooaffentry(recoded, type, sflag, find, repl, mask);
699
846
 
700
847
                if (ptype)
701
848
                        pfree(ptype);
702
849
                ptype = lowerstr_ctx(Conf, type);
703
 
                if (scanread < 4 || (STRNCMP(ptype, "sfx") && STRNCMP(ptype, "pfx")))
 
850
                if (fields_read < 4 ||
 
851
                        (STRNCMP(ptype, "sfx") != 0 && STRNCMP(ptype, "pfx") != 0))
704
852
                        goto nextline;
705
853
 
706
 
                if (scanread == 4)
 
854
                if (fields_read == 4)
707
855
                {
708
856
                        if (strlen(sflag) != 1)
709
857
                                goto nextline;
721
869
 
722
870
                        if (strlen(sflag) != 1 || flag != *sflag || flag == 0)
723
871
                                goto nextline;
 
872
                        /* Get flags after '/' (flags are case sensitive) */
 
873
                        if ((ptr = strchr(repl, '/')) != NULL)
 
874
                        {
 
875
                                ptr++;
 
876
                                while (*ptr)
 
877
                                {
 
878
                                        aflg |= Conf->flagval[*(unsigned char *) ptr];
 
879
                                        ptr++;
 
880
                                }
 
881
                        }
 
882
                        /* Get lowercased version of string before '/' */
724
883
                        prepl = lowerstr_ctx(Conf, repl);
725
 
                        /* affix flag */
726
884
                        if ((ptr = strchr(prepl, '/')) != NULL)
727
 
                        {
728
885
                                *ptr = '\0';
729
 
                                ptr = repl + (ptr - prepl) + 1;
730
 
                                while (*ptr)
731
 
                                {
732
 
                                        aflg |= Conf->flagval[*(unsigned char *) ptr];
733
 
                                        ptr++;
734
 
                                }
735
 
                        }
736
886
                        pfind = lowerstr_ctx(Conf, find);
737
887
                        pmask = lowerstr_ctx(Conf, mask);
738
888
                        if (t_iseq(find, '0'))
800
950
 
801
951
                if (STRNCMP(pstr, "compoundwords") == 0)
802
952
                {
803
 
                        s = findchar(pstr, 'l');
 
953
                        /* Find case-insensitive L flag in non-lowercased string */
 
954
                        s = findchar2(recoded, 'l', 'L');
804
955
                        if (s)
805
956
                        {
806
 
                                s = recoded + (s - pstr);               /* we need non-lowercased
807
 
                                                                                                 * string */
808
957
                                while (*s && !t_isspace(s))
809
958
                                        s += pg_mblen(s);
810
959
                                while (*s && t_isspace(s))