~snowball-yiddish-dev/snowball-yiddish/trunk

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
routines (
           mark_regions
           main_suffix
           consonant_pair
           other_suffix
           undouble
)

externals ( stem )

strings ( ch )

integers ( p1 )

groupings ( v s_ending )

stringescapes {}

/* special characters (in ISO Latin I) */

stringdef ae   hex 'E6'
stringdef ao   hex 'E5'
stringdef o/   hex 'F8'

define v 'aeiouy{ae}{ao}{o/}'

define s_ending  'abcdfghjklmnoprtvyz{ao}'

define mark_regions as (

    $p1 = limit

    goto v gopast non-v  setmark p1
    try ( $p1 < 3  $p1 = 3 )
)

backwardmode (

    define main_suffix as (
        setlimit tomark p1 for ([substring])
        among(

            'hed' 'ethed' 'ered' 'e' 'erede' 'ende' 'erende' 'ene' 'erne' 'ere'
            'en' 'heden' 'eren' 'er' 'heder' 'erer' 'heds' 'es' 'endes'
            'erendes' 'enes' 'ernes' 'eres' 'ens' 'hedens' 'erens' 'ers' 'ets'
            'erets' 'et' 'eret'
                (delete)
            's'
                (s_ending delete)
        )
    )

    define consonant_pair as (
        test (
            setlimit tomark p1 for ([substring])
            among(
                'gd' // significant in the call from other_suffix
                'dt' 'gt' 'kt'
            )
        )
        next] delete
    )

    define other_suffix as (
        do ( ['st'] 'ig' delete )
        setlimit tomark p1 for ([substring])
        among(
            'ig' 'lig' 'elig' 'els'
                (delete do consonant_pair)
            'l{o/}st'
                (<-'l{o/}s')
        )
    )
    define undouble as (
        setlimit tomark p1 for ([non-v] ->ch)
        ch
        delete
    )
)

define stem as (

    do mark_regions
    backwards (
        do main_suffix
        do consonant_pair
        do other_suffix
        do undouble
    )
)