1
package org.apache.lucene.analysis.br;
4
* Licensed to the Apache Software Foundation (ASF) under one or more
5
* contributor license agreements. See the NOTICE file distributed with
6
* this work for additional information regarding copyright ownership.
7
* The ASF licenses this file to You under the Apache License, Version 2.0
8
* (the "License"); you may not use this file except in compliance with
9
* the License. You may obtain a copy of the License at
11
* http://www.apache.org/licenses/LICENSE-2.0
13
* Unless required by applicable law or agreed to in writing, software
14
* distributed under the License is distributed on an "AS IS" BASIS,
15
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
* See the License for the specific language governing permissions and
17
* limitations under the License.
21
* A stemmer for Brazilian Portuguese words.
23
public class BrazilianStemmer {
35
public BrazilianStemmer() {
39
* Stems the given term to a unique <tt>discriminator</tt>.
41
* @param term The term that should be stemmed.
42
* @return Discriminator for <tt>term</tt>
44
protected String stem( String term ) {
45
boolean altered = false ; // altered the term
50
if ( !isIndexable( CT ) ) {
53
if ( !isStemmable( CT ) ) {
60
TERM = term + ";" +CT ;
79
* Checks whether a term can be processed correctly.
81
* @return true if, and only if, the given term consists only of letters.
83
private boolean isStemmable( String term ) {
84
for ( int c = 0; c < term.length(); c++ ) {
85
// Discard terms that contain non-letter characters.
86
if ( !Character.isLetter(term.charAt(c))) {
94
* Checks whether a term can be indexed.
96
* @return true if it can be indexed
98
private boolean isIndexable( String term ) {
99
return (term.length() < 30) && (term.length() > 2) ;
103
* Checks whether the character is one of 'a','e','i','o','u'
105
* @return true if is vowel
107
private boolean isVowel( char value ) {
108
return (value == 'a') ||
118
* R1 - is the region after the first non-vowel following a vowel,
119
* or is the null region at the end of the word if there is
122
* @return null or a string representing R1
124
private String getR1( String value ) {
134
i = value.length()-1 ;
135
for (j=0 ; j < i ; j++) {
136
if (isVowel(value.charAt(j))) {
145
// find 1st non-vowel
146
for ( ; j < i ; j++) {
147
if (!(isVowel(value.charAt(j)))) {
156
return value.substring(j+1) ;
162
* RV - IF the second letter is a consonant, RV is the region after
163
* the next following vowel,
165
* OR if the first two letters are vowels, RV is the region
166
* after the next consonant,
168
* AND otherwise (consonant-vowel case) RV is the region after
171
* BUT RV is the end of the word if these positions cannot be
174
* @return null or a string representing RV
176
private String getRV( String value ) {
185
i = value.length()-1 ;
187
// RV - IF the second letter is a consonant, RV is the region after
188
// the next following vowel,
189
if ((i > 0) && !isVowel(value.charAt(1))) {
191
for (j=2 ; j < i ; j++) {
192
if (isVowel(value.charAt(j))) {
198
return value.substring(j+1) ;
203
// RV - OR if the first two letters are vowels, RV is the region
204
// after the next consonant,
206
isVowel(value.charAt(0)) &&
207
isVowel(value.charAt(1))) {
209
for (j=2 ; j < i ; j++) {
210
if (!isVowel(value.charAt(j))) {
216
return value.substring(j+1) ;
220
// RV - AND otherwise (consonant-vowel case) RV is the region after
223
return value.substring(3) ;
230
* 1) Turn to lowercase
235
* @return null or a string transformed
237
private String changeTerm( String value ) {
246
value = value.toLowerCase() ;
247
for (j=0 ; j < value.length() ; j++) {
248
if ((value.charAt(j) == 'á') ||
249
(value.charAt(j) == 'â') ||
250
(value.charAt(j) == 'ã')) {
251
r= r + "a" ; continue ;
253
if ((value.charAt(j) == 'é') ||
254
(value.charAt(j) == 'ê')) {
255
r= r + "e" ; continue ;
257
if (value.charAt(j) == 'í') {
258
r= r + "i" ; continue ;
260
if ((value.charAt(j) == 'ó') ||
261
(value.charAt(j) == 'ô') ||
262
(value.charAt(j) == 'õ')) {
263
r= r + "o" ; continue ;
265
if ((value.charAt(j) == 'ú') ||
266
(value.charAt(j) == 'ü')) {
267
r= r + "u" ; continue ;
269
if (value.charAt(j) == 'ç') {
270
r= r + "c" ; continue ;
272
if (value.charAt(j) == 'ñ') {
273
r= r + "n" ; continue ;
276
r= r+ value.charAt(j) ;
283
* Check if a string ends with a suffix
285
* @return true if the string ends with the specified suffix
287
private boolean suffix( String value, String suffix ) {
290
if ((value == null) || (suffix == null)) {
294
if (suffix.length() > value.length()) {
298
return value.substring(value.length()-suffix.length()).equals(suffix);
302
* Replace a string suffix by another
304
* @return the replaced String
306
private String replaceSuffix( String value, String toReplace, String changeTo ) {
310
if ((value == null) ||
311
(toReplace == null) ||
312
(changeTo == null) ) {
316
vvalue = removeSuffix(value,toReplace) ;
318
if (value.equals(vvalue)) {
321
return vvalue + changeTo ;
326
* Remove a string suffix
328
* @return the String without the suffix
330
private String removeSuffix( String value, String toRemove ) {
332
if ((value == null) ||
333
(toRemove == null) ||
334
!suffix(value,toRemove) ) {
338
return value.substring(0,value.length()-toRemove.length()) ;
342
* See if a suffix is preceded by a String
344
* @return true if the suffix is preceded
346
private boolean suffixPreceded( String value, String suffix, String preceded ) {
348
if ((value == null) ||
350
(preceded == null) ||
351
!suffix(value,suffix) ) {
355
return suffix(removeSuffix(value,suffix),preceded) ;
359
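* Creates CT (changed term): lower-cases the term, replaces accented characters (e.g. 'ã', 'õ') with their unaccented equivalents, and strips one leading and one trailing punctuation character.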
361
private void createCT( String term ) {
362
CT = changeTerm(term) ;
364
if (CT.length() < 2) return ;
366
// if the first character is ... , remove it
367
if ((CT.charAt(0) == '"') ||
368
(CT.charAt(0) == '\'') ||
369
(CT.charAt(0) == '-') ||
370
(CT.charAt(0) == ',') ||
371
(CT.charAt(0) == ';') ||
372
(CT.charAt(0) == '.') ||
373
(CT.charAt(0) == '?') ||
374
(CT.charAt(0) == '!')
376
CT = CT.substring(1);
379
if (CT.length() < 2) return ;
381
// if the last character is ... , remove it
382
if ((CT.charAt(CT.length()-1) == '-') ||
383
(CT.charAt(CT.length()-1) == ',') ||
384
(CT.charAt(CT.length()-1) == ';') ||
385
(CT.charAt(CT.length()-1) == '.') ||
386
(CT.charAt(CT.length()-1) == '?') ||
387
(CT.charAt(CT.length()-1) == '!') ||
388
(CT.charAt(CT.length()-1) == '\'') ||
389
(CT.charAt(CT.length()-1) == '"')
391
CT = CT.substring(0,CT.length()-1);
397
* Standard suffix removal.
398
* Search for the longest among the following suffixes, and perform
399
* the following actions:
401
* @return false if no ending was removed
403
private boolean step1() {
404
if (CT == null) return false ;
407
if (suffix(CT,"uciones") && suffix(R2,"uciones")) {
408
CT = replaceSuffix(CT,"uciones","u") ; return true;
412
if (CT.length() >= 6) {
413
if (suffix(CT,"imentos") && suffix(R2,"imentos")) {
414
CT = removeSuffix(CT,"imentos") ; return true;
416
if (suffix(CT,"amentos") && suffix(R2,"amentos")) {
417
CT = removeSuffix(CT,"amentos") ; return true;
419
if (suffix(CT,"adores") && suffix(R2,"adores")) {
420
CT = removeSuffix(CT,"adores") ; return true;
422
if (suffix(CT,"adoras") && suffix(R2,"adoras")) {
423
CT = removeSuffix(CT,"adoras") ; return true;
425
if (suffix(CT,"logias") && suffix(R2,"logias")) {
426
// replaceSuffix returns the rewritten string and leaves its argument untouched,
// so the result must be assigned for the "logias" -> "log" rewrite to take effect.
CT = replaceSuffix(CT,"logias","log") ; return true;
428
if (suffix(CT,"encias") && suffix(R2,"encias")) {
429
CT = replaceSuffix(CT,"encias","ente") ; return true;
431
if (suffix(CT,"amente") && suffix(R1,"amente")) {
432
CT = removeSuffix(CT,"amente") ; return true;
434
if (suffix(CT,"idades") && suffix(R2,"idades")) {
435
CT = removeSuffix(CT,"idades") ; return true;
440
if (CT.length() >= 5) {
441
if (suffix(CT,"acoes") && suffix(R2,"acoes")) {
442
CT = removeSuffix(CT,"acoes") ; return true;
444
if (suffix(CT,"imento") && suffix(R2,"imento")) {
445
CT = removeSuffix(CT,"imento") ; return true;
447
if (suffix(CT,"amento") && suffix(R2,"amento")) {
448
CT = removeSuffix(CT,"amento") ; return true;
450
if (suffix(CT,"adora") && suffix(R2,"adora")) {
451
CT = removeSuffix(CT,"adora") ; return true;
453
if (suffix(CT,"ismos") && suffix(R2,"ismos")) {
454
CT = removeSuffix(CT,"ismos") ; return true;
456
if (suffix(CT,"istas") && suffix(R2,"istas")) {
457
CT = removeSuffix(CT,"istas") ; return true;
459
if (suffix(CT,"logia") && suffix(R2,"logia")) {
460
CT = replaceSuffix(CT,"logia","log") ; return true;
462
if (suffix(CT,"ucion") && suffix(R2,"ucion")) {
463
CT = replaceSuffix(CT,"ucion","u") ; return true;
465
if (suffix(CT,"encia") && suffix(R2,"encia")) {
466
CT = replaceSuffix(CT,"encia","ente") ; return true;
468
if (suffix(CT,"mente") && suffix(R2,"mente")) {
469
CT = removeSuffix(CT,"mente") ; return true;
471
if (suffix(CT,"idade") && suffix(R2,"idade")) {
472
CT = removeSuffix(CT,"idade") ; return true;
477
if (CT.length() >= 4) {
478
if (suffix(CT,"acao") && suffix(R2,"acao")) {
479
CT = removeSuffix(CT,"acao") ; return true;
481
if (suffix(CT,"ezas") && suffix(R2,"ezas")) {
482
CT = removeSuffix(CT,"ezas") ; return true;
484
if (suffix(CT,"icos") && suffix(R2,"icos")) {
485
CT = removeSuffix(CT,"icos") ; return true ;
487
if (suffix(CT,"icas") && suffix(R2,"icas")) {
488
CT = removeSuffix(CT,"icas") ; return true ;
490
if (suffix(CT,"ismo") && suffix(R2,"ismo")) {
491
CT = removeSuffix(CT,"ismo") ; return true ;
493
if (suffix(CT,"avel") && suffix(R2,"avel")) {
494
CT = removeSuffix(CT,"avel") ; return true ;
496
if (suffix(CT,"ivel") && suffix(R2,"ivel")) {
497
CT = removeSuffix(CT,"ivel") ; return true ;
499
if (suffix(CT,"ista") && suffix(R2,"ista")) {
500
CT = removeSuffix(CT,"ista") ; return true ;
502
if (suffix(CT,"osos") && suffix(R2,"osos")) {
503
CT = removeSuffix(CT,"osos") ; return true ;
505
if (suffix(CT,"osas") && suffix(R2,"osas")) {
506
CT = removeSuffix(CT,"osas") ; return true ;
508
if (suffix(CT,"ador") && suffix(R2,"ador")) {
509
CT = removeSuffix(CT,"ador") ; return true ;
511
if (suffix(CT,"ivas") && suffix(R2,"ivas")) {
512
CT = removeSuffix(CT,"ivas") ; return true ;
514
if (suffix(CT,"ivos") && suffix(R2,"ivos")) {
515
CT = removeSuffix(CT,"ivos") ; return true ;
517
if (suffix(CT,"iras") &&
519
suffixPreceded(CT,"iras","e")) {
520
CT = replaceSuffix(CT,"iras","ir") ; return true ;
525
if (CT.length() >= 3) {
526
if (suffix(CT,"eza") && suffix(R2,"eza")) {
527
CT = removeSuffix(CT,"eza") ; return true ;
529
if (suffix(CT,"ico") && suffix(R2,"ico")) {
530
CT = removeSuffix(CT,"ico") ; return true ;
532
if (suffix(CT,"ica") && suffix(R2,"ica")) {
533
CT = removeSuffix(CT,"ica") ; return true ;
535
if (suffix(CT,"oso") && suffix(R2,"oso")) {
536
CT = removeSuffix(CT,"oso") ; return true ;
538
if (suffix(CT,"osa") && suffix(R2,"osa")) {
539
CT = removeSuffix(CT,"osa") ; return true ;
541
if (suffix(CT,"iva") && suffix(R2,"iva")) {
542
CT = removeSuffix(CT,"iva") ; return true ;
544
if (suffix(CT,"ivo") && suffix(R2,"ivo")) {
545
CT = removeSuffix(CT,"ivo") ; return true ;
547
if (suffix(CT,"ira") &&
549
suffixPreceded(CT,"ira","e")) {
550
CT = replaceSuffix(CT,"ira","ir") ; return true ;
554
// no ending was removed by step1
562
* Search for the longest among the following suffixes in RV,
563
* and if found, delete.
565
* @return false if no ending was removed
567
private boolean step2() {
568
if (RV == null) return false ;
571
if (RV.length() >= 7) {
572
if (suffix(RV,"issemos")) {
573
CT = removeSuffix(CT,"issemos") ; return true;
575
if (suffix(RV,"essemos")) {
576
CT = removeSuffix(CT,"essemos") ; return true;
578
if (suffix(RV,"assemos")) {
579
CT = removeSuffix(CT,"assemos") ; return true;
581
if (suffix(RV,"ariamos")) {
582
CT = removeSuffix(CT,"ariamos") ; return true;
584
if (suffix(RV,"eriamos")) {
585
CT = removeSuffix(CT,"eriamos") ; return true;
587
if (suffix(RV,"iriamos")) {
588
CT = removeSuffix(CT,"iriamos") ; return true;
593
if (RV.length() >= 6) {
594
if (suffix(RV,"iremos")) {
595
CT = removeSuffix(CT,"iremos") ; return true;
597
if (suffix(RV,"eremos")) {
598
CT = removeSuffix(CT,"eremos") ; return true;
600
if (suffix(RV,"aremos")) {
601
CT = removeSuffix(CT,"aremos") ; return true;
603
if (suffix(RV,"avamos")) {
604
CT = removeSuffix(CT,"avamos") ; return true;
606
if (suffix(RV,"iramos")) {
607
CT = removeSuffix(CT,"iramos") ; return true;
609
if (suffix(RV,"eramos")) {
610
CT = removeSuffix(CT,"eramos") ; return true;
612
if (suffix(RV,"aramos")) {
613
CT = removeSuffix(CT,"aramos") ; return true;
615
if (suffix(RV,"asseis")) {
616
CT = removeSuffix(CT,"asseis") ; return true;
618
if (suffix(RV,"esseis")) {
619
CT = removeSuffix(CT,"esseis") ; return true;
621
if (suffix(RV,"isseis")) {
622
CT = removeSuffix(CT,"isseis") ; return true;
624
if (suffix(RV,"arieis")) {
625
CT = removeSuffix(CT,"arieis") ; return true;
627
if (suffix(RV,"erieis")) {
628
CT = removeSuffix(CT,"erieis") ; return true;
630
if (suffix(RV,"irieis")) {
631
CT = removeSuffix(CT,"irieis") ; return true;
637
if (RV.length() >= 5) {
638
if (suffix(RV,"irmos")) {
639
CT = removeSuffix(CT,"irmos") ; return true;
641
if (suffix(RV,"iamos")) {
642
CT = removeSuffix(CT,"iamos") ; return true;
644
if (suffix(RV,"armos")) {
645
CT = removeSuffix(CT,"armos") ; return true;
647
if (suffix(RV,"ermos")) {
648
CT = removeSuffix(CT,"ermos") ; return true;
650
if (suffix(RV,"areis")) {
651
CT = removeSuffix(CT,"areis") ; return true;
653
if (suffix(RV,"ereis")) {
654
CT = removeSuffix(CT,"ereis") ; return true;
656
if (suffix(RV,"ireis")) {
657
CT = removeSuffix(CT,"ireis") ; return true;
659
if (suffix(RV,"asses")) {
660
CT = removeSuffix(CT,"asses") ; return true;
662
if (suffix(RV,"esses")) {
663
CT = removeSuffix(CT,"esses") ; return true;
665
if (suffix(RV,"isses")) {
666
CT = removeSuffix(CT,"isses") ; return true;
668
if (suffix(RV,"astes")) {
669
CT = removeSuffix(CT,"astes") ; return true;
671
if (suffix(RV,"assem")) {
672
CT = removeSuffix(CT,"assem") ; return true;
674
if (suffix(RV,"essem")) {
675
CT = removeSuffix(CT,"essem") ; return true;
677
if (suffix(RV,"issem")) {
678
CT = removeSuffix(CT,"issem") ; return true;
680
if (suffix(RV,"ardes")) {
681
CT = removeSuffix(CT,"ardes") ; return true;
683
if (suffix(RV,"erdes")) {
684
CT = removeSuffix(CT,"erdes") ; return true;
686
if (suffix(RV,"irdes")) {
687
CT = removeSuffix(CT,"irdes") ; return true;
689
if (suffix(RV,"ariam")) {
690
CT = removeSuffix(CT,"ariam") ; return true;
692
if (suffix(RV,"eriam")) {
693
CT = removeSuffix(CT,"eriam") ; return true;
695
if (suffix(RV,"iriam")) {
696
CT = removeSuffix(CT,"iriam") ; return true;
698
if (suffix(RV,"arias")) {
699
CT = removeSuffix(CT,"arias") ; return true;
701
if (suffix(RV,"erias")) {
702
CT = removeSuffix(CT,"erias") ; return true;
704
if (suffix(RV,"irias")) {
705
CT = removeSuffix(CT,"irias") ; return true;
707
if (suffix(RV,"estes")) {
708
CT = removeSuffix(CT,"estes") ; return true;
710
if (suffix(RV,"istes")) {
711
CT = removeSuffix(CT,"istes") ; return true;
713
// NOTE(review): duplicate of the earlier "areis" check in this method; that one
// returns first, so this branch looks unreachable — confirm and remove.
if (suffix(RV,"areis")) {
714
CT = removeSuffix(CT,"areis") ; return true;
716
if (suffix(RV,"aveis")) {
717
CT = removeSuffix(CT,"aveis") ; return true;
722
if (RV.length() >= 4) {
723
if (suffix(RV,"aria")) {
724
CT = removeSuffix(CT,"aria") ; return true;
726
if (suffix(RV,"eria")) {
727
CT = removeSuffix(CT,"eria") ; return true;
729
if (suffix(RV,"iria")) {
730
CT = removeSuffix(CT,"iria") ; return true;
732
if (suffix(RV,"asse")) {
733
CT = removeSuffix(CT,"asse") ; return true;
735
if (suffix(RV,"esse")) {
736
CT = removeSuffix(CT,"esse") ; return true;
738
if (suffix(RV,"isse")) {
739
CT = removeSuffix(CT,"isse") ; return true;
741
if (suffix(RV,"aste")) {
742
CT = removeSuffix(CT,"aste") ; return true;
744
if (suffix(RV,"este")) {
745
CT = removeSuffix(CT,"este") ; return true;
747
if (suffix(RV,"iste")) {
748
CT = removeSuffix(CT,"iste") ; return true;
750
if (suffix(RV,"arei")) {
751
CT = removeSuffix(CT,"arei") ; return true;
753
if (suffix(RV,"erei")) {
754
CT = removeSuffix(CT,"erei") ; return true;
756
if (suffix(RV,"irei")) {
757
CT = removeSuffix(CT,"irei") ; return true;
759
if (suffix(RV,"aram")) {
760
CT = removeSuffix(CT,"aram") ; return true;
762
if (suffix(RV,"eram")) {
763
CT = removeSuffix(CT,"eram") ; return true;
765
if (suffix(RV,"iram")) {
766
CT = removeSuffix(CT,"iram") ; return true;
768
if (suffix(RV,"avam")) {
769
CT = removeSuffix(CT,"avam") ; return true;
771
if (suffix(RV,"arem")) {
772
CT = removeSuffix(CT,"arem") ; return true;
774
if (suffix(RV,"erem")) {
775
CT = removeSuffix(CT,"erem") ; return true;
777
if (suffix(RV,"irem")) {
778
CT = removeSuffix(CT,"irem") ; return true;
780
if (suffix(RV,"ando")) {
781
CT = removeSuffix(CT,"ando") ; return true;
783
if (suffix(RV,"endo")) {
784
CT = removeSuffix(CT,"endo") ; return true;
786
if (suffix(RV,"indo")) {
787
CT = removeSuffix(CT,"indo") ; return true;
789
if (suffix(RV,"arao")) {
790
CT = removeSuffix(CT,"arao") ; return true;
792
if (suffix(RV,"erao")) {
793
CT = removeSuffix(CT,"erao") ; return true;
795
if (suffix(RV,"irao")) {
796
CT = removeSuffix(CT,"irao") ; return true;
798
if (suffix(RV,"adas")) {
799
CT = removeSuffix(CT,"adas") ; return true;
801
if (suffix(RV,"idas")) {
802
CT = removeSuffix(CT,"idas") ; return true;
804
if (suffix(RV,"aras")) {
805
CT = removeSuffix(CT,"aras") ; return true;
807
if (suffix(RV,"eras")) {
808
CT = removeSuffix(CT,"eras") ; return true;
810
if (suffix(RV,"iras")) {
811
CT = removeSuffix(CT,"iras") ; return true;
813
if (suffix(RV,"avas")) {
814
CT = removeSuffix(CT,"avas") ; return true;
816
if (suffix(RV,"ares")) {
817
CT = removeSuffix(CT,"ares") ; return true;
819
if (suffix(RV,"eres")) {
820
CT = removeSuffix(CT,"eres") ; return true;
822
if (suffix(RV,"ires")) {
823
CT = removeSuffix(CT,"ires") ; return true;
825
if (suffix(RV,"ados")) {
826
CT = removeSuffix(CT,"ados") ; return true;
828
if (suffix(RV,"idos")) {
829
CT = removeSuffix(CT,"idos") ; return true;
831
if (suffix(RV,"amos")) {
832
CT = removeSuffix(CT,"amos") ; return true;
834
if (suffix(RV,"emos")) {
835
CT = removeSuffix(CT,"emos") ; return true;
837
if (suffix(RV,"imos")) {
838
CT = removeSuffix(CT,"imos") ; return true;
840
// NOTE(review): duplicate of the earlier "iras" check in this method; that one
// returns first, so this branch looks unreachable — confirm and remove.
if (suffix(RV,"iras")) {
841
CT = removeSuffix(CT,"iras") ; return true;
843
if (suffix(RV,"ieis")) {
844
CT = removeSuffix(CT,"ieis") ; return true;
849
if (RV.length() >= 3) {
850
if (suffix(RV,"ada")) {
851
CT = removeSuffix(CT,"ada") ; return true;
853
if (suffix(RV,"ida")) {
854
CT = removeSuffix(CT,"ida") ; return true;
856
if (suffix(RV,"ara")) {
857
CT = removeSuffix(CT,"ara") ; return true;
859
if (suffix(RV,"era")) {
860
CT = removeSuffix(CT,"era") ; return true;
862
// This branch strips "ava", so it must test for "ava"; testing "ira" here made the
// removal a no-op while still reporting success. "ira" has its own branch further down.
if (suffix(RV,"ava")) {
863
CT = removeSuffix(CT,"ava") ; return true;
865
if (suffix(RV,"iam")) {
866
CT = removeSuffix(CT,"iam") ; return true;
868
if (suffix(RV,"ado")) {
869
CT = removeSuffix(CT,"ado") ; return true;
871
if (suffix(RV,"ido")) {
872
CT = removeSuffix(CT,"ido") ; return true;
874
if (suffix(RV,"ias")) {
875
CT = removeSuffix(CT,"ias") ; return true;
877
if (suffix(RV,"ais")) {
878
CT = removeSuffix(CT,"ais") ; return true;
880
if (suffix(RV,"eis")) {
881
CT = removeSuffix(CT,"eis") ; return true;
883
if (suffix(RV,"ira")) {
884
CT = removeSuffix(CT,"ira") ; return true;
886
if (suffix(RV,"ear")) {
887
CT = removeSuffix(CT,"ear") ; return true;
892
if (RV.length() >= 2) {
893
if (suffix(RV,"ia")) {
894
CT = removeSuffix(CT,"ia") ; return true;
896
if (suffix(RV,"ei")) {
897
CT = removeSuffix(CT,"ei") ; return true;
899
if (suffix(RV,"am")) {
900
CT = removeSuffix(CT,"am") ; return true;
902
if (suffix(RV,"em")) {
903
CT = removeSuffix(CT,"em") ; return true;
905
if (suffix(RV,"ar")) {
906
CT = removeSuffix(CT,"ar") ; return true;
908
if (suffix(RV,"er")) {
909
CT = removeSuffix(CT,"er") ; return true;
911
if (suffix(RV,"ir")) {
912
CT = removeSuffix(CT,"ir") ; return true;
914
if (suffix(RV,"as")) {
915
CT = removeSuffix(CT,"as") ; return true;
917
if (suffix(RV,"es")) {
918
CT = removeSuffix(CT,"es") ; return true;
920
if (suffix(RV,"is")) {
921
CT = removeSuffix(CT,"is") ; return true;
923
if (suffix(RV,"eu")) {
924
CT = removeSuffix(CT,"eu") ; return true;
926
if (suffix(RV,"iu")) {
927
CT = removeSuffix(CT,"iu") ; return true;
929
// NOTE(review): exact repeat of the "iu" check immediately above; that one
// returns first, so this branch looks unreachable — confirm and remove.
if (suffix(RV,"iu")) {
930
CT = removeSuffix(CT,"iu") ; return true;
932
if (suffix(RV,"ou")) {
933
CT = removeSuffix(CT,"ou") ; return true;
937
// no ending was removed by step2
942
* Delete suffix 'i' if in RV and preceded by 'c'
945
private void step3() {
946
if (RV == null) return ;
948
if (suffix(RV,"i") && suffixPreceded(RV,"i","c")) {
949
CT = removeSuffix(CT,"i") ;
957
* If the word ends with one of the suffixes (os a i o á í ó)
961
private void step4() {
962
if (RV == null) return ;
964
if (suffix(RV,"os")) {
965
CT = removeSuffix(CT,"os") ; return ;
967
if (suffix(RV,"a")) {
968
CT = removeSuffix(CT,"a") ; return ;
970
if (suffix(RV,"i")) {
971
CT = removeSuffix(CT,"i") ; return ;
973
if (suffix(RV,"o")) {
974
CT = removeSuffix(CT,"o") ; return ;
980
* If the word ends with one of (e é ê) in RV, delete it,
981
* and if preceded by 'gu' (or 'ci') with the 'u' (or 'i') in RV,
982
* delete the 'u' (or 'i')
984
* Or if the word ends with ç, remove the cedilla
987
private void step5() {
988
if (RV == null) return ;
990
if (suffix(RV,"e")) {
991
if (suffixPreceded(RV,"e","gu")) {
992
CT = removeSuffix(CT,"e") ;
993
CT = removeSuffix(CT,"u") ;
997
if (suffixPreceded(RV,"e","ci")) {
998
CT = removeSuffix(CT,"e") ;
999
CT = removeSuffix(CT,"i") ;
1003
CT = removeSuffix(CT,"e") ; return ;
1008
* For logging and debugging purposes
1010
* @return TERM, CT, RV, R1 and R2
1012
public String log() {
1013
return " (TERM = " + TERM + ")" +
1014
" (CT = " + CT +")" +
1015
" (RV = " + RV +")" +
1016
" (R1 = " + R1 +")" +
1017
" (R2 = " + R2 +")" ;