+USING: combinators kernel math parser sequences splitting ;
IN: porter-stemmer
-USING: kernel math parser sequences combinators splitting ;
: consonant? ( i str -- ? )
2dup nth dup "aeiou" member? [
] [
CHAR: y = [
over zero?
- [ 2drop t ] [ [ 1- ] dip consonant? not ] if
+ [ 2drop t ] [ [ 1 - ] dip consonant? not ] if
] [
2drop t
] if
: skip-vowels ( i str -- i str )
2dup bounds-check? [
- 2dup consonant? [ [ 1+ ] dip skip-vowels ] unless
+ 2dup consonant? [ [ 1 + ] dip skip-vowels ] unless
] when ;
: skip-consonants ( i str -- i str )
2dup bounds-check? [
- 2dup consonant? [ [ 1+ ] dip skip-consonants ] when
+ 2dup consonant? [ [ 1 + ] dip skip-consonants ] when
] when ;
: (consonant-seq) ( n i str -- n )
skip-vowels
2dup bounds-check? [
- [ 1+ ] [ 1+ ] [ ] tri* skip-consonants [ 1+ ] dip
+ [ 1 + ] [ 1 + ] [ ] tri* skip-consonants [ 1 + ] dip
(consonant-seq)
] [
2drop
over 1 < [
2drop f
] [
- 2dup nth [ over 1- over nth ] dip = [
+ 2dup nth [ over 1 - over nth ] dip = [
consonant?
] [
2drop f
: consonant-end? ( n seq -- ? )
[ length swap - ] keep consonant? ;
-: last-is? ( str possibilities -- ? ) [ peek ] dip member? ;
+: last-is? ( str possibilities -- ? ) [ last ] dip member? ;
: cvc? ( str -- ? )
{
pick consonant-seq 0 > [ nip ] [ drop ] if append ;
: step1a ( str -- newstr )
- dup peek CHAR: s = [
+ dup last CHAR: s = [
{
{ [ "sses" ?tail ] [ "ss" append ] }
{ [ "ies" ?tail ] [ "i" append ] }
{ [ "bl" ?tail ] [ "ble" append ] }
{ [ "iz" ?tail ] [ "ize" append ] }
{
- [ dup length 1- over double-consonant? ]
+ [ dup length 1 - over double-consonant? ]
[ dup "lsz" last-is? [ but-last-slice ] unless ]
}
{
[ 1 = [ but-last-slice cvc? not ] [ drop f ] if ] if ;
: remove-e ( str -- newstr )
- dup peek CHAR: e = [
+ dup last CHAR: e = [
dup remove-e? [ but-last-slice ] when
] when ;
: ll->l ( str -- newstr )
{
- { [ dup peek CHAR: l = not ] [ ] }
- { [ dup length 1- over double-consonant? not ] [ ] }
+ { [ dup last CHAR: l = not ] [ ] }
+ { [ dup length 1 - over double-consonant? not ] [ ] }
{ [ dup consonant-seq 1 > ] [ but-last-slice ] }
[ ]
} cond ;