USING: fry locals splitting unicode.categories fry grouping sets unicode.case sequences arrays math kernel strings namespaces math.order sorting ; IN: phrases : (number-of-branches) ( seq -- seq ) length >array [ 1+ ] map ; : just-alpha ( seq -- seq ) " " split [ [ alpha? ] filter ] map ; : sentence-possible-phrases ( sentence -- seq ) just-alpha harvest [ (number-of-branches) ] keep '[ _ swap clump [ " " join ] map ] map ; : old-possible-phrases ( sentence -- repeated ) sentence-possible-phrases [ duplicates ] map harvest [ prune >array ] map ; : acronyms ( -- seq ) ! should probably be in a seperate text file, that is read to a variable on main entry. { "Mr." "Ms." "Mrs." "Dr." "N.B.A" "F.B.I" } ; : acronym? ( word -- ? ) acronyms member? ; : final-puncts ( -- seq ) { "?" "!" "." } ; ! : ends-with-punct? ( word -- ? ) ! -- unused. ! peek 1string final-puncts member? ; : cut-at-first-char ( seq char -- head rest ) dupd '[ _ member? ] find 2dup and [ ! find outputs 2 items, one of them or more may be f drop 1+ cut ! 1+ will take the dot (or other sentence-ending) as well. [ blank? ] trim-left ] [ 2drop f f ] if ; : cut-at-first-chars ( seq str/char-seq -- head rest ) dup string? [ concat ] unless cut-at-first-char ; :: var-vector ( VAR -- ) SYMBOL: VAR V{ } clone VAR set ; : +>var ( item var -- ) [ get swap suffix ] [ set ] bi ; ! :: var> ( VAR -- var ) ! -- same as var get ! VAR get ; : 2f? ( input1 input2 -- ? ) 2dup and f = ; : (one-sentence) ( string -- str1/f str2/f ) final-puncts cut-at-first-chars ; : (add-head) ( str -- ) "sentences" +>var ; : (add-remaining) ( str -- ) (add-head) ; : (all-sentences) ( string -- ) dup empty? [ drop ] [ (one-sentence) 2f? [ 2drop (add-remaining) ] [ swap (add-head) ! head is added to a vector variable. (all-sentences) ! called again with rest ] if ] if ; recursive : all-sentences ( string -- sentences ) "sentences" var-vector V{ } "sentences" set (all-sentences) "sentences" get ; ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! : possible-phrases ( paragraph -- repeated ) all-sentences [ >lower sentence-possible-phrases ] map concat concat duplicates harvest prune >array [ [ length ] bi@ <=> ] sort reverse ; ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!