! Copyright (C) 2008 Marc Fauconneau. ! See http://factorcode.org/license.txt for BSD license. USING: kernel arrays math accessors sequences math.vectors math.order sorting binary-search sets assocs fry ; IN: suffix-array ! this suffix array is a sorted array of { word startpos } suffixes ! query is efficient through binary searches : load ( loc -- seq ) first2 [ name>> ] dip tail-slice ; : suffix<=> ( loc loc -- <=> ) [ load ] bi@ <=> ; : suffixes ( word -- suffixes-seq ) dup name>> length [ 2array ] with map ; : new-sa ( words -- sa ) [ suffixes ] map concat [ suffix<=> ] sort ; : prefix<=> ( seq begin -- <=> ) [ swap <=> ] [ head? ] 2bi [ drop +eq+ ] [ ] if ; : find-one-index ( sa begin -- index ) '[ load _ prefix<=> ] search drop ; : expand-slice ( index sa begin -- slice ) '[ load _ head? not ] [ find-last-from ] [ find-from ] 3bi [ drop ] 2bi@ rot ; : query-sa ( begin sa -- matches ) swap dupd [ find-one-index ] 2keep expand-slice [ first ] map prune ; ! usage example : ! clear "test" all-words 10 head new-sa query-sa .