Paste: lots of supporting words
Author: | randy7 |
Mode: | factor |
Date: | Fri, 6 Mar 2009 22:13:34 |
Plain Text |
USING: prettyprint strings kernel arrays math sequences lil-utils
namespaces assocs generalizations fry ;
IN: seperate-text
SYMBOL: vik
: >vik ( n -- )
vik get swap suffix vik set ;
: (next-index) ( sub seq n -- sub seq i/f )
[ 2dup ] dip start* ;
: (all-indices) ( sub seq n/f -- vik )
dup f = not [
(next-index) dup >vik
dup [ 1+ ] when (all-indices)
]
[ 3drop vik get ] if ; recursive
: all-indices ( seq chr -- array )
V{ } clone vik set
swap 0 (all-indices) sift >array ;
: find-all-firsts ( string chr -- seq )
all-indices ;
: find-all-seconds ( string chr -- seq )
[ nip length ] 2keep all-indices
[ + ] with map ;
: 1bi1@ ( obj p q quot -- objp objq )
[ curry ] curry bi@ bi ; inline
: 1bi2* ( obj x y x-quot y-quot -- objx objy )
[ [ curry ] curry ] bi@ bi* bi ; inline
: (match-min) ( seq item -- min ) [ > ] curry filter dup empty? [ drop f ] [ first ] if ;
: close-match ( seq1 seq2 -- seq3 )
[ swap [ (match-min) ] keep swap 2array ] curry map ;
: complement ( seq -- seq' )
{ f f } suffix
[ [ second ] map 0 prefix ] [ [ first ] map ] bi zip
[ { 0 0 } = not ] filter ;
: fix-complements ( pairs string -- pairs' )
length '[ [ first2 dup f = [ drop _ ] when 2array ] map ] call ;
: extract-indices ( str indices -- extracts )
[ first2 rot subseq ] with map ;
: (find-all-indices) ( string from-chr to-chr -- seq1 seq2 )
[ find-all-firsts ] [ find-all-seconds ] 1bi2* ;
: find-all ( string from-chr to-chr -- indices-pairs-seq )
(find-all-indices) close-match ;
: (findings) ( str from-chr to-chr -- str indices )
pick [ find-all ] dip swap ;
: extract-complements ( str from-chr to-chr -- complements )
(findings) complement over fix-complements extract-indices ;
: extract-all ( str from-chr to-chr -- actuals )
(findings) extract-indices ;
: extract-one ( str from-chr to-chr -- array )
(findings) dup empty? [ nip ] [ first 1array extract-indices ] if ;
: extract-both ( str from-chr to-chr -- actuals complements )
(findings) [ ] [ complement ] bi
[ over ] dip swap fix-complements
[ extract-indices ] 1bi1@ ;
: strip-tags ( str -- str )
"<" ">" extract-complements " " join ;
: (all-clean?) ( pair-seq pair -- ? )
any-overlap? not ;
: find-start-end-index ( string find -- seq )
dup (find-all-indices) zip ;
SYMBOL: accum
: >accum ( item -- ) accum set ;
: new-accum ( -- ) V{ } clone >accum ;
: accum> ( -- item ) accum get ;
: (remove-overlaps) ( rest -- )
dup empty? [ drop ] [
[ rest-slice ] [ 1 head first ] bi
accum> swap 2dup any-overlap?
[ 2drop ] [ suffix >accum ] if
(remove-overlaps)
] if ;
: remove-overlaps ( pairs-seq -- pairs-seq' )
new-accum (remove-overlaps) accum> ;
: make-slices ( str pair-seq -- slices-seq )
swap [ [ first2 ] dip <slice> ] curry map ;
: customized-slices ( str pair-seq -- slices-seq )
swap [ [ first2 2dup ] dip <slice> >string 3array ] curry map ;
New Annotation