USING: prettyprint strings kernel arrays math sequences lil-utils namespaces assocs generalizations fry ; IN: seperate-text SYMBOL: vik : >vik ( n -- ) vik get swap suffix vik set ; : (next-index) ( sub seq n -- sub seq i/f ) [ 2dup ] dip start* ; : (all-indices) ( sub seq n/f -- vik ) dup f = not [ (next-index) dup >vik dup [ 1+ ] when (all-indices) ] [ 3drop vik get ] if ; recursive : all-indices ( seq chr -- array ) V{ } clone vik set swap 0 (all-indices) sift >array ; : find-all-firsts ( string chr -- seq ) all-indices ; : find-all-seconds ( string chr -- seq ) [ nip length ] 2keep all-indices [ + ] with map ; ! !!!!!!!!!!!!!!!!!!!! ! some combinators : 1bi1@ ( obj p q quot -- objp objq ) [ curry ] curry bi@ bi ; inline ! example: 5 2 4 [ + ] ... => 7 9 ! 1 object, 2 (bi) to apply (@) to that object using 1 quot. ! hence: 1bi1@ .. a better name is much appreciated. : 1bi2* ( obj x y x-quot y-quot -- objx objy ) [ [ curry ] curry ] bi@ bi* bi ; inline ! example: 1 2 3 [ + ] [ - ] foo => 3 -2 ! takes 1 object 2 objects to apply using a spread (*) and ! 2 quots to match the applied objects. 1bi2* ! !!!!!!!!!!!! : (match-min) ( seq item -- min ) [ > ] curry filter dup empty? [ drop f ] [ first ] if ; : close-match ( seq1 seq2 -- seq3 ) [ swap [ (match-min) ] keep swap 2array ] curry map ; : complement ( seq -- seq' ) { f f } suffix [ [ second ] map 0 prefix ] [ [ first ] map ] bi zip [ { 0 0 } = not ] filter ; : fix-complements ( pairs string -- pairs' ) ! in order to have the complement reach to the end. length '[ [ first2 dup f = [ drop _ ] when 2array ] map ] call ; ! : extract-indices1 ( str indices -- extracts ) ! swap [ [ first2 ] dip [ ] keep clone-like ] curry map ; ! ! about x2 slower than the simple subseq version. ( 200% time of other version ) : extract-indices ( str indices -- extracts ) [ first2 rot subseq ] with map ; : (find-all-indices) ( string from-chr to-chr -- seq1 seq2 ) [ find-all-firsts ] [ find-all-seconds ] 1bi2* ; : find-all ( string from-chr to-chr -- indices-pairs-seq ) (find-all-indices) close-match ; : (findings) ( str from-chr to-chr -- str indices ) pick [ find-all ] dip swap ; : extract-complements ( str from-chr to-chr -- complements ) (findings) complement over fix-complements extract-indices ; : extract-all ( str from-chr to-chr -- actuals ) (findings) extract-indices ; : extract-one ( str from-chr to-chr -- array ) ! HACK: can find just the first ones, for efficiency if it matters. (findings) dup empty? [ nip ] [ first 1array extract-indices ] if ; : extract-both ( str from-chr to-chr -- actuals complements ) (findings) [ ] [ complement ] bi [ over ] dip swap fix-complements [ extract-indices ] 1bi1@ ; : strip-tags ( str -- str ) "<" ">" extract-complements " " join ; ! EXP : (all-clean?) ( pair-seq pair -- ? ) any-overlap? not ; ! : (remove-overlaps) ( seq1 seq2 -- seq2' ) ! [ tuck (all-clean?) [ drop f ] unless ] with map sift ; : find-start-end-index ( string find -- seq ) ! str dup (find-all-indices) zip ; SYMBOL: accum ! accum [ V{ } clone ] initialize : >accum ( item -- ) accum set ; : new-accum ( -- ) V{ } clone >accum ; : accum> ( -- item ) accum get ; : (remove-overlaps) ( rest -- ) dup empty? [ drop ] [ [ rest-slice ] [ 1 head first ] bi ! rest head accum> swap 2dup any-overlap? ! rest accum-vector head t/f [ 2drop ] [ suffix >accum ] if ! rest (remove-overlaps) ] if ; : remove-overlaps ( pairs-seq -- pairs-seq' ) new-accum (remove-overlaps) accum> ; : make-slices ( str pair-seq -- slices-seq ) swap [ [ first2 ] dip ] curry map ; : customized-slices ( str pair-seq -- slices-seq ) swap [ [ first2 2dup ] dip >string 3array ] curry map ;