Paste: lots of supporting words
        
	
	
	
		| Author:  | randy7 | 
		| Mode:  | factor | 
		| Date:  | Fri, 6 Mar 2009 22:13:34 | 
	
	Plain Text |
	
	USING:  prettyprint strings kernel arrays math sequences lil-utils
        namespaces assocs generalizations fry ;
IN: seperate-text
SYMBOL: vik 
: >vik ( n -- )
    vik get swap suffix vik set ;
: (next-index) ( sub seq n -- sub seq i/f )
    [ 2dup ] dip start* ;
: (all-indices) ( sub seq n/f -- vik ) 
    dup f = not [ 
                (next-index) dup >vik 
                 dup [ 1+ ] when (all-indices) 
                ] 
                [ 3drop vik get ] if ; recursive
: all-indices ( seq chr -- array )
    V{ } clone vik set
    swap 0 (all-indices) sift >array ;
    
    
: find-all-firsts ( string chr -- seq )
    all-indices ;
    
: find-all-seconds ( string chr -- seq )
    [ nip length ] 2keep all-indices
    [ + ] with map ;
    
: 1bi1@ ( obj p q quot  -- objp objq ) 
    [ curry ] curry bi@ bi ; inline
: 1bi2* ( obj x y x-quot y-quot -- objx objy ) 
    [ [ curry ] curry ] bi@  bi* bi ; inline
: (match-min) ( seq item -- min ) [ > ] curry filter dup empty? [ drop f ] [ first ] if ;
: close-match ( seq1 seq2 -- seq3 )
    [ swap [ (match-min) ] keep swap 2array ] curry map ;
: complement ( seq -- seq' )
    { f f } suffix 
    [ [ second ] map 0 prefix ] [ [ first ] map ] bi zip 
    [ { 0 0 } = not ] filter ;
: fix-complements ( pairs string -- pairs' )  
    length  '[ [ first2 dup f = [ drop _ ] when 2array ] map ] call ;        
    
: extract-indices ( str indices -- extracts )    
    [ first2 rot subseq ] with map ;
: (find-all-indices) ( string from-chr to-chr -- seq1 seq2 ) 
    [ find-all-firsts ] [ find-all-seconds ] 1bi2* ;
    
: find-all ( string from-chr to-chr -- indices-pairs-seq )
    (find-all-indices) close-match ;
    
: (findings) ( str from-chr to-chr -- str indices )
    pick [ find-all ] dip swap ;
    
: extract-complements ( str from-chr to-chr -- complements )
    (findings) complement over fix-complements extract-indices ;
    
: extract-all ( str from-chr to-chr -- actuals )
    (findings) extract-indices ;
: extract-one ( str from-chr to-chr -- array ) 
    (findings) dup empty? [ nip ] [ first 1array extract-indices ] if ;
: extract-both ( str from-chr to-chr -- actuals complements )
    (findings) [ ] [ complement ] bi
    [ over ] dip swap fix-complements
    [ extract-indices ] 1bi1@ ;
   
: strip-tags ( str -- str )
    "<" ">" extract-complements " " join ;
: (all-clean?) ( pair-seq pair -- ? )
    any-overlap? not ;    
    
: find-start-end-index ( string find -- seq ) 
    dup (find-all-indices) zip ;
    
SYMBOL: accum    
: >accum ( item -- ) accum set ;
: new-accum ( -- ) V{ } clone >accum ;
: accum> ( -- item ) accum get ;
: (remove-overlaps) ( rest -- )
    dup empty? [ drop ] [
    [ rest-slice ] [ 1 head first ] bi 
    accum> swap 2dup any-overlap? 
        [ 2drop ] [ suffix >accum ] if 
        (remove-overlaps)
    ] if ;
: remove-overlaps ( pairs-seq -- pairs-seq' ) 
    new-accum (remove-overlaps) accum> ;    
: make-slices ( str pair-seq -- slices-seq )
    swap [ [ first2 ] dip <slice> ] curry map ;
: customized-slices ( str pair-seq -- slices-seq )
        swap [ [ first2 2dup ] dip <slice> >string 3array ] curry map ;
	
	
		New Annotation