Paste: lots of supporting words

Author: randy7
Mode: factor
Date: Fri, 6 Mar 2009 22:13:34
Plain Text |
USING:  prettyprint strings kernel arrays math sequences lil-utils
        namespaces assocs generalizations fry ;

IN: seperate-text

SYMBOL: vik 
: >vik ( n -- )
    vik get swap suffix vik set ;

: (next-index) ( sub seq n -- sub seq i/f )
    [ 2dup ] dip start* ;

: (all-indices) ( sub seq n/f -- vik ) 
    dup f = not [ 
                (next-index) dup >vik 
                 dup [ 1+ ] when (all-indices) 
                ] 
                [ 3drop vik get ] if ; recursive

: all-indices ( seq chr -- array )
    V{ } clone vik set
    swap 0 (all-indices) sift >array ;


    
    
: find-all-firsts ( string chr -- seq )
    all-indices ;
    
: find-all-seconds ( string chr -- seq )
    [ nip length ] 2keep all-indices
    [ + ] with map ;
    

! !!!!!!!!!!!!!!!!!!!!
! some combinators

: 1bi1@ ( obj p q quot  -- objp objq ) 
    [ curry ] curry bi@ bi ; inline

! example:  5 2 4 [ + ] ... => 7 9 
! 1 object, 2 (bi) to apply (@) to that object using 1 quot.
! hence: 1bi1@ .. a better name is much appreciated.

: 1bi2* ( obj x y x-quot y-quot -- objx objy ) 
    [ [ curry ] curry ] bi@  bi* bi ; inline

! example: 1 2 3 [ + ] [ - ] foo => 3 -2
! takes 1 object 2 objects to apply using a spread (*) and 
! 2 quots to match the applied objects. 1bi2*

! !!!!!!!!!!!!

: (match-min) ( seq item -- min ) [ > ] curry filter dup empty? [ drop f ] [ first ] if ;

: close-match ( seq1 seq2 -- seq3 )
    [ swap [ (match-min) ] keep swap 2array ] curry map ;


: complement ( seq -- seq' )
    { f f } suffix 
    [ [ second ] map 0 prefix ] [ [ first ] map ] bi zip 
    [ { 0 0 } = not ] filter ;

: fix-complements ( pairs string -- pairs' )  ! in order to have the complement reach to the end.
    length  '[ [ first2 dup f = [ drop _ ] when 2array ] map ] call ;        


! : extract-indices1 ( str indices -- extracts )
!     swap [ [ first2 ] dip [ <slice> ] keep clone-like ] curry map ; 
!    ! about x2 slower than the simple subseq version. ( 200% time of other version )
    
: extract-indices ( str indices -- extracts )    
    [ first2 rot subseq ] with map ;

: (find-all-indices) ( string from-chr to-chr -- seq1 seq2 ) 
    [ find-all-firsts ] [ find-all-seconds ] 1bi2* ;
    
: find-all ( string from-chr to-chr -- indices-pairs-seq )
    (find-all-indices) close-match ;
    
: (findings) ( str from-chr to-chr -- str indices )
    pick [ find-all ] dip swap ;
    
: extract-complements ( str from-chr to-chr -- complements )
    (findings) complement over fix-complements extract-indices ;
    
: extract-all ( str from-chr to-chr -- actuals )
    (findings) extract-indices ;

: extract-one ( str from-chr to-chr -- array ) ! HACK: can find just the first ones, for efficiency if it matters.
    (findings) dup empty? [ nip ] [ first 1array extract-indices ] if ;

: extract-both ( str from-chr to-chr -- actuals complements )
    (findings) [ ] [ complement ] bi
    [ over ] dip swap fix-complements
    [ extract-indices ] 1bi1@ ;

   
: strip-tags ( str -- str )
    "<" ">" extract-complements " " join ;

! EXP



: (all-clean?) ( pair-seq pair -- ? )
    any-overlap? not ;    
    
! : (remove-overlaps) ( seq1 seq2 -- seq2' )
!     [ tuck (all-clean?) [ drop f ] unless ] with map sift ;

: find-start-end-index ( string find -- seq ) ! str 
    dup (find-all-indices) zip ;

    


SYMBOL: accum    
! accum [ V{ } clone ] initialize 

: >accum ( item -- ) accum set ;
: new-accum ( -- ) V{ } clone >accum ;
: accum> ( -- item ) accum get ;

: (remove-overlaps) ( rest -- )
    dup empty? [ drop ] [
    [ rest-slice ] [ 1 head first ] bi ! rest head
    accum> swap 2dup any-overlap? ! rest accum-vector head t/f
        [ 2drop ] [ suffix >accum ] if ! rest
        (remove-overlaps)
    ] if ;

: remove-overlaps ( pairs-seq -- pairs-seq' ) 
    new-accum (remove-overlaps) accum> ;    
: make-slices ( str pair-seq -- slices-seq )
    swap [ [ first2 ] dip <slice> ] curry map ;

: customized-slices ( str pair-seq -- slices-seq )
        swap [ [ first2 2dup ] dip <slice> >string 3array ] curry map ;

New Annotation

Summary:
Author:
Mode:
Body: