Paste: messy

Author: pruned
Mode: factor
Date: Sat, 17 Jul 2010 18:33:37
Plain Text |
! make-csr-gpu ( -- )
!
! Builds three uint arrays from the cached csr-opt value:
!   blockTex-data  one { length data4-base data2-base y-base } entry per
!                  row block, zero-padded and flattened
!   data4          payload copied in 256-element chunks
!   data2          payload copied as one trailing 128-element chunk
! Each array is passed through aligned-malloc-copy-array and the resulting
! 3-element array is stored in the csr-gpu slot of `cache`.
! Along the way `csr-opt second` is regrouped and handed to
! res-set-any-matrix together with permTex.
!
! NOTE(review): cache, cached, make-csr-opt, /+, deltas, undeltas, permTex,
! res-set-any-matrix and aligned-malloc-copy-array are defined outside this
! word; their exact semantics should be confirmed there.
:: make-csr-gpu ( -- )
        H{
            { "data-swizzling" f }
        }
        :> settings

        ! The flag is constant for the whole word, so look it up once
        ! instead of repeating the hashtable lookup inside the loops.
        "data-swizzling" settings at :> swizzle?

        ! Make sure csr-opt has been computed, then fetch it from the cache.
        [ csr-opt>> ] [ make-csr-opt ] cached drop
        cache csr-opt>> :> csr-opt

        csr-opt second length 16 2^ /+ :> N ! 64k rows
    
        ! Per-row-block sizes: deltas of the first csr-opt element over 256.
        csr-opt first deltas 256 v/n :> rblocks-length
        ! Each size is split into (size mod 2) and (size /i 2); the counts
        ! are kept and also run through undeltas to obtain start indexes.
        rblocks-length [ 2 mod ] map dup :> data2-count
        undeltas :> data2-indexes
        rblocks-length [ 2 /i ] map dup :> data4-count
        undeltas :> data4-indexes
        
        ! Elements 2 and 3 of csr-opt, paired up element-wise and flattened
        ! into a single uint array (only element 3 goes through the
        ! float-array / uint-array reinterpretation).
        { 2 3 } csr-opt nths first2 
        >float-array byte-array>uint-array 
        zip uint-array{ } concat-as :> data
        
        ! One row per row block: { size data4-index data2-index block-number }.
        rblocks-length data4-indexes data2-indexes 
        rblocks-length length iota 4array flip
        [   ! Locals are named so that they do not shadow the `length` and
            ! `group` words; rb-index is currently unused.
            first4 :> ( rb-length data4-index data2-index rb-index )
                    
            ! Turn the two chunk indexes into base offsets.
            { data4-index data2-index }
            swizzle?
            [   
                [   
                    1024 /mod :> ( row col )
                    8192 8 * row * col 8 * +
                ] map
            ] [ 64 v*n ] if
            first2 :> ( data4-base data2-base )
                    
            ! Disabled earlier code, kept for reference:
            ! csr-opt second 64 <sliced-groups> rbi swap nth
            ! 64 group * 
            0 :> y-base
    
            { rb-length data4-base data2-base y-base }
        ] map 
        ! Pad the table with zero entries to a fixed size before flattening.
        8192 N 8 /+ 2 align * { 0 0 0 0 } pad-tail
        uint-array{ } concat-as :> blockTex-data
        

        ! Second csr-opt element in groups of 64, each reinterpreted from
        ! ushort to uint, padded, flattened and regrouped into rows of
        ! 8192 4 * elements for res-set-any-matrix.
        csr-opt second 64 <sliced-groups>
        [ >ushort-array byte-array>uint-array ] map 
        8192 N 1  * 2 align * { 0 0 0 0 } pad-tail
        uint-array{ } concat-as
        8192 4 * <sliced-groups> permTex 16 res-set-any-matrix

        data4-indexes last 8 * 8192 align 8 * 4 * <uint-array> :> data4
        
        ! Fill data4: for every row block copy `count` consecutive
        ! 256-element chunks taken from the front of that block's slice.
        data4-indexes length iota
        data4-indexes
        data4-count
        [| rbi index count |
            count iota
            [| chunk |
                ! Slice of `data` belonging to row block rbi.
                rbi dup 1 +
                [ csr-opt first nth 4 / 2 * ] bi@ 
                data <slice> 256 chunk * tail 256 head :> block
                
                index chunk +
                swizzle?
                [   
                    1024 /mod :> ( row col )
                    8192 8 * row * col 8 * +
                ] [ 64 * ] if :> base
                            
                swizzle?
                [
                    ! Swizzled layout: scatter the chunk as 32-element
                    ! pieces, one every 8192 positions.
                    block 32 group 
                    [| src y |
                       src
                       base y 8192 * + 4 *
                       data4 copy
                    ] each-index
                ] [ block base data4 copy ] if
            ] each
        ] 3each

        8192 16 N * * <uint-array> :> data2
        
        ! Fill data2: a row block with a non-zero data2 count contributes the
        ! last 128 elements of its slice.
        data2-indexes length iota
        data2-indexes
        data2-count
        [| rbi index count |
            count 0 >
            [
                rbi dup 1 +
                [ csr-opt first nth 4 / 2 * ] bi@ 
                data <slice> 128 tail* :> block
                
                index
                swizzle?
                [   
                    1024 /mod :> ( row col )
                    8192 8 * row * col 8 * +
                ] [ 64 * ] if :> base
                            
                swizzle?
                [
                    ! Swizzled layout: 16-element pieces, one every 8192
                    ! positions.
                    block 16 group
                    [| src y |
                       src
                       base y 8192 * + 2 *
                       data2 copy 
                    ] each-index
                ] [ block base data2 copy ] if
            ] when
        ] 3each
        
        ! Copy all three buffers and publish them in the cache.
        { blockTex-data data4 data2 }
        [ aligned-malloc-copy-array ] map
        cache csr-gpu<< ;

New Annotation

Summary:
Author:
Mode:
Body: