Paste: messy
Author: | pruned |
Mode: | factor |
Date: | Sat, 17 Jul 2010 18:33:37 |
Plain Text |
! Base offset of block number `index` in an output buffer.
! With swizzling enabled the index is split as (y, x) = index /mod 1024
! and the base is 8192*8*y + 8*x; otherwise blocks are spaced 64 apart.
:: block-base ( index swizzle? -- base )
    swizzle? [
        index 1024 /mod :> ( y x )
        8192 8 * y * x 8 * +
    ] [ index 64 * ] if ;

! Builds the GPU-side buffers from the cached csr-opt structure and stores
! them in the csr-gpu slot of `cache`.
!
! Steps, in order:
!   1. ensure csr-opt exists (via `cached` / make-csr-opt) and read it;
!   2. derive per-row-block counts and running indexes for the "data4"
!      and "data2" buffers;
!   3. build the block lookup table (blockTex-data);
!   4. upload the second csr-opt component through permTex;
!   5. fill the data4 and data2 uint-arrays block by block;
!   6. copy { blockTex-data data4 data2 } with aligned-malloc-copy-array
!      and store the result with csr-gpu<<.
!
! NOTE(review): deltas, undeltas, /+, cached, cache, permTex,
! res-set-any-matrix and aligned-malloc-copy-array are defined elsewhere;
! their exact semantics are assumed from usage here -- confirm.
:: make-csr-gpu ( -- )
    H{
        { "data-swizzling" f }
    } :> settings
    ! Looked up once; the original re-read the hashtable inside every loop.
    "data-swizzling" settings at :> swizzle?

    [ csr-opt>> ] [ make-csr-opt ] cached drop
    cache csr-opt>> :> csr-opt

    csr-opt second length 16 2^ /+ :> N
    csr-opt first deltas 256 v/n :> rblocks-length

    ! Per row block: remainder mod 2 goes to data2, quotient by 2 to data4.
    rblocks-length [ 2 mod ] map dup :> data2-count
    undeltas :> data2-indexes
    rblocks-length [ 2 /i ] map dup :> data4-count
    undeltas :> data4-indexes

    ! Interleave csr-opt components 2 and 3 into one flat uint-array.
    { 2 3 } csr-opt nths first2
    >float-array byte-array>uint-array
    zip uint-array{ } concat-as :> data

    ! Block lookup table: one { length data4-base data2-base 0 } per row block.
    rblocks-length data4-indexes data2-indexes
    rblocks-length length iota 4array flip
    [
        ! Locals renamed so they no longer shadow the words `length`/`group`.
        first4 :> ( rb-length data4-index data2-index rb-index )
        { data4-index data2-index } [ swizzle? block-base ] map
        first2 :> ( data4-base data2-base )
        0 :> y-base
        { rb-length data4-base data2-base y-base }
    ] map
    8192 N 8 /+ 2 align * { 0 0 0 0 } pad-tail
    ! Fix: the original omitted `:>` here, so blockTex-data was never bound
    ! even though it is consumed as a value at the end of this word.
    uint-array{ } concat-as :> blockTex-data

    ! Second csr-opt component, in groups of 64, padded and sent to permTex.
    csr-opt second 64 <sliced-groups>
    [ >ushort-array byte-array>uint-array ] map
    8192 N 1 * 2 align * { 0 0 0 0 } pad-tail
    uint-array{ } concat-as
    8192 4 * <sliced-groups> permTex 16 res-set-any-matrix

    ! data4: for each row block, copy `count` consecutive 256-element
    ! chunks of its slice of `data`.
    data4-indexes last 8 * 8192 align 8 * 4 * <uint-array> :> data4
    data4-indexes length iota
    data4-indexes
    data4-count
    [| rbi index count |
        count iota
        [| x |
            rbi dup 1 +
            [ csr-opt first nth 4 / 2 * ] bi@
            data <slice> 256 x * tail 256 head :> block
            index x + swizzle? block-base :> base
            swizzle? [
                ! Swizzled: scatter the chunk as 32-element rows,
                ! 8192 apart, scaled by 4.
                block 32 group
                [| src y | src base y 8192 * + 4 * data4 copy ] each-index
            ] [
                ! NOTE(review): base is not scaled by 4 here, unlike the
                ! swizzled branch -- confirm this is intended.
                block base data4 copy
            ] if
        ] each
    ] 3each

    ! data2: row blocks with a non-zero data2 count contribute the last
    ! 128 elements of their slice of `data`.
    8192 16 N * * <uint-array> :> data2
    data2-indexes length iota
    data2-indexes
    data2-count
    [| rbi index count |
        count 0 >
        [
            rbi dup 1 +
            [ csr-opt first nth 4 / 2 * ] bi@
            data <slice> 128 tail* :> block
            index swizzle? block-base :> base
            swizzle? [
                ! Swizzled: 16-element rows, 8192 apart, scaled by 2.
                block 16 group
                [| src y | src base y 8192 * + 2 * data2 copy ] each-index
            ] [
                ! NOTE(review): base is not scaled by 2 here, unlike the
                ! swizzled branch -- confirm this is intended.
                block base data2 copy
            ] if
        ] when
    ] 3each

    { blockTex-data data4 data2 }
    [ aligned-malloc-copy-array ] map
    cache csr-gpu<< ;
New Annotation