Paste: multithreading
Author: | pruned |
Mode: | factor |
Date: | Sun, 31 Oct 2010 09:07:26 |
Plain Text |
USING: accessors alien alien.c-types alien.data alien.syntax
arrays assocs classes.struct combinators compiler.codegen.fixup
compiler.constants cpu.x86 cpu.x86.assembler
cpu.x86.assembler.operands destructors generalizations grouping
hashtables io.binary kernel libc literals locals
math.vectors.simd multiline namespaces prettyprint
sequences specialized-arrays tools.time
windows.kernel32 windows.errors alien.accessors utils
cpu.architecture ;
EXCLUDE: math => float ;
SPECIALIZED-ARRAYS: float double void* uint ;
IN: mt
<PRIVATE
: CreateThread ( lpThreadAttributes dwStackSize lpStartAddress lpParameter dwCreationFlags lpThreadId -- hThread )
[ GetCurrentProcess ] 6 ndip CreateRemoteThread ;
:: create-thread ( lpStartAddress lpParameter -- hThread )
f 0 lpStartAddress lpParameter 0 f CreateThread ;
:: wait-threads ( count handles -- )
count handles t >c-bool INFINITE WaitForMultipleObjects WAIT_FAILED = [ win32-error ] when ;
: virtual-alloc ( size -- alien )
[ GetCurrentProcess f ] dip
flags{ MEM_COMMIT MEM_RESERVE }
PAGE_EXECUTE_READWRITE
VirtualAllocEx ;
: virtual-free ( alien -- )
[ GetCurrentProcess ] dip
0
MEM_RELEASE
VirtualFreeEx win32-error=0/f ;
PRIVATE>
: par-exec ( data/fptr -- )
[ first2 swap create-thread ] map
[ length ] [ >void*-array wait-threads ]
[ [ CloseHandle win32-error=0/f ] each ] tri ;
: par-each ( data fptr -- )
[ 2array ] curry map par-exec ;
<PRIVATE
TUPLE: code-repository
{ memory-block }
{ ptr }
{ size }
{ table } ;
: <code-repository> ( -- cr )
f 0 0 H{ } clone code-repository boa ;
: alloc-block ( size -- alien ) virtual-alloc ;
: free-block ( alien -- ) virtual-free ;
:: increase-capacity ( cr -- )
cr size>> 0 =
[
1024 alloc-block cr memory-block<<
1024 cr size<<
]
[
cr size>> 2 * cr size<<
cr memory-block>> free-block
cr size>> alloc-block cr memory-block<<
] if ;
:: allocate ( code cr -- alien )
[ cr ptr>> code length + cr size>> > ] [ cr increase-capacity ] while
cr ptr>> cr memory-block>> <displaced-alien>
[ code code length memcpy ] keep
cr ptr>> code length + cr ptr<< ;
: flush-code-repository ( -- )
code-repository get-global
{
[ memory-block>> dup dup expired? not and [ free-block ] [ drop ] if ]
[ f swap memory-block<< ]
[ 0 swap ptr<< ]
[ 0 swap size<< ]
[ table>> values [ f swap set-second ] each ]
}
cleave ;
: revive ( cr -- cr )
dup memory-block>> expired? [ flush-code-repository ] when ;
PRIVATE>
: get-func ( name -- fptr )
code-repository get-global revive
table>> at
dup second [ second ]
[ [ first call( -- fptr ) code-repository get-global allocate dup ] keep set-second ] if ;
<PRIVATE
code-repository get-global [ flush-code-repository ] when
<code-repository> code-repository set-global
: register ( quot name -- ) [ f 2array ] dip code-repository get-global table>> set-at ;
:: link-jumps ( code -- binary )
3 code nth 3 group :> labels
code last :> binary
labels
[| l |
l first rc-relative assert=
l third l second - <int>
l second 4 -
binary copy
] each
binary ;
PRIVATE>
: gen-foo ( -- fptr )
[
EAX 666 MOV
0 RET
] with-fixup link-jumps ;
[ gen-foo ] "foo" register
:: gen-rmw ( -- fptr )
{ "ptr" }
[ 4 * 4 + 2array ] map-index >hashtable :> args
[
EDX "ptr" args at stack@ MOV
EAX EDX [] MOV
EAX 1 ADD
EDX [] EAX MOV
0 RET
] with-fixup link-jumps ;
[ gen-rmw ] "rmw" register
:: gen-barrier ( -- fptr )
{ "ptr" }
[ 4 * 4 + 2array ] map-index >hashtable :> args
[
EDX "ptr" args at stack@ MOV
MFENCE
LOCK EDX [] DEC
<label> dup resolve-label :> loop
PAUSE
EAX EDX [] MOV
EAX 0 CMP
loop JNE
0 RET
] with-fixup link-jumps ;
[ gen-barrier ] "barrier" register
:: gen-rdtsc ( -- fptr )
{ "ptr" }
[ 4 * 4 + 2array ] map-index >hashtable :> args
[
ECX "ptr" args at stack@ MOV
RDTSC
ECX 0 [+] EAX MOV
ECX 4 [+] EDX MOV
0 RET
] with-fixup link-jumps ;
[ gen-rdtsc ] "rdtsc" register
: rdtsc-test ( struct -- ) "rdtsc" get-func void { pointer: longlong } cdecl alien-indirect ;
STRUCT: scheduler-data
{ buffer int* }
{ start int }
{ end int }
;
:: gen-scheduler ( -- fptr )
{ "buffer" "start" "end" }
[ 4 * EAX swap [+] 2array ] map-index >hashtable :> struct
[
EBP PUSH
EBP ESP MOV
ESP 16 SUB
0 stack@ EBX MOV
4 stack@ ESI MOV
8 stack@ EDI MOV
EAX EBP 2 4 * [+] MOV
ESI "buffer" struct at MOV
EBX "start" struct at MOV
EDI "end" struct at MOV
<label> dup resolve-label :> loop
EAX ESI EBX 0 [+*8+] MOV
ECX ESI EBX 4 [+*8+] MOV
EAX PUSH
ECX CALL
ESP EBP MOV
ESP 16 SUB
EBX 1 ADD
EBX EDI CMP
loop JL
EBX 0 stack@ MOV
ESI 4 stack@ MOV
EDI 8 stack@ MOV
ESP EBP MOV
EBP POP
0 RET
] with-fixup link-jumps ;
[ gen-scheduler ] "scheduler" register
: scheduler ( struct -- ) "scheduler" get-func void { pointer: scheduler-data } cdecl alien-indirect ;
:: sched ( work -- )
[
work
[
[ [ alien-address ] map ] map
[ concat >uint-array malloc-copy &free 0 ]
[ length ] bi
scheduler-data <struct-boa> malloc-copy &free
] map "scheduler" get-func par-each
] with-destructors ;
: sched-test ( seq -- seq' )
[
[ >uint-array malloc-copy &free alien-address ]
[ length iota [ 4 * + ] with map ] bi
[
[ "rmw" get-func 2array ] map
2 group sched
] keep [ <alien> *int ] map
] with-destructors ;
:: barrier-test ( k th -- time )
[
k 4 * 256 + malloc &free alien-address 256 align <alien>
k iota [ 4 * swap <displaced-alien> ] with map :> barriers
barriers [ th swap 0 set-alien-unsigned-cell ] each
th [ barriers [ "barrier" get-func 2array ] map ] replicate
[ sched ] benchmark
] with-destructors ;
:: par-each-bench ( data k fptr -- time )
[
data length :> th
k 4 * 256 + malloc &free alien-address 256 align <alien>
k iota [ 4 * swap <displaced-alien> ] with map :> barriers
barriers [ th swap 0 set-alien-unsigned-cell ] each
k th * 2 * 8 * 256 + malloc &free alien-address 256 align <alien>
k th * 2 * iota [ 8 * swap <displaced-alien> ] with map
2 group k group :> counters
data
counters
[| param thread-counters |
barriers
thread-counters
[| barrier counter-ab |
counter-ab first "rdtsc" get-func 2array
barrier "barrier" get-func 2array
param fptr 2array
counter-ab second "rdtsc" get-func 2array
4array
] 2map concat
] 2map
[ sched ] benchmark k / :> time!
counters [ [ [ *longlong ] map ] map ] map :> times
times flip [ [ first2 swap - ] map ] map
[ supremum ] map but-last
infimum 2.673335 /
drop
time
] with-destructors ;
STRUCT: memtest-data
{ base float* }
{ start int }
{ length int }
{ vec4 float* }
;
:: gen-memtest ( -- fptr )
{ "struct" }
[ 4 * 16 + 2array ] map-index >hashtable :> args
[
EBX PUSH
ESI PUSH
EDI PUSH
EAX "struct" args at stack@ MOV
ESI EAX 0 [+] MOV
ESI EAX 4 [+] ADD
EDI EAX 12 [+] MOV
ECX 0 MOV
EDX EAX 8 [+] MOV
XMM1 EDI 0 [+] MOVDQA
XMM1 XMM1 XORPS
<label> dup resolve-label :> Xloop
XMM0 ESI ECX [+] MOVDQA
XMM1 XMM0 PADDD
XMM0 ESI ECX 16 [++] MOVDQA
XMM1 XMM0 PADDD
XMM0 ESI ECX 32 [++] MOVDQA
XMM1 XMM0 PADDD
XMM0 ESI ECX 48 [++] MOVDQA
XMM1 XMM0 PADDD
ECX 64 ADD
ECX EDX CMP
Xloop JL
EDI 0 [+] XMM1 MOVDQA
EDI POP
ESI POP
EBX POP
0 RET
] with-fixup link-jumps ;
[ gen-memtest ] "memtest" register
: memtest ( struct -- ) "memtest" get-func void { pointer: memtest-data } cdecl alien-indirect ;
:: bw-test ( mb loops threads -- time )
[
threads
[
mb 1024 * aligned-malloc&free
0
mb 1024 *
16 aligned-malloc&free
memtest-data <struct-boa> malloc-copy &free
] replicate
loops "memtest" get-func par-each-bench
1000 1000 * /f
] with-destructors ;
New Annotation