Paste: loop
Author: | slava |
Mode: | assembly-x86 |
Date: | Wed, 19 Nov 2008 06:31:21 |
Plain Text |
/*
 * _scan_by_16_impl
 *
 * Walks the byte range [start, end) in blocks of 96 bytes (six 16-byte
 * vectors) and counts the blocks in which at least one bit selected by
 * the 16-byte mask is set.
 *
 * Syntax: GNU as, AT&T operand order.
 * Registers follow the System V AMD64 argument order; presumably called
 * from C as
 *     long scan_by_16_impl(const void *start, const void *end,
 *                          const void *mask);
 * (TODO confirm against the caller).
 *
 * inputs:
 *   rdi: start (must be 16-byte aligned, it is read with movdqa)
 *   rsi: end
 *   rdx: ptr to mask (16 bytes, may be unaligned)
 * output:
 *   rax: count of blocks with a masked bit set
 * temps:
 *   rcx:    zero-byte bitmap of the combined vector
 *   rsi:    reused as "bytes left to scan"
 *   xmm0:   mask
 *   xmm1:   zero
 *   xmm2-7: the six vectors of the current block
 * clobbers: rcx, rsi, rdi, xmm0-xmm7, flags
 *
 * Only complete 96-byte blocks are examined. An empty or reversed range
 * returns 0 without touching memory, and a trailing partial block is
 * ignored instead of being read past `end`.
 */
.globl _scan_by_16_impl
.set SCAN_BLOCK_BYTES, 96               /* 6 vectors * 16 bytes */

_scan_by_16_impl:
        xor     %eax,%eax               /* count = 0 */
        movdqu  (%rdx),%xmm0            /* xmm0 = mask */
        pxor    %xmm1,%xmm1             /* xmm1 = 0 */
        sub     %rdi,%rsi               /* rsi = end - start = bytes left */
        cmp     $SCAN_BLOCK_BYTES,%rsi
        jl      Lscan_done              /* signed: also rejects end < start */

Lscan_block:
        /* Invariant: rsi >= 96, so all six loads stay inside the range. */
        movdqa  (%rdi),%xmm2
        movdqa  16(%rdi),%xmm3
        movdqa  32(%rdi),%xmm4
        movdqa  48(%rdi),%xmm5
        movdqa  64(%rdi),%xmm6
        movdqa  80(%rdi),%xmm7

        /* Keep only the bits selected by the mask. */
        pand    %xmm0,%xmm2
        pand    %xmm0,%xmm3
        pand    %xmm0,%xmm4
        pand    %xmm0,%xmm5
        pand    %xmm0,%xmm6
        pand    %xmm0,%xmm7

        /* Fold the six masked vectors into xmm7. */
        por     %xmm2,%xmm3
        por     %xmm4,%xmm5
        por     %xmm6,%xmm7
        por     %xmm3,%xmm7
        por     %xmm5,%xmm7

        /*
         * Test all 128 bits for zero. psadbw followed by a 64-bit move
         * would only look at bytes 0-7, because psadbw keeps a separate
         * sum for each 64-bit half; compare every byte against zero
         * instead and collect the 16 results in one bitmap.
         */
        pcmpeqb  %xmm1,%xmm7            /* byte = 0xff where it was zero */
        pmovmskb %xmm7,%ecx             /* ecx bit i = byte i was zero */
        cmp     $0xffff,%ecx
        je      Lscan_next              /* every byte zero: not counted */
        inc     %rax

Lscan_next:
        add     $SCAN_BLOCK_BYTES,%rdi
        sub     $SCAN_BLOCK_BYTES,%rsi
        cmp     $SCAN_BLOCK_BYTES,%rsi
        jge     Lscan_block             /* another complete block remains */

Lscan_done:
        ret
Author: | slava |
Mode: | assembly-x86 |
Date: | Wed, 19 Nov 2008 06:33:03 |
Plain Text |
/*
 * _scan_by_16_impl
 *
 * Walks the byte range [start, end) in blocks of 96 bytes (six 16-byte
 * vectors) and counts the blocks in which at least one bit selected by
 * the 16-byte mask is set. The six vectors are OR-ed together first and
 * the mask is applied once to the result, which selects the same bits as
 * masking each vector separately.
 *
 * Syntax: GNU as, AT&T operand order.
 * Registers follow the System V AMD64 argument order; presumably called
 * from C as
 *     long scan_by_16_impl(const void *start, const void *end,
 *                          const void *mask);
 * (TODO confirm against the caller).
 *
 * inputs:
 *   rdi: start (must be 16-byte aligned, it is read with movdqa)
 *   rsi: end
 *   rdx: ptr to mask (16 bytes, may be unaligned)
 * output:
 *   rax: count of blocks with a masked bit set
 * temps:
 *   rcx:    zero-byte bitmap of the combined vector
 *   rsi:    reused as "bytes left to scan"
 *   xmm0:   mask
 *   xmm1:   zero
 *   xmm2-7: the six vectors of the current block
 * clobbers: rcx, rsi, rdi, xmm0-xmm7, flags
 *
 * Only complete 96-byte blocks are examined. An empty or reversed range
 * returns 0 without touching memory, and a trailing partial block is
 * ignored instead of being read past `end`.
 */
.globl _scan_by_16_impl
.set SCAN_BLOCK_BYTES, 96               /* 6 vectors * 16 bytes */

_scan_by_16_impl:
        xor     %eax,%eax               /* count = 0 */
        movdqu  (%rdx),%xmm0            /* xmm0 = mask */
        pxor    %xmm1,%xmm1             /* xmm1 = 0 */
        sub     %rdi,%rsi               /* rsi = end - start = bytes left */
        cmp     $SCAN_BLOCK_BYTES,%rsi
        jl      Lscan_done              /* signed: also rejects end < start */

Lscan_block:
        /* Invariant: rsi >= 96, so all six loads stay inside the range. */
        movdqa  (%rdi),%xmm2
        movdqa  16(%rdi),%xmm3
        movdqa  32(%rdi),%xmm4
        movdqa  48(%rdi),%xmm5
        movdqa  64(%rdi),%xmm6
        movdqa  80(%rdi),%xmm7

        /* Fold the six vectors into xmm7, then mask the union once. */
        por     %xmm2,%xmm3
        por     %xmm4,%xmm5
        por     %xmm6,%xmm7
        por     %xmm3,%xmm7
        por     %xmm5,%xmm7
        pand    %xmm0,%xmm7

        /*
         * Test all 128 bits for zero. psadbw followed by a 64-bit move
         * would only look at bytes 0-7, because psadbw keeps a separate
         * sum for each 64-bit half; compare every byte against zero
         * instead and collect the 16 results in one bitmap.
         */
        pcmpeqb  %xmm1,%xmm7            /* byte = 0xff where it was zero */
        pmovmskb %xmm7,%ecx             /* ecx bit i = byte i was zero */
        cmp     $0xffff,%ecx
        je      Lscan_next              /* every byte zero: not counted */
        inc     %rax

Lscan_next:
        add     $SCAN_BLOCK_BYTES,%rdi
        sub     $SCAN_BLOCK_BYTES,%rsi
        cmp     $SCAN_BLOCK_BYTES,%rsi
        jge     Lscan_block             /* another complete block remains */

Lscan_done:
        ret
New Annotation