Paste: loop

Author: slava
Mode: assembly-x86
Date: Wed, 19 Nov 2008 06:31:21
Plain Text |
.globl _scan_by_16_impl
/* Counts the 96-byte chunks of [start, end) that contain at least one
   bit selected by a 16-byte mask.  Each chunk is read as six 16-byte
   vectors; every vector is ANDed with the mask, the six results are
   ORed together, and the chunk is counted when that combined vector
   is non-zero in any of its 16 bytes.

   Presumably called from C as
       size_t scan_by_16_impl(const void *start, const void *end,
                              const void *mask);
   (the input registers match the System V AMD64 argument order;
   confirm against the caller).

   inputs:

   rdi: start (loaded with movdqa, so it must be 16-byte aligned)
   rsi: end
   rdx: ptr to mask (16 bytes, loaded with movdqu, any alignment)

   output:

   rax: count of chunks with a masked bit set

   temps (all clobbered):

   rcx: temp
   rsi: bytes remaining
   rdi: cursor
   xmm0: mask
   xmm1: zero
   xmm2-7: temp

   Only whole 96-byte chunks are examined: a trailing partial chunk is
   ignored, and an empty or reversed range returns 0 without reading
   any chunk data. */
_scan_by_16_impl:
    xor %eax,%eax               /* count = 0 */
    movdqu (%rdx),%xmm0
    pxor %xmm1,%xmm1
    sub %rdi,%rsi               /* rsi = end - start, in bytes */
    jb 3f                       /* end below start: nothing to scan */
    jmp 2f                      /* test the bound before the first load */
1:
    movdqa (%rdi),%xmm2
    movdqa 16(%rdi),%xmm3
    movdqa 32(%rdi),%xmm4
    movdqa 48(%rdi),%xmm5
    movdqa 64(%rdi),%xmm6
    movdqa 80(%rdi),%xmm7
    pand %xmm0,%xmm2
    pand %xmm0,%xmm3
    pand %xmm0,%xmm4
    pand %xmm0,%xmm5
    pand %xmm0,%xmm6
    pand %xmm0,%xmm7
    por %xmm2,%xmm3
    por %xmm4,%xmm5
    por %xmm6,%xmm7
    por %xmm3,%xmm7
    por %xmm5,%xmm7             /* xmm7 = OR of all six masked vectors */
    /* Test all 16 bytes for non-zero.  A psadbw + 64-bit move would
       only observe the sum of the low 8 bytes, so compare every byte
       against zero and collect the per-byte results instead. */
    pcmpeqb %xmm1,%xmm7         /* 0xff in each byte that was zero */
    pmovmskb %xmm7,%ecx         /* 0xffff iff all 16 bytes were zero */
    cmp $0xffff,%ecx
    setne %cl                   /* 1 when any masked bit was set */
    movzbl %cl,%ecx
    add %rcx,%rax               /* branch-free count += hit */
    add $96,%rdi                /* advance one chunk: 6 vectors * 16 bytes */
    sub $96,%rsi
2:
    cmp $96,%rsi
    jae 1b                      /* unsigned: loop while a whole chunk remains */
3:
    ret

Annotation: 4 fewer insns

Author: slava
Mode: assembly-x86
Date: Wed, 19 Nov 2008 06:33:03
Plain Text |
.globl _scan_by_16_impl
/* Counts the 96-byte chunks of [start, end) that contain at least one
   bit selected by a 16-byte mask.  Each chunk is read as six 16-byte
   vectors that are ORed together first and then ANDed with the mask
   once (AND distributes over OR, so this equals masking each vector
   separately); the chunk is counted when the result is non-zero in
   any of its 16 bytes.

   Presumably called from C as
       size_t scan_by_16_impl(const void *start, const void *end,
                              const void *mask);
   (the input registers match the System V AMD64 argument order;
   confirm against the caller).

   inputs:

   rdi: start (loaded with movdqa, so it must be 16-byte aligned)
   rsi: end
   rdx: ptr to mask (16 bytes, loaded with movdqu, any alignment)

   output:

   rax: count of chunks with a masked bit set

   temps (all clobbered):

   rcx: temp
   rsi: bytes remaining
   rdi: cursor
   xmm0: mask
   xmm1: zero
   xmm2-7: temp

   Only whole 96-byte chunks are examined: a trailing partial chunk is
   ignored, and an empty or reversed range returns 0 without reading
   any chunk data. */
_scan_by_16_impl:
    xor %eax,%eax               /* count = 0 */
    movdqu (%rdx),%xmm0
    pxor %xmm1,%xmm1
    sub %rdi,%rsi               /* rsi = end - start, in bytes */
    jb 3f                       /* end below start: nothing to scan */
    jmp 2f                      /* test the bound before the first load */
1:
    movdqa (%rdi),%xmm2
    movdqa 16(%rdi),%xmm3
    movdqa 32(%rdi),%xmm4
    movdqa 48(%rdi),%xmm5
    movdqa 64(%rdi),%xmm6
    movdqa 80(%rdi),%xmm7
    por %xmm2,%xmm3
    por %xmm4,%xmm5
    por %xmm6,%xmm7
    por %xmm3,%xmm7
    por %xmm5,%xmm7             /* xmm7 = OR of all six vectors */
    pand %xmm0,%xmm7            /* keep only the bits the mask selects */
    /* Test all 16 bytes for non-zero.  A psadbw + 64-bit move would
       only observe the sum of the low 8 bytes, so compare every byte
       against zero and collect the per-byte results instead. */
    pcmpeqb %xmm1,%xmm7         /* 0xff in each byte that was zero */
    pmovmskb %xmm7,%ecx         /* 0xffff iff all 16 bytes were zero */
    cmp $0xffff,%ecx
    setne %cl                   /* 1 when any masked bit was set */
    movzbl %cl,%ecx
    add %rcx,%rax               /* branch-free count += hit */
    add $96,%rdi                /* advance one chunk: 6 vectors * 16 bytes */
    sub $96,%rsi
2:
    cmp $96,%rsi
    jae 1b                      /* unsigned: loop while a whole chunk remains */
3:
    ret

New Annotation

Summary:
Author:
Mode:
Body: