Paste: llvm register alloc extravaganza

Author: pruned
Mode: assembly-x86
Date: Fri, 4 Feb 2011 13:24:50
Plain Text |
; NOTE(review): presumably the loop LLVM generated for the Clay snippet in the
; next annotation ("clay code") -- the paste does not state this explicitly.
; 26 instructions per iteration; each iteration stores one 16-byte vector.
main+0x6200:
; The 32-bit index in r9d is sign-extended to 64 bits again on every iteration.
00000000`00406200 4d63c9          movsxd  r9,r9d
; Every load address has the form [rbx + (base + r9)]: the base+index sum is
; built by its own lea, and rbx is added by the addressing mode of the load.
00000000`00406203 4e8d1409        lea     r10,[rcx+r9]
00000000`00406207 4f8d5c0d00      lea     r11,[r13+r9]
00000000`0040620c 66420f6f041b    movdqa  xmm0,xmmword ptr [rbx+r11]
00000000`00406212 66420fe00413    pavgb   xmm0,xmmword ptr [rbx+r10]
00000000`00406218 4e8d140a        lea     r10,[rdx+r9]
00000000`0040621c 4e8d1c0f        lea     r11,[rdi+r9]
00000000`00406220 4f8d3c08        lea     r15,[r8+r9]
00000000`00406224 4e8d2408        lea     r12,[rax+r9]
00000000`00406228 4b8d2c31        lea     rbp,[r9+r14]
00000000`0040622c 660f6f0c2b      movdqa  xmm1,xmmword ptr [rbx+rbp]
00000000`00406231 66420fe00c23    pavgb   xmm1,xmmword ptr [rbx+r12]
00000000`00406237 660fe0c8        pavgb   xmm1,xmm0
00000000`0040623b 66420f6f043b    movdqa  xmm0,xmmword ptr [rbx+r15]
00000000`00406241 66420fe0041b    pavgb   xmm0,xmmword ptr [rbx+r11]
00000000`00406247 66420f6f1413    movdqa  xmm2,xmmword ptr [rbx+r10]
; xmm14 is never written inside this loop; presumably it holds the zero vector
; from the source (avg_b(d, zero)) -- TODO confirm against the loop preheader.
00000000`0040624d 66410fe0d6      pavgb   xmm2,xmm14
00000000`00406252 660fe0d0        pavgb   xmm2,xmm0
00000000`00406256 660fe0d1        pavgb   xmm2,xmm1
; Both components of the store address are reloaded from stack slots on every
; iteration; the store goes to [r11 + r9 + r10].
00000000`0040625a 4c8b9424f0000000 mov     r10,qword ptr [rsp+0F0h]
00000000`00406262 4f8d1411        lea     r10,[r9+r10]
00000000`00406266 4c8b9c24e8000000 mov     r11,qword ptr [rsp+0E8h]
00000000`0040626e 66430f7f1413    movdqa  xmmword ptr [r11+r10],xmm2
; Index advances by 16 using a 32-bit add; esi is the remaining-iteration count.
00000000`00406274 4183c110        add     r9d,10h
00000000`00406278 ffce            dec     esi
00000000`0040627a 7584            jne     main+0x6200 (00000000`00406200)

Annotation: clay code

Author: pruned
Mode: text
Date: Fri, 4 Feb 2011 13:27:23
Plain Text |
        // NOTE(review): breakpoint() is presumably a debugger trap placed here
        // so the generated loop is easy to find in the disassembly -- confirm.
        breakpoint();
        // For each x in range(loops): read seven 16-byte vectors, one per
        // stride step (offsets 0..6 * stride) at byte offset x*16 from src,
        // reduce them with a tree of avg_b calls, and write the result to
        // dst + x*16.
        for (x in range(loops))
        {
            // aload16b / astore16b are defined elsewhere; presumably aligned
            // 16-byte load / store (the disassembly uses movdqa) -- confirm.
            var a = aload16b(src+x*16+0*stride);
            var b = aload16b(src+x*16+1*stride);
            var c = aload16b(src+x*16+2*stride);
            var d = aload16b(src+x*16+3*stride);
            var e = aload16b(src+x*16+4*stride);
            var f = aload16b(src+x*16+5*stride);
            var g = aload16b(src+x*16+6*stride);
            // All-zero byte vector; recreated each iteration in the source.
            var zero = Vec[Byte, 16](0u8);
            
            // First level: pair the vectors symmetrically around d
            // (a/g, b/f, c/e); d itself is averaged with zero.
            var ag = avg_b(a,g);
            var bf = avg_b(b,f);
            var ce = avg_b(c,e);
            var d2 = avg_b(d,zero);
            
            // Second level: combine the four partial results pairwise.
            var agbf = avg_b(ag,bf);
            var ced = avg_b(ce,d2);
            
            // Final level: a single vector combining all seven inputs.
            var t = avg_b(agbf, ced);
            
            astore16b(dst+x*16, t);
        }

Annotation: minor change

Author: pruned
Mode: text
Date: Fri, 4 Feb 2011 13:34:13
Plain Text |
            // Loop-body fragment (the enclosing loop header is not part of
            // this paste). Compared with the indexed form, the x*16 term is
            // gone from the loads: src itself is advanced by 16 at the end of
            // the body, while the store still indexes dst by x*16.
            var a = aload16b(src+0*stride);
            var b = aload16b(src+1*stride);
            var c = aload16b(src+2*stride);
            var d = aload16b(src+3*stride);
            var e = aload16b(src+4*stride);
            var f = aload16b(src+5*stride);
            var g = aload16b(src+6*stride);
            
            // All-zero byte vector used as the partner for the middle vector d.
            var zero = Vec[Byte, 16](0u8);
            
            // First level: symmetric pairs a/g, b/f, c/e, and d with zero.
            var ag = avg_b(a,g);
            var bf = avg_b(b,f);
            var ce = avg_b(c,e);
            var d2 = avg_b(d,zero);
            
            // Second level: combine the four partial results pairwise.
            var agbf = avg_b(ag,bf);
            var ced = avg_b(ce,d2);
            
            // Final level: one vector combining all seven inputs.
            var t = avg_b(agbf, ced);
            
            astore16b(dst+x*16, t);
            // Step the source pointer to the next 16-byte column.
            src += 16;









; NOTE(review): presumably the loop generated for the "minor change" Clay
; snippet above (src += 16, store to dst+x*16) -- the paste does not say so.
; 20 instructions per iteration. All seven loads share one 64-bit index (r15)
; with a different base register each; only the store address is still
; computed separately.
main+0x61e0:
00000000`004061e0 66420f6f043a    movdqa  xmm0,xmmword ptr [rdx+r15]
; xmm14 is never written inside this loop; presumably the zero vector.
00000000`004061e6 66410fe0c6      pavgb   xmm0,xmm14
00000000`004061eb 66430f6f0c38    movdqa  xmm1,xmmword ptr [r8+r15]
00000000`004061f1 66420fe00c3f    pavgb   xmm1,xmmword ptr [rdi+r15]
00000000`004061f7 660fe0c8        pavgb   xmm1,xmm0
00000000`004061fb 66430f6f443d00  movdqa  xmm0,xmmword ptr [r13+r15]
00000000`00406202 66430fe00439    pavgb   xmm0,xmmword ptr [r9+r15]
00000000`00406208 66420f6f1439    movdqa  xmm2,xmmword ptr [rcx+r15]
00000000`0040620e 66420fe0143e    pavgb   xmm2,xmmword ptr [rsi+r15]
00000000`00406214 660fe0d0        pavgb   xmm2,xmm0
00000000`00406218 660fe0d1        pavgb   xmm2,xmm1
; Store index: r11 carries the offset in its upper 32 bits. It is shifted down
; arithmetically by 32 to get the offset, then r11 is advanced by
; 0x1000000000 (= 16 << 32). The 64-bit constant is materialised in rbp inside
; the loop on every iteration.
0000000`0040621c 4d89dc          mov     r12,r11
00000000`0040621f 49c1fc20        sar     r12,20h
00000000`00406223 48bd0000000010000000 mov rbp,1000000000h
00000000`0040622d 4d8d1c2b        lea     r11,[r11+rbp]
00000000`00406231 4901dc          add     r12,rbx
00000000`00406234 66430f7f1426    movdqa  xmmword ptr [r14+r12],xmm2
; Load index advances by 16; r10 is the remaining-iteration count.
00000000`0040623a 4983c710        add     r15,10h
00000000`0040623e 49ffca          dec     r10
00000000`00406241 759d            jne     main+0x61e0 (00000000`004061e0)

Annotation: another minor change

Author: pruned
Mode: text
Date: Fri, 4 Feb 2011 13:44:23
Plain Text |
            // Loop-body fragment (the enclosing loop header is not part of
            // this paste). Neither the loads nor the store use the loop index
            // any more: both src and dst are advanced by 16 at the end of the
            // body.
            var a = aload16b(src+0*stride);
            var b = aload16b(src+1*stride);
            var c = aload16b(src+2*stride);
            var d = aload16b(src+3*stride);
            var e = aload16b(src+4*stride);
            var f = aload16b(src+5*stride);
            var g = aload16b(src+6*stride);
            
            // All-zero byte vector used as the partner for the middle vector d.
            var zero = Vec[Byte, 16](0u8);
            
            // First level: symmetric pairs a/g, b/f, c/e, and d with zero.
            var ag = avg_b(a,g);
            var bf = avg_b(b,f);
            var ce = avg_b(c,e);
            var d2 = avg_b(d,zero);
            
            // Second level: combine the four partial results pairwise.
            var agbf = avg_b(ag,bf);
            var ced = avg_b(ce,d2);
            
            // Final level: one vector combining all seven inputs.
            var t = avg_b(agbf, ced);
            
            astore16b(dst, t);
            // Step both pointers to the next 16-byte column.
            src += 16;
            dst += 16;






; NOTE(review): presumably the loop generated for the "another minor change"
; Clay snippet above (src += 16; dst += 16) -- the paste does not say so.
; 15 instructions per iteration: seven loads and the store all use the same
; index register (rbp) with a distinct base register each, so no address
; arithmetic remains in the loop body apart from the index increment.
00000000`00406180 66410f6f042f    movdqa  xmm0,xmmword ptr [r15+rbp]
; xmm14 is never written inside this loop; presumably the zero vector.
00000000`00406186 66410fe0c6      pavgb   xmm0,xmm14
00000000`0040618b 66410f6f0c29    movdqa  xmm1,xmmword ptr [r9+rbp]
00000000`00406191 660fe00c2a      pavgb   xmm1,xmmword ptr [rdx+rbp]
00000000`00406196 660fe0c8        pavgb   xmm1,xmm0
00000000`0040619a 66410f6f0428    movdqa  xmm0,xmmword ptr [r8+rbp]
00000000`004061a0 66410fe0042b    pavgb   xmm0,xmmword ptr [r11+rbp]
00000000`004061a6 66410f6f542d00  movdqa  xmm2,xmmword ptr [r13+rbp]
00000000`004061ad 66410fe0142a    pavgb   xmm2,xmmword ptr [r10+rbp]
00000000`004061b3 660fe0d0        pavgb   xmm2,xmm0
00000000`004061b7 660fe0d1        pavgb   xmm2,xmm1
00000000`004061bb 66410f7f142c    movdqa  xmmword ptr [r12+rbp],xmm2
; Shared index advances by 16; rcx is the remaining-iteration count.
00000000`004061c1 4883c510        add     rbp,10h
00000000`004061c5 48ffc9          dec     rcx
00000000`004061c8 75b6            jne     main+0x6180 (00000000`00406180)

Annotation: horizontal pass

Author: pruned
Mode: text
Date: Fri, 4 Feb 2011 13:52:16
Plain Text |
; Horizontal pass (per the annotation title); its source is not in this paste.
; 19 instructions per iteration. Seven 16-byte loads at byte offsets -6..0
; from r8 are reduced with pavgb and stored to [r11]; r8 and r11 both advance
; by 16 and r9 counts the remaining iterations.
00000000`00406100 f3410f6f40ff    movdqu  xmm0,xmmword ptr [r8-1]
00000000`00406106 f3410f6f48fa    movdqu  xmm1,xmmword ptr [r8-6]
00000000`0040610c f3410f6f50fb    movdqu  xmm2,xmmword ptr [r8-5]
00000000`00406112 f3410f6f58fc    movdqu  xmm3,xmmword ptr [r8-4]
00000000`00406118 f3410f6f60fe    movdqu  xmm4,xmmword ptr [r8-2]
00000000`0040611e 660fe0d0        pavgb   xmm2,xmm0
00000000`00406122 f3410f6f00      movdqu  xmm0,xmmword ptr [r8]
00000000`00406127 660fe0c1        pavgb   xmm0,xmm1
00000000`0040612b 660fe0c2        pavgb   xmm0,xmm2
; NOTE(review): this is the only load using the aligned form (movdqa); the
; other six use movdqu. movdqa faults unless r8-3 is 16-byte aligned, which
; cannot be confirmed from this listing -- check the source of the middle load.
00000000`0040612f 66410f6f48fd    movdqa  xmm1,xmmword ptr [r8-3]
; xmm14 is never written inside this loop; presumably the zero vector.
00000000`00406135 66410fe0ce      pavgb   xmm1,xmm14
00000000`0040613a 660fe0e3        pavgb   xmm4,xmm3
00000000`0040613e 660fe0e1        pavgb   xmm4,xmm1
00000000`00406142 660fe0e0        pavgb   xmm4,xmm0
00000000`00406146 66410f7f23      movdqa  xmmword ptr [r11],xmm4
00000000`0040614b 4983c010        add     r8,10h
00000000`0040614f 4983c310        add     r11,10h
00000000`00406153 49ffc9          dec     r9
00000000`00406156 75a8            jne     main+0x6100 (00000000`00406100)

Annotation: LLVM is lost...

Author: pruned
Mode: text
Date: Fri, 4 Feb 2011 13:55:59
Plain Text |
; Another build of a seven-load pavgb reduction, with loads at offsets 0..6
; from a common address (source not in this paste). 39 instructions per
; iteration. The 32-bit index r10d is sign-extended at the top of every
; iteration, and each load address is rebuilt from scratch with a
; mov / or / add triple: (r10 | k) + rdx, then r8 is added by the addressing
; mode.
; NOTE(review): `or` with 1..6 equals `add` only if the low bits of r10 are
; zero; the index advances by 16, so presumably it is a multiple of 16 --
; confirm the initial value.
main+0x6110:
00000000`00406110 4d63d2          movsxd  r10,r10d
; Offset 0: r11 = r10 + rdx, used by the load at 0x40613b.
00000000`00406113 4d8d1c12        lea     r11,[r10+rdx]
; Offset 5.
00000000`00406117 4c89d5          mov     rbp,r10
00000000`0040611a 4883cd05        or      rbp,5
00000000`0040611e 4801d5          add     rbp,rdx
00000000`00406121 f3410f6f0428    movdqu  xmm0,xmmword ptr [r8+rbp]
; Offset 1.
00000000`00406127 4c89d5          mov     rbp,r10
00000000`0040612a 4883cd01        or      rbp,1
00000000`0040612e 4801d5          add     rbp,rdx
00000000`00406131 f3410f6f0c28    movdqu  xmm1,xmmword ptr [r8+rbp]
00000000`00406137 660fe0c8        pavgb   xmm1,xmm0
00000000`0040613b f3430f6f0418    movdqu  xmm0,xmmword ptr [r8+r11]
; Offset 6.
00000000`00406141 4d89d3          mov     r11,r10
00000000`00406144 4983cb06        or      r11,6
00000000`00406148 4901d3          add     r11,rdx
00000000`0040614b f3430f6f1418    movdqu  xmm2,xmmword ptr [r8+r11]
00000000`00406151 660fe0d0        pavgb   xmm2,xmm0
00000000`00406155 660fe0d1        pavgb   xmm2,xmm1
; Offset 4.
00000000`00406159 4d89d3          mov     r11,r10
00000000`0040615c 4983cb04        or      r11,4
00000000`00406160 4901d3          add     r11,rdx
00000000`00406163 f3430f6f0418    movdqu  xmm0,xmmword ptr [r8+r11]
; Offset 2.
00000000`00406169 4d89d3          mov     r11,r10
00000000`0040616c 4983cb02        or      r11,2
00000000`00406170 4901d3          add     r11,rdx
00000000`00406173 f3430f6f0c18    movdqu  xmm1,xmmword ptr [r8+r11]
00000000`00406179 660fe0c8        pavgb   xmm1,xmm0
; Offset 3: the only load using the aligned form (movdqa).
00000000`0040617d 4d89d3          mov     r11,r10
00000000`00406180 4983cb03        or      r11,3
00000000`00406184 4901d3          add     r11,rdx
00000000`00406187 66430f6f0418    movdqa  xmm0,xmmword ptr [r8+r11]
; xmm14 is never written inside this loop; presumably the zero vector.
00000000`0040618d 66410fe0c6      pavgb   xmm0,xmm14
00000000`00406192 660fe0c1        pavgb   xmm0,xmm1
00000000`00406196 660fe0c2        pavgb   xmm0,xmm2
; Store to [r9 + r10 + r13].
00000000`0040619a 4f8d1c2a        lea     r11,[r10+r13]
00000000`0040619e 66430f7f0419    movdqa  xmmword ptr [r9+r11],xmm0
; Index advances by 16 using a 32-bit add; ecx is the remaining-iteration count.
00000000`004061a4 4183c210        add     r10d,10h
00000000`004061a8 ffc9            dec     ecx
00000000`004061aa 0f8560ffffff    jne     main+0x6110 (00000000`00406110)

Annotation: with i64 ranges

Author: pruned
Mode: text
Date: Fri, 4 Feb 2011 14:10:40
Plain Text |
; First of the two loops pasted under "with i64 ranges". 19 instructions per
; iteration: the seven loads use constant displacements -6..0 from r8, so no
; per-load address arithmetic is left. r8 and r11 advance by 16 and r9 counts
; the remaining iterations.
00000000`00406100 f3410f6f40ff    movdqu  xmm0,xmmword ptr [r8-1]
00000000`00406106 f3410f6f48fa    movdqu  xmm1,xmmword ptr [r8-6]
00000000`0040610c f3410f6f50fb    movdqu  xmm2,xmmword ptr [r8-5]
00000000`00406112 f3410f6f58fc    movdqu  xmm3,xmmword ptr [r8-4]
00000000`00406118 f3410f6f60fe    movdqu  xmm4,xmmword ptr [r8-2]
00000000`0040611e 660fe0d0        pavgb   xmm2,xmm0
00000000`00406122 f3410f6f00      movdqu  xmm0,xmmword ptr [r8]
00000000`00406127 660fe0c1        pavgb   xmm0,xmm1
00000000`0040612b 660fe0c2        pavgb   xmm0,xmm2
; NOTE(review): the only load using the aligned form (movdqa); it faults
; unless r8-3 is 16-byte aligned, which this listing alone cannot confirm.
00000000`0040612f 66410f6f48fd    movdqa  xmm1,xmmword ptr [r8-3]
; xmm14 is never written inside this loop; presumably the zero vector.
00000000`00406135 66410fe0ce      pavgb   xmm1,xmm14
00000000`0040613a 660fe0e3        pavgb   xmm4,xmm3
00000000`0040613e 660fe0e1        pavgb   xmm4,xmm1
00000000`00406142 660fe0e0        pavgb   xmm4,xmm0
00000000`00406146 66410f7f23      movdqa  xmmword ptr [r11],xmm4
00000000`0040614b 4983c010        add     r8,10h
00000000`0040614f 4983c310        add     r11,10h
00000000`00406153 49ffc9          dec     r9
00000000`00406156 75a8            jne     main+0x6100 (00000000`00406100)


; Second of the two loops pasted under "with i64 ranges". 15 instructions per
; iteration: seven aligned loads and the aligned store all use the shared
; 64-bit index rbp with a distinct base register each.
00000000`00406180 66410f6f042a    movdqa  xmm0,xmmword ptr [r10+rbp]
; xmm14 is never written inside this loop; presumably the zero vector.
00000000`00406186 66410fe0c6      pavgb   xmm0,xmm14
00000000`0040618b 66410f6f0c2f    movdqa  xmm1,xmmword ptr [r15+rbp]
00000000`00406191 66410fe00c2b    pavgb   xmm1,xmmword ptr [r11+rbp]
00000000`00406197 660fe0c8        pavgb   xmm1,xmm0
00000000`0040619b 660f6f042a      movdqa  xmm0,xmmword ptr [rdx+rbp]
00000000`004061a0 66410fe00429    pavgb   xmm0,xmmword ptr [r9+rbp]
00000000`004061a6 66410f6f542d00  movdqa  xmm2,xmmword ptr [r13+rbp]
00000000`004061ad 66410fe01428    pavgb   xmm2,xmmword ptr [r8+rbp]
00000000`004061b3 660fe0d0        pavgb   xmm2,xmm0
00000000`004061b7 660fe0d1        pavgb   xmm2,xmm1
00000000`004061bb 66410f7f142c    movdqa  xmmword ptr [r12+rbp],xmm2
; Shared index advances by 16; rcx is the remaining-iteration count.
00000000`004061c1 4883c510        add     rbp,10h
00000000`004061c5 48ffc9          dec     rcx
00000000`004061c8 75b6            jne     main+0x6180 (00000000`00406180)

New Annotation

Summary:
Author:
Mode:
Body: