Paste: loop
Author: | pruned |
Mode: | javascript |
Date: | Sat, 29 Oct 2011 19:58:22 |
Plain Text |
// fft_tw_2xN_alt: for every k in range(n/2*1i64) and every j in range(m*1i64),
// reads the pair y[k+j*n] and y[k+n/2+j*n], scales the second element by the
// table entry for k, and writes the sum and the difference back to the same
// two positions.
//
// Parameters (as used below):
//   _y       - indexable data; read via y[...] and written via y.st[...]
//   n        - stride between consecutive j groups; n/2 is the distance
//              between the two elements of a pair
//   m        - number of j iterations per k
//   _table_w - indexable table of multipliers, indexed by k
//
// NOTE(review): `1i64` is presumably a 64-bit integer literal used to widen
// the loop bounds, and `y.st[i] <= v` presumably stores v at index i - confirm
// against the language definition.
fft_tw_2xN_alt(_y, n, m, _table_w)
{
var y = _y;
// NOTE(review): table_w is bound here but never read in this version; the
// loop body indexes _table_w directly. Confirm whether table_w[k] was meant.
var table_w = begin(_table_w);
for (k in range(n/2*1i64))
{
for (j in range(m*1i64))
{
// t: first element of the pair; s: element n/2 positions further on.
var t = y[k+j*n];
var s = y[k+n/2+j*n];
// The multiplier depends only on k, not on j.
var w_k = _table_w[k];
var sw = w_k * s;
// Write t+sw and t-sw back over the two inputs.
y.st[k+j*n] <= t+sw;
y.st[k+n/2+j*n] <= t-sw;
}
}
}
Author: | pruned |
Mode: | javascript |
Date: | Sat, 29 Oct 2011 20:08:23 |
Plain Text |
// fft_tw_2xN_alt: for every k in range(n/2*1i64) and every j in range(m*1i64),
// reads the pair y[k+j*n] and y[k+n/2+j*n], multiplies the second element by
// table_w[k], and writes the sum and the difference back to the same two
// positions.
//
// Parameters (as used below):
//   _y       - indexable data; read via y[...] and written via y.st[...]
//   n        - stride between consecutive j groups; n/2 is the distance
//              between the two elements of a pair
//   m        - number of j iterations per k
//   _table_w - multiplier table; accessed through table_w = begin(_table_w)
//
// NOTE(review): `1i64` is presumably a 64-bit integer literal used to widen
// the loop bounds, and `y.st[i] <= v` presumably stores v at index i - confirm
// against the language definition.
fft_tw_2xN_alt(_y, n, m, _table_w)
{
var y = _y;
// The table is read through the value returned by begin(); what begin()
// returns is not visible here.
var table_w = begin(_table_w);
for (k in range(n/2*1i64))
{
for (j in range(m*1i64))
{
// t: first element of the pair.
var t = y[k+j*n];
// s: second element of the pair, already multiplied by the table entry
// for k (table on the left-hand side of the product).
var s = table_w[k] * y[k+n/2+j*n];
// Write t+s and t-s back over the two inputs.
y.st[k+j*n] <= t + s;
y.st[k+n/2+j*n] <= t - s;
}
}
}
Author: | pruned |
Mode: | assembly-x86 |
Date: | Sat, 29 Oct 2011 20:26:19 |
Plain Text |
// Disassembly of one loop iteration (scalar single-precision SSE).
// Shorthand used in the annotations:
//   a = [r11+rbp]   b = [rbx+rbp]   c = [rdx+rbp]   d = [r9+rbp]
//   w0 = [r15-4]    w1 = [r15]
//   p = b*w0 - a*w1         q = b*w1 + a*w0
// Stores: [rdx+rbp] = c+p, [r9+rbp] = d+q, [rbx+rbp] = c-p, [r11+rbp] = d-q.
// NOTE(review): this has the shape of a complex multiply (b + i*a)*(w0 + i*w1)
// followed by an add/subtract with (c + i*d) - presumably the t +/- w*s update
// of the source loop; confirm the real/imaginary layout.
// The backward branch targets 00402760, two bytes before the first
// instruction listed here, so the loop head itself is not shown.
00000000`00402762 f3410f10042b movss xmm0,dword ptr [r11+rbp] // xmm0 = a
00000000`00402768 f3410f104ffc movss xmm1,dword ptr [r15-4] // xmm1 = w0
00000000`0040276e f3410f1017 movss xmm2,dword ptr [r15] // xmm2 = w1
00000000`00402773 0f28d8 movaps xmm3,xmm0 // xmm3 = a (copy; xmm0 is needed again below)
00000000`00402776 f30f59da mulss xmm3,xmm2 // xmm3 = a*w1
00000000`0040277a f30f10242b movss xmm4,dword ptr [rbx+rbp] // xmm4 = b
00000000`0040277f 0f28ec movaps xmm5,xmm4 // xmm5 = b (copy; xmm4 is needed again below)
00000000`00402782 f30f59e9 mulss xmm5,xmm1 // xmm5 = b*w0
00000000`00402786 f30f5ceb subss xmm5,xmm3 // xmm5 = p = b*w0 - a*w1
00000000`0040278a f30f101c2a movss xmm3,dword ptr [rdx+rbp] // xmm3 = c (xmm3 reused)
00000000`0040278f 0f28f3 movaps xmm6,xmm3 // xmm6 = c
00000000`00402792 f30f58f5 addss xmm6,xmm5 // xmm6 = c + p  (6 = 3+5)
00000000`00402796 f3410f103c29 movss xmm7,dword ptr [r9+rbp] // xmm7 = d
00000000`0040279c f30f11342a movss dword ptr [rdx+rbp],xmm6 // [rdx+rbp] = c + p
00000000`004027a1 f30f59c1 mulss xmm0,xmm1 // xmm0 = a*w0
00000000`004027a5 f30f59e2 mulss xmm4,xmm2 // xmm4 = b*w1
00000000`004027a9 f30f58e0 addss xmm4,xmm0 // xmm4 = q = b*w1 + a*w0
00000000`004027ad 0f28c7 movaps xmm0,xmm7 // xmm0 = d
00000000`004027b0 f30f58c4 addss xmm0,xmm4 // xmm0 = d + q
00000000`004027b4 f3410f110429 movss dword ptr [r9+rbp],xmm0 // [r9+rbp] = d + q
00000000`004027ba f30f5cdd subss xmm3,xmm5 // xmm3 = c - p
00000000`004027be f30f111c2b movss dword ptr [rbx+rbp],xmm3 // [rbx+rbp] = c - p (overwrites b)
00000000`004027c3 f30f5cfc subss xmm7,xmm4 // xmm7 = d - q
00000000`004027c7 f3410f113c2b movss dword ptr [r11+rbp],xmm7 // [r11+rbp] = d - q (overwrites a)
00000000`004027cd 4c01c5 add rbp,r8 // advance the shared offset: rbp += r8
00000000`004027d0 49ffcd dec r13 // r13 -= 1 (iteration counter)
00000000`004027d3 758b jne main+0x2760 (00000000`00402760) // repeat while r13 != 0
now 8% faster:
// Disassembly of one loop iteration (scalar single-precision SSE).
// Shorthand used in the annotations:
//   a = [r11+rbp]   b = [rbx+rbp]   c = [rdx+rbp]   d = [r9+rbp]
//   w0 = [r15-4]    w1 = [r15]
//   p = w0*b - w1*a         q = w0*a + w1*b
// Stores: [rdx+rbp] = c+p, [r9+rbp] = d+q, [rbx+rbp] = c-p, [r11+rbp] = d-q.
// In this listing the register copies are taken from the w0/w1 values
// (xmm1, xmm2) and the products accumulate in xmm1/xmm2.
// The parenthesised notes such as "3 = 0*2" refer to xmm register numbers.
// The backward branch targets 00402760, two bytes before the first
// instruction listed here, so the loop head itself is not shown.
00000000`00402762 f3410f10042b movss xmm0,dword ptr [r11+rbp] // xmm0 = a
00000000`00402768 f3410f104ffc movss xmm1,dword ptr [r15-4] // xmm1 = w0
00000000`0040276e f3410f1017 movss xmm2,dword ptr [r15] // xmm2 = w1
00000000`00402773 0f28da movaps xmm3,xmm2 // xmm3 = w1 (copy; xmm2 is needed again below)
00000000`00402776 f30f59d8 mulss xmm3,xmm0 // xmm3 = w1*a  (3 = 0*2)
00000000`0040277a f30f10242b movss xmm4,dword ptr [rbx+rbp] // xmm4 = b
00000000`0040277f 0f28e9 movaps xmm5,xmm1 // xmm5 = w0 (copy; xmm1 is needed again below)
00000000`00402782 f30f59ec mulss xmm5,xmm4 // xmm5 = w0*b  (5 = 1*4)
00000000`00402786 f30f5ceb subss xmm5,xmm3 // xmm5 = p = w0*b - w1*a  (5 = 1*4-3)
00000000`0040278a f30f101c2a movss xmm3,dword ptr [rdx+rbp] // xmm3 = c (xmm3 reused)
00000000`0040278f 0f28f3 movaps xmm6,xmm3 // xmm6 = c
00000000`00402792 f30f58f5 addss xmm6,xmm5 // xmm6 = c + p  (6=3+5)
00000000`00402796 f3410f103c29 movss xmm7,dword ptr [r9+rbp] // xmm7 = d
00000000`0040279c f30f11342a movss dword ptr [rdx+rbp],xmm6 // [rdx+rbp] = c + p
00000000`004027a1 f30f59d4 mulss xmm2,xmm4 // xmm2 = w1*b
00000000`004027a5 f30f59c8 mulss xmm1,xmm0 // xmm1 = w0*a
00000000`004027a9 f30f58ca addss xmm1,xmm2 // xmm1 = q = w0*a + w1*b
00000000`004027ad 0f28c7 movaps xmm0,xmm7 // xmm0 = d
00000000`004027b0 f30f58c1 addss xmm0,xmm1 // xmm0 = d + q
00000000`004027b4 f3410f110429 movss dword ptr [r9+rbp],xmm0 // [r9+rbp] = d + q
00000000`004027ba f30f5cdd subss xmm3,xmm5 // xmm3 = c - p
00000000`004027be f30f111c2b movss dword ptr [rbx+rbp],xmm3 // [rbx+rbp] = c - p (overwrites b)
00000000`004027c3 f30f5cf9 subss xmm7,xmm1 // xmm7 = d - q
00000000`004027c7 f3410f113c2b movss dword ptr [r11+rbp],xmm7 // [r11+rbp] = d - q (overwrites a)
00000000`004027cd 4c01c5 add rbp,r8 // advance the shared offset: rbp += r8
00000000`004027d0 49ffcd dec r13 // r13 -= 1 (iteration counter)
00000000`004027d3 758b jne main+0x2760 (00000000`00402760) // repeat while r13 != 0
New Annotation