Optimizing for SIMD Floating-point Applications
5
5-11
y1 x1
movhps xmm7, [ecx+16]
// xmm7 = y2 x2 y1 x1
movlps xmm0, [ecx+32]
// xmm0 = -- -- y3 x3
movhps xmm0, [ecx+48]
// xmm0 = y4 x4 y3 x3
movaps xmm6, xmm7
// xmm6 = y2 x2 y1 x1
shufps xmm7, xmm0, 0x88
// xmm7 = x1 x2 x3 x4 => X
shufps xmm6, xmm0, 0xDD
// xmm6 = y1 y2 y3 y4 => Y
movlps xmm2, [ecx+8]
// xmm2 = -- -- w1 z1
movhps xmm2, [ecx+24]
// xmm2 = w2 z2 w1 z1
movlps xmm1, [ecx+40]
// xmm1 = -- -- w3 z3
movhps xmm1, [ecx+56]
// xmm1 = w4 z4 w3 z3
movaps xmm0, xmm2
// xmm0 = w2 z2 w1 z1
shufps xmm2, xmm1, 0x88
// xmm2 = z1 z2 z3 z4 => Z
shufps xmm0, xmm1, 0xDD
// xmm0 = w1 w2 w3 w4 => W
movaps [edx], xmm7
// store X
movaps [edx+16], xmm6
// store Y
movaps [edx+32], xmm2
// store Z
movaps [edx+48], xmm0
// store W
// SWIZZLE XYZW -> XXXX
}
}
Example 5-3 Swizzling Data (continued)
Summary of Contents for ARCHITECTURE IA-32
Page 1: ...IA-32 Intel Architecture Optimization Reference Manual Order Number 248966-013US April 2006...
Page 220: ...IA-32 Intel Architecture Optimization 3-40...
Page 434: ...IA-32 Intel Architecture Optimization 9-20...
Page 514: ...IA-32 Intel Architecture Optimization B-60...
Page 536: ...IA-32 Intel Architecture Optimization C-22...