337
387
// Following section disappears without intrinsics
339
389
inline void v_vsubTranspose4ntw(float *in, float *out, int xline, int yline) {
340
393
// Do a 4x4 transpose in the SSE registers, non-temporal writes.
341
394
// An sfence is needed after using this sub to ensure global visibility of the writes.
342
395
__m128 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;