// One compare-exchange pass of a GPU sort over (distance, index) pairs.
//
// RWSTRUCTUREDBUFFER, ParticleCounters and job_params are declared outside
// this view (presumably in shared headers / a constant buffer) — confirm the
// required includes are present above this point.

RWSTRUCTUREDBUFFER(counterBuffer, ParticleCounters, 4);
RWSTRUCTUREDBUFFER(indexBuffer, uint, 2);
RWSTRUCTUREDBUFFER(distanceBuffer, float, 6);

// Element count of the array being sorted, read from the counter buffer.
#define NumElements counterBuffer[0].aliveCount_afterSimulation

// Each thread handles one pair of elements: it compares the distances at
// `index` and `nSwapElem` and, if they are out of order, swaps both the
// distances and the matching entries of indexBuffer.
//
// Gid  : thread-group id; only .x is used.
// GTid : thread id within the group; only .x is used.
[numthreads(256, 1, 1)]
void main(uint3 Gid : SV_GroupID, uint3 GTid : SV_GroupThreadID)
{
	uint4 tgp;

	tgp.x = Gid.x * 256;	// first thread slot covered by this group
	tgp.y = 0;				// base offset of the sortable array
	tgp.z = NumElements;	// element count of the sortable array
	// Not read anywhere in the visible kernel. Note the subtraction is
	// unsigned, so max(0, ...) does not protect against wrap-around.
	tgp.w = min(512, max(0, NumElements - Gid.x * 512));

	uint localID = tgp.x + GTid.x; // calculate threadID within this sortable-array

	// job_params.x is used as a mask via (x - 1), so it is presumably a power
	// of two — confirm against the code that fills job_params.
	uint index_low = localID & (job_params.x - 1);
	uint index_high = 2 * (localID - index_low);

	uint index = tgp.y + index_high + index_low;
	uint nSwapElem = tgp.y + index_high + job_params.y + job_params.z * index_low;

	// NOTE(review): the original text between "nSwapElem <" and "> b)" was
	// lost. The upper bound and the two loads below are reconstructed from
	// the surviving code (tgp.y / tgp.z are otherwise unused, and a / b are
	// written back further down) — verify against the upstream shader.
	if (nSwapElem < tgp.y + tgp.z)
	{
		float a = distanceBuffer[index];
		float b = distanceBuffer[nSwapElem];

		// Keep the smaller distance at the lower position.
		if (a > b)
		{
			distanceBuffer[index] = b;
			distanceBuffer[nSwapElem] = a;

			// Move the payload indices together with their distances.
			uint aI = indexBuffer[index];
			uint bI = indexBuffer[nSwapElem];
			indexBuffer[index] = bI;
			indexBuffer[nSwapElem] = aI;
		}
	}
}