#if( SORT_SIZE>2048 )
#error SORT_SIZE must not exceed 2048
#endif

#define NUM_THREADS     (SORT_SIZE/2)
#define INVERSION       (16*2 + 8*3)

//--------------------------------------------------------------------------------------
// Structured Buffers
//--------------------------------------------------------------------------------------
RWSTRUCTUREDBUFFER(counterBuffer, ParticleCounters, 4);
RWSTRUCTUREDBUFFER(indexBuffer, uint, 2);
RWSTRUCTUREDBUFFER(distanceBuffer, float, 6);

// Number of elements to process, read from the first counter record.
#define NumElements counterBuffer[0].aliveCount_afterSimulation

//--------------------------------------------------------------------------------------
// Bitonic Sort Compute Shader
//--------------------------------------------------------------------------------------

// Per-group staging area: .x holds the sort key (distance), .y holds the
// element index converted to float.
groupshared float2 g_LDS[SORT_SIZE];

// Each thread group copies up to 512 (distance, index) pairs into group-shared
// memory, runs compare-exchange passes with strides SORT_SIZE/2, SORT_SIZE/4,
// ..., 1 over them, and writes the pairs back to the same buffer locations.
// Every thread handles two elements on load/store and one pair per pass.
//
// NOTE(review): the constant 512 (and the original 256 * 2 base offset) equals
// SORT_SIZE / 2 * NUM_THREADS only when SORT_SIZE == 512 -- confirm that this
// shader is never compiled with a different SORT_SIZE.
// NOTE(review): indices travel through a float, which represents integers
// exactly only up to 2^24 -- confirm element counts stay below that.
[numthreads(NUM_THREADS, 1, 1)]
void main(uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex)
{
    const uint elementsPerGroup = 512;
    const uint totalElements = NumElements;

    // First global element owned by this group.
    const uint groupFirstElement = Gid.x * elementsPerGroup;

    // Number of valid elements in this group's window. The comparison is done
    // before subtracting because the operands are unsigned: a plain
    // "total - first" would wrap around for groups that start past the end
    // of the data instead of yielding zero.
    uint validCount = 0;
    if (totalElements > groupFirstElement)
    {
        validCount = min(elementsPerGroup, totalElements - groupFirstElement);
    }

    const uint globalBase = groupFirstElement + GTid.x;
    const uint localBase = GI;
    uint i;

    // Load shared data: thread GI fetches elements GI and GI + NUM_THREADS.
    [unroll]
    for (i = 0; i < 2; ++i)
    {
        const uint offset = i * NUM_THREADS;
        if (GI + offset < validCount)
        {
            const uint loadIndex = globalBase + offset;
            g_LDS[localBase + offset] = float2(distanceBuffer[loadIndex], (float)indexBuffer[loadIndex]);
        }
    }
    GroupMemoryBarrierWithGroupSync();

    // Compare-exchange passes over group-shared memory, halving the stride
    // each time.
    for (uint stride = SORT_SIZE >> 1; stride > 0; stride >>= 1)
    {
        // Map the thread index onto the lower element of its pair: keep the
        // bits below the stride, and double the remainder to skip over the
        // partner half of each 2*stride wide segment.
        const uint lowBits = GI & (stride - 1);
        const uint highBits = 2 * (GI - lowBits);
        const uint lowerIndex = highBits + lowBits;
        const uint upperIndex = lowerIndex + stride;

        // NOTE(review): this bounds test, the two loads and the "a.x > b.x"
        // comparison were reconstructed from a damaged source line -- confirm
        // against the upstream shader.
        // lowerIndex < upperIndex, so checking the upper element is enough to
        // keep both accesses inside the loaded range.
        if (upperIndex < validCount)
        {
            const float2 a = g_LDS[lowerIndex];
            const float2 b = g_LDS[upperIndex];

            // Keep the smaller distance at the lower index.
            if (a.x > b.x)
            {
                g_LDS[lowerIndex] = b;
                g_LDS[upperIndex] = a;
            }
        }

        // Kept outside the branch so that every thread of the group reaches
        // the barrier on every pass.
        GroupMemoryBarrierWithGroupSync();
    }

    // Store shared data back to the locations it was loaded from.
    [unroll]
    for (i = 0; i < 2; ++i)
    {
        const uint offset = i * NUM_THREADS;
        if (GI + offset < validCount)
        {
            const uint loadIndex = localBase + offset;
            const uint storeIndex = globalBase + offset;
            distanceBuffer[storeIndex] = g_LDS[loadIndex].x;
            indexBuffer[storeIndex] = (uint)g_LDS[loadIndex].y;
        }
    }
}