Plaster
New
List
Login
c
default
shinmera
2023.06.30 16:41:26
#version 430
#define SORT_SIZE 512

// Sort keys: one float per element, reordered in place by this shader.
layout(std430, row_major) buffer ParticleDistances{
  float particle_distances[];
};

// Payload values that are moved together with their key.
layout(std430, row_major) buffer AliveParticles1{
  uint alive_particles_1[];
};

#define HALF_SIZE (SORT_SIZE/2)
#define ITERATIONS (1024 < HALF_SIZE ? HALF_SIZE/1024 : 1)
#define NUM_THREADS (HALF_SIZE/ITERATIONS)

// The literal 256 equals NUM_THREADS for SORT_SIZE == 512. The load/store
// loops below stride by NUM_THREADS, so the two must be kept in sync by hand
// whenever SORT_SIZE changes.
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

// Total number of key/payload pairs to sort across all work groups.
uniform int elements;

// Per-work-group scratch tile: x = key, y = payload converted to float.
// NOTE(review): float(uint) only round-trips exactly for values up to 2^24;
// larger payload values would come back altered. Confirm the payload range.
shared vec2 local_storage[SORT_SIZE];

void main();

// Sorts one tile of SORT_SIZE consecutive entries by ascending key.
//
// Each work group loads its tile into shared memory, runs a bitonic
// compare-exchange network over it, and writes the tile back. Tiles are
// sorted independently of each other; nothing here merges across work groups.
void _GLSLTK_main_1(){
  uint group_start = gl_WorkGroupID.x * uint(SORT_SIZE);
  uint global_base_index = group_start + gl_LocalInvocationID.x;
  uint local_base_index = gl_LocalInvocationIndex;

  // Guard the subtraction: it is unsigned, so a work group whose tile starts
  // at or past `elements` (or a negative `elements`) used to wrap around to a
  // huge count that min() then clamped to a full SORT_SIZE tile. Such groups
  // now get a count of 0 and touch no memory. They still execute every
  // barrier() below, which keeps the barriers in uniform control flow.
  uint total_elements = uint(max(elements, 0));
  uint elements_in_thread_group = (group_start < total_elements)
    ? min(uint(SORT_SIZE), total_elements - group_start)
    : 0u;

  // Load the tile into shared memory; each invocation fetches 2*ITERATIONS entries.
  for(uint i = 0u; i < (2 * ITERATIONS); ++i){
    uint offset = i * NUM_THREADS;
    if((gl_LocalInvocationIndex + offset) < elements_in_thread_group){
      uint load_index = global_base_index + offset;
      local_storage[local_base_index + offset] =
        vec2(particle_distances[load_index], float(alive_particles_1[load_index]));
    }
  }
  groupMemoryBarrier();
  barrier();

  // Sorting network: merge_size doubles per stage, sub_size halves per step.
  for(uint merge_size = 2u; merge_size <= SORT_SIZE; merge_size = merge_size * 2u){
    for(uint sub_size = (merge_size >> 1); 0u < sub_size; sub_size = (sub_size >> 1)){
      for(uint i = 0u; i < ITERATIONS; ++i){
        uint tmp_index = gl_LocalInvocationIndex + (NUM_THREADS * i);
        uint index_low = tmp_index & (sub_size - 1u);
        uint index_high = 2u * (tmp_index - index_low);
        uint index = index_low + index_high;
        // The first step of a stage pairs mirrored positions; later steps
        // pair positions exactly sub_size apart.
        uint candidate = (sub_size == (merge_size >> 1))
          ? ((index_high + ((2u * sub_size) - 1u)) - index_low)
          : (index_high + (index_low + sub_size));
        // candidate is always greater than index, so this single test also
        // keeps index in range; slots that were never loaded are never read.
        if(candidate < elements_in_thread_group){
          vec2 a = local_storage[index];
          vec2 b = local_storage[candidate];
          if(b.x < a.x){
            local_storage[index] = b;
            local_storage[candidate] = a;
          }
        }
        groupMemoryBarrier();
        barrier();
      }
    }
  }

  // Write the sorted tile back to the buffers.
  for(uint i = 0u; i < (2 * ITERATIONS); ++i){
    uint offset = i * NUM_THREADS;
    if((gl_LocalInvocationIndex + offset) < elements_in_thread_group){
      uint load_index = local_base_index + offset;
      uint store_index = global_base_index + offset;
      particle_distances[store_index] = local_storage[load_index].x;
      alive_particles_1[store_index] = uint(local_storage[load_index].y);
    }
  }
}

void main(){
  _GLSLTK_main_1();
}
Raw
Annotate
Repaste
Annotations
c
default
shinmera
2023.06.30 16:42:05
#define SORT_SIZE 512

#if( SORT_SIZE>4096 )
// won't work for arrays>4096
#error due to LDS size SORT_SIZE must be 4096 or smaller
#else
#define ITEMS_PER_GROUP ( SORT_SIZE )
#endif

#define HALF_SIZE (SORT_SIZE/2)
#define ITERATIONS (HALF_SIZE > 1024 ? HALF_SIZE/1024 : 1)
#define NUM_THREADS (HALF_SIZE/ITERATIONS)
#define INVERSION (16*2 + 8*3)

//--------------------------------------------------------------------------------------
// Structured Buffers
//--------------------------------------------------------------------------------------
RWSTRUCTUREDBUFFER(counterBuffer, ParticleCounters, 4);
RWSTRUCTUREDBUFFER(indexBuffer, uint, 2);
RWSTRUCTUREDBUFFER(distanceBuffer, float, 6);

#define NumElements counterBuffer[0].aliveCount_afterSimulation

//--------------------------------------------------------------------------------------
// Bitonic Sort Compute Shader
//--------------------------------------------------------------------------------------
// Per-group scratch tile: x = key read from distanceBuffer, y = the matching
// indexBuffer entry converted to float.
// NOTE(review): (float)uint only round-trips exactly for values up to 2^24;
// larger index values would come back altered. Confirm the index range.
groupshared float2 g_LDS[SORT_SIZE];

// Sorts one tile of SORT_SIZE consecutive entries by ascending key.
//
// Each thread group loads its tile into group-shared memory, runs the
// compare-exchange network over it, and writes the tile back. Tiles are
// sorted independently of each other; nothing here merges across groups.
//
// Gid  : thread group id; selects the tile.
// DTid : dispatch thread id (unused).
// GTid : thread id within the group.
// GI   : flattened thread index within the group.
[numthreads(NUM_THREADS, 1, 1)]
void main(uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex)
{
	const uint groupStart = Gid.x * SORT_SIZE;
	const uint GlobalBaseIndex = groupStart + GTid.x;
	const uint LocalBaseIndex = GI;

	// Guard the subtraction: it is unsigned, so a group whose tile starts at or
	// past NumElements used to wrap around to a huge count that min() then
	// clamped to a full SORT_SIZE tile. Such groups now get a count of 0 and
	// touch no memory. They still reach every group sync below.
	const uint totalElements = NumElements;
	const uint numElementsInThreadGroup = (groupStart < totalElements)
		? min((uint)SORT_SIZE, totalElements - groupStart)
		: 0;

	// Load shared data; each thread fetches 2*ITERATIONS entries.
	uint i;
	[unroll]
	for (i = 0; i < 2 * ITERATIONS; ++i)
	{
		const uint offset = i * NUM_THREADS;
		if (GI + offset < numElementsInThreadGroup)
		{
			const uint loadIndex = GlobalBaseIndex + offset;
			g_LDS[LocalBaseIndex + offset] = float2(distanceBuffer[loadIndex], (float)indexBuffer[loadIndex]);
		}
	}
	GroupMemoryBarrierWithGroupSync();

	// Bitonic sort: nMergeSize doubles per stage, nMergeSubSize halves per step.
	for (uint nMergeSize = 2; nMergeSize <= SORT_SIZE; nMergeSize = nMergeSize * 2)
	{
		for (uint nMergeSubSize = nMergeSize >> 1; nMergeSubSize > 0; nMergeSubSize = nMergeSubSize >> 1)
		{
			[unroll]
			for (i = 0; i < ITERATIONS; ++i)
			{
				const uint tmp_index = GI + NUM_THREADS * i;
				const uint index_low = tmp_index & (nMergeSubSize - 1);
				const uint index_high = 2 * (tmp_index - index_low);
				const uint index = index_high + index_low;

				// The first step of a stage pairs mirrored positions; later
				// steps pair positions exactly nMergeSubSize apart.
				const uint nSwapElem = (nMergeSubSize == nMergeSize >> 1)
					? index_high + (2 * nMergeSubSize - 1) - index_low
					: index_high + nMergeSubSize + index_low;

				// nSwapElem is always greater than index, so this single test
				// also keeps index in range; unloaded slots are never read.
				if (nSwapElem < numElementsInThreadGroup)
				{
					const float2 a = g_LDS[index];
					const float2 b = g_LDS[nSwapElem];

					if (a.x > b.x)
					{
						g_LDS[index] = b;
						g_LDS[nSwapElem] = a;
					}
				}
				GroupMemoryBarrierWithGroupSync();
			}
		}
	}

	// Store shared data back to the buffers.
	[unroll]
	for (i = 0; i < 2 * ITERATIONS; ++i)
	{
		const uint offset = i * NUM_THREADS;
		if (GI + offset < numElementsInThreadGroup)
		{
			const uint loadIndex = LocalBaseIndex + offset;
			const uint storeIndex = GlobalBaseIndex + offset;
			distanceBuffer[storeIndex] = g_LDS[loadIndex].x;
			indexBuffer[storeIndex] = (uint)g_LDS[loadIndex].y;
		}
	}
}
Raw
Repaste