Plaster
New
List
Login
c
default
shinmera
2023.06.30 18:44:04
#version 430
#define SORT_SIZE 512

// Sort keys: one distance per particle slot.
layout(std430, row_major) buffer ParticleDistances{
  float particle_distances[];
};

// Sort payload: the particle index that travels with each distance.
layout(std430, row_major) buffer AliveParticles1{
  uint alive_particles_1[];
};

// Each invocation handles two elements of the tile.
#define NUM_THREADS (SORT_SIZE/2)
// local_size_x is written as a literal; keep it equal to NUM_THREADS.
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

// Number of valid entries in the two buffers above.
uniform int elements;

// Work-group tile of (distance, index) pairs.
// NOTE: the index is carried as a float, which is exact only up to 2^24.
shared vec2 local_storage[SORT_SIZE];

void main();

// Loads one SORT_SIZE-wide tile of (distance, index) pairs into shared
// memory, runs compare-exchange steps at strides SORT_SIZE/2, ..., 1 so
// that the smaller distance ends up at the lower index of each pair, and
// writes the tile back.
//
// NOTE(review): a single descending-stride sequence like this is one merge
// pass, not a complete sort of arbitrary input; presumably other passes of
// the sort (not visible here) prepare the data -- confirm against the caller.
void _GLSLTK_main_1(){
  // First global element covered by this work group.
  uint tile_start = gl_WorkGroupID.x * uint(SORT_SIZE);

  // Number of valid elements in this tile. The subtraction is done in
  // signed arithmetic: mixing the int uniform with the uint group offset
  // would wrap around for groups that start past `elements`, and the clamp
  // would then yield SORT_SIZE instead of 0.
  int remaining = elements - int(tile_start);
  uint tile_count = uint(clamp(remaining, 0, SORT_SIZE));

  uint global_base_index = tile_start + gl_LocalInvocationID.x;
  uint local_base_index = gl_LocalInvocationIndex;

  // Load: each invocation fetches up to two elements, NUM_THREADS apart.
  for(uint i = 0u; i < 2u; ++i){
    uint offset = i * uint(NUM_THREADS);
    if((gl_LocalInvocationIndex + offset) < tile_count){
      uint load_index = global_base_index + offset;
      local_storage[local_base_index + offset] =
        vec2(particle_distances[load_index], float(alive_particles_1[load_index]));
    }
  }
  groupMemoryBarrier();
  barrier();

  // Compare-exchange at halving strides. The barrier stays outside the
  // bounds check so every invocation of the group reaches it.
  for(uint sub_size = uint(SORT_SIZE >> 1); 0u < sub_size; sub_size = (sub_size >> 1)){
    uint index_low = gl_LocalInvocationIndex & (sub_size - 1u);
    uint index_high = 2u * (gl_LocalInvocationIndex - index_low);
    uint index = index_low + index_high;
    uint candidate = index + sub_size;
    // candidate > index, so this check also keeps `index` inside the tile.
    if(candidate < tile_count){
      vec2 a = local_storage[index];
      vec2 b = local_storage[candidate];
      if(b.x < a.x){
        local_storage[index] = b;
        local_storage[candidate] = a;
      }
    }
    groupMemoryBarrier();
    barrier();
  }

  // Store: mirror of the load loop.
  for(uint i = 0u; i < 2u; ++i){
    uint offset = i * uint(NUM_THREADS);
    if((gl_LocalInvocationIndex + offset) < tile_count){
      uint load_index = local_base_index + offset;
      uint store_index = global_base_index + offset;
      particle_distances[store_index] = local_storage[load_index].x;
      alive_particles_1[store_index] = uint(local_storage[load_index].y);
    }
  }
}

void main(){
  _GLSLTK_main_1();
}
Raw
Annotate
Repaste
Annotations
c
default
shinmera
2023.06.30 18:44:22
#if( SORT_SIZE>2048 )
#error SORT_SIZE must not exceed 2048
#endif

// Each thread handles two elements of the tile.
#define NUM_THREADS (SORT_SIZE/2)
#define INVERSION (16*2 + 8*3)

//--------------------------------------------------------------------------------------
// Structured Buffers
//--------------------------------------------------------------------------------------
RWSTRUCTUREDBUFFER(counterBuffer, ParticleCounters, 4);
RWSTRUCTUREDBUFFER(indexBuffer, uint, 2);
RWSTRUCTUREDBUFFER(distanceBuffer, float, 6);

#define NumElements counterBuffer[0].aliveCount_afterSimulation

//--------------------------------------------------------------------------------------
// Bitonic Sort Compute Shader
//--------------------------------------------------------------------------------------
// Thread-group tile of (distance, index) pairs.
// NOTE: the index is carried as a float, which is exact only up to 2^24.
groupshared float2 g_LDS[SORT_SIZE];

// Loads one SORT_SIZE-wide tile of (distance, index) pairs into group-shared
// memory, runs compare-exchange steps at strides SORT_SIZE/2, ..., 1 so that
// the smaller distance ends up at the lower index of each pair, and writes
// the tile back to distanceBuffer / indexBuffer.
//
// NOTE(review): a single descending-stride sequence is one merge pass, not a
// complete sort of arbitrary input; presumably other passes (not visible
// here) prepare the data -- confirm against the dispatching code.
[numthreads(NUM_THREADS, 1, 1)]
void main(uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex)
{
	// First global element covered by this group. Expressed with SORT_SIZE
	// rather than the literal 512 so the addressing follows the tile size
	// that NUM_THREADS and g_LDS are already derived from.
	const uint tileStart = Gid.x * SORT_SIZE;

	// Number of valid elements in this tile. Computed in signed arithmetic so
	// a group that starts past NumElements gets 0 instead of a wrapped value.
	const int remaining = (int)NumElements - (int)tileStart;
	const uint tileCount = (uint)clamp(remaining, 0, (int)SORT_SIZE);

	const uint GlobalBaseIndex = tileStart + GTid.x;
	const uint LocalBaseIndex = GI;

	uint i;

	// Load shared data: up to two elements per thread, NUM_THREADS apart.
	[unroll]
	for (i = 0; i < 2; ++i)
	{
		if (GI + i * NUM_THREADS < tileCount)
		{
			const uint loadIndex = GlobalBaseIndex + i * NUM_THREADS;
			g_LDS[LocalBaseIndex + i * NUM_THREADS] = float2(distanceBuffer[loadIndex], (float)indexBuffer[loadIndex]);
		}
	}
	GroupMemoryBarrierWithGroupSync();

	// Sort threadgroup shared memory. The barrier stays outside the bounds
	// check so every thread of the group reaches it.
	for (uint nMergeSubSize = SORT_SIZE >> 1; nMergeSubSize > 0; nMergeSubSize = nMergeSubSize >> 1)
	{
		const uint index_low = GI & (nMergeSubSize - 1);
		const uint index_high = 2 * (GI - index_low);
		const uint index = index_high + index_low;
		const uint nSwapElem = index_high + nMergeSubSize + index_low;

		// nSwapElem > index, so this check also keeps `index` inside the tile.
		if (nSwapElem < tileCount)
		{
			const float2 a = g_LDS[index];
			const float2 b = g_LDS[nSwapElem];

			if (a.x > b.x)
			{
				g_LDS[index] = b;
				g_LDS[nSwapElem] = a;
			}
		}
		GroupMemoryBarrierWithGroupSync();
	}

	// Store shared data: mirror of the load loop.
	[unroll]
	for (i = 0; i < 2; ++i)
	{
		if (GI + i * NUM_THREADS < tileCount)
		{
			const uint loadIndex = LocalBaseIndex + i * NUM_THREADS;
			const uint storeIndex = GlobalBaseIndex + i * NUM_THREADS;
			distanceBuffer[storeIndex] = g_LDS[loadIndex].x;
			indexBuffer[storeIndex] = (uint)g_LDS[loadIndex].y;
		}
	}
}
Raw
Repaste