// WebGPU N-body compute script (JavaScript host code plus an embedded WGSL shader).
//
// What the visible code does:
//  - main() requests a GPU adapter, calls fail() if the adapter does not report
//    the 'timestamp-query' feature, then requests a device with that feature.
//  - Simulation constants (Nparticles = 2**16, dt = 0.04, mass = 1/Nparticles,
//    workgroup_size = 64, workgroup_size_reduce = 8) are interpolated into the
//    shader source text through the template literal.
//  - One bind group layout is shared by every pipeline: binding 0 is a storage
//    buffer and binding 2 is a uniform buffer, both visible to the compute stage.
//  - Four compute pipelines are created from the same module, for the entry
//    points "kick", "drift", "energy" and "sum_energy".
//  - initialConditions holds 8 floats per particle (8 * Nparticles).
//  - For each stride value a 4-byte uniform buffer containing that stride is
//    created and bound together with workBuffer.
//
// NOTE(review): this text looks damaged, as if everything between a '<' and the
// next '>' was stripped and the line breaks were collapsed. Evidence: the WGSL
// declarations read `array;` and `vec3,` with no type parameters, both
// `select(j+1,j,j,` calls are cut off mid-expression, no `fn drift(` or
// `fn sum_energy(` header is visible, and the JS loop header reads
// `for (var i = 0; i=1; stride /= 2)`. workBuffer, computeEncoder, computePass
// and stride are used below without a visible declaration. Restore the file
// from its original before running it.
// NOTE(review): `new TimingHelper(device)` runs before the `if (!device)` guard;
// confirm whether the guard should come first.
// NOTE(review): Nworkgroups and rng_state are not referenced in the visible
// text; presumably their uses are in the missing parts.
import TimingHelper from 'https://webgpufundamentals.org/webgpu/resources/js/timing-helper.js'; async function main() { const adapter = await navigator.gpu?.requestAdapter(); const canTimestamp = adapter?.features.has('timestamp-query'); if (!canTimestamp) { fail('need a browser that supports timestamp-query'); return; } const device = await adapter?.requestDevice({ requiredFeatures: [ ...(canTimestamp ? ['timestamp-query'] : []), ], }); const timingHelper = new TimingHelper(device); if (!device) { fail('need a browser that supports WebGPU'); return; } let Nparticles = 2**16; let dt = 0.04; let mass = 1.0/Nparticles; let workgroup_size = 64; let Nworkgroups = Nparticles/workgroup_size; let workgroup_size_reduce = 8; const computeModule = device.createShaderModule({ label: 'nbody compute module', code: ` struct Particle { pos: vec3f, vel: vec3f, energy: f32, }; @group(0) @binding(0) var particles: array; @group(0) @binding(2) var stride: u32; @compute @workgroup_size(${workgroup_size}) fn kick( @builtin(local_invocation_index) local_invocation_index: u32, @builtin(num_workgroups) num_workgroups: vec3, @builtin(workgroup_id) workgroup_id : vec3, ) { let pi = workgroup_id.x * ${workgroup_size} + local_invocation_index; // global_invocation_index for (var j = 0u; j<${Nparticles}-1; j++){ let pj = select(j+1,j,j, @builtin(workgroup_id) workgroup_id : vec3, ) { let pi = workgroup_id.x * ${workgroup_size} + local_invocation_index; // global_invocation_index particles[pi].pos += 0.5*${dt} * particles[pi].vel; } @compute @workgroup_size(${workgroup_size}) fn energy( @builtin(local_invocation_index) local_invocation_index: u32, @builtin(num_workgroups) num_workgroups: vec3, @builtin(workgroup_id) workgroup_id : vec3, ) { let pi = workgroup_id.x * ${workgroup_size} + local_invocation_index; // global_invocation_index let v = length(particles[pi].vel); particles[pi].energy = ${mass} * v*v; for (var j = 0u; j<${Nparticles}-1; j++){ let pj = select(j+1,j,j, 
@builtin(workgroup_id) workgroup_id : vec3, ) { let g_index = workgroup_id.x * ${workgroup_size_reduce} + local_invocation_index; // global_invocation_index particles[g_index].energy += particles[g_index+stride].energy; } `, }); const computeBindGroupLayout = device.createBindGroupLayout({ entries: [ { binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: 'storage', minBindingSize: 0, }, }, { binding: 2, visibility: GPUShaderStage.COMPUTE, buffer: { type: 'uniform', minBindingSize: 0, }, }, ], }); const computePipelineLayout = device.createPipelineLayout({ bindGroupLayouts: [ computeBindGroupLayout ], }); const kickComputePipeline = device.createComputePipeline({ label: 'kick compute pipeline', layout: computePipelineLayout, compute: { entryPoint: "kick", module: computeModule, }, }); const driftComputePipeline = device.createComputePipeline({ label: 'drift compute pipeline', layout: computePipelineLayout, compute: { entryPoint: "drift", module: computeModule, }, }); const energyComputePipeline = device.createComputePipeline({ label: 'energy compute pipeline', layout: computePipelineLayout, compute: { entryPoint: "energy", module: computeModule, }, }); const sumEnergyComputePipeline = device.createComputePipeline({ label: 'sum_energy compute pipeline', layout: computePipelineLayout, compute: { entryPoint: "sum_energy", module: computeModule, }, }); let rng_state = 20; const initialConditions = new Float32Array(8*Nparticles); for (var i = 0; i=1; stride /= 2){ const strideBuffer = device.createBuffer({ label: 'stride buffer', size: 4, mappedAtCreation: true, usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST, }); new Uint32Array(strideBuffer.getMappedRange()).set([stride]); strideBuffer.unmap(); const computeBindGroup = device.createBindGroup({ label: 'bindGroup for compute', layout: computeBindGroupLayout, entries: [ { binding: 0, resource: { buffer: workBuffer } }, { binding: 2, resource: { buffer: strideBuffer } } ] }); 
// Rest of one reduction iteration: bind the per-stride bind group, select the
// sum_energy pipeline and dispatch stride/workgroup_size_reduce workgroups, or
// 1 when that quotient is not greater than 1. After the loop the pass is ended
// and submitted, the timing result is logged as duration/1e9 with an "s"
// suffix, and workBuffer is copied into a MAP_READ buffer whose element [7] is
// logged as the energy before both buffers are destroyed. fail() shows its
// message with alert(), and main() is invoked once at the end of the file.
// NOTE(review): in this collapsed layout the `// Copy to CPU` marker sits
// mid-line, so as plain JavaScript everything after it on the same physical
// line (the readback, the end of main(), fail() and the main() call) would be
// read as comment text until the original line breaks are restored.
// NOTE(review): the getResult().then(...) promise has no rejection handler.
computePass.setBindGroup(0, computeBindGroup); computePass.setPipeline(sumEnergyComputePipeline); let Nworkgroups_sum = stride/workgroup_size_reduce > 1 ? stride/workgroup_size_reduce : 1; computePass.dispatchWorkgroups(Nworkgroups_sum); console.log(stride); } computePass.end(); const computeCommandBuffer = computeEncoder.finish(); device.queue.submit([computeCommandBuffer]); timingHelper.getResult().then(duration => { console.log(`duration: ${duration/1e9}s`); }); // Copy to CPU console.log("transfer started"); const resultBuffer = device.createBuffer({ label: 'result buffer', size: initialConditions.byteLength, usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST, }); const encoder = device.createCommandEncoder(); encoder.copyBufferToBuffer(workBuffer, 0, resultBuffer, 0, resultBuffer.size); const commandBuffer = encoder.finish(); device.queue.submit([commandBuffer]); await resultBuffer.mapAsync(GPUMapMode.READ); const result = new Float32Array(resultBuffer.getMappedRange().slice()); console.log('energy: ', result[7]); resultBuffer.unmap(); workBuffer.destroy() resultBuffer.destroy() } function fail(msg) { // eslint-disable-next-line no-alert alert(msg); } main();