Question
Add an algorithm to this kernel code that finds the maximum value in the vector it will reduce. __global__ void reduction(float *out, float *in, unsigned size) { ... }
Add an algorithm to this kernel code that finds the maximum value in the vector it will reduce.
/*
 * Per-block sum reduction.
 *
 * Every thread block sums a contiguous segment of 2 * BLOCK_SIZE elements of
 * `in` and writes that single partial sum to out[blockIdx.x]. Combining the
 * per-block partial sums is left to the caller.
 *
 * Parameters:
 *   out  - receives one partial sum per block, indexed by blockIdx.x.
 *   in   - input vector to reduce.
 *   size - number of valid elements in `in`; positions at or beyond `size`
 *          contribute 0.0f, the identity for addition. (A reduction with a
 *          different operator, e.g. max, would need a different padding
 *          value.)
 *
 * Launch requirements implied by the indexing below:
 *   - blockDim.x must equal BLOCK_SIZE: the shared arrays are sized from
 *     BLOCK_SIZE and the second element of each thread is fetched at an
 *     offset of BLOCK_SIZE, while the segment base uses blockDim.x.
 *   - BLOCK_SIZE must be a power of two, otherwise the stride-halving /
 *     stride-doubling trees skip elements.
 *
 * Compile with -DSIMPLE to select the interleaved-addressing variant;
 * otherwise the sequential-addressing variant is built.
 */
__global__ void reduction(float *out, float *in, unsigned size) {
#ifdef SIMPLE
    /* Interleaved addressing: stage both elements, then add neighbours that
     * are `stride` apart, doubling the stride every round. */
    __shared__ float in_s[2 * BLOCK_SIZE];

    /* First of the two elements handled by this thread. Unsigned to match
     * `size` and avoid signed/unsigned mixing in the bounds checks. */
    unsigned int idx = 2 * blockIdx.x * blockDim.x + threadIdx.x;

    in_s[threadIdx.x] = (idx < size) ? in[idx] : 0.0f;
    in_s[threadIdx.x + BLOCK_SIZE] =
        (idx + BLOCK_SIZE < size) ? in[idx + BLOCK_SIZE] : 0.0f;

    for (int stride = 1; stride < 2 * BLOCK_SIZE; stride <<= 1) {
        /* Barrier sits outside the divergent `if` so every thread reaches
         * it; it orders the previous round's writes before this round's
         * reads. */
        __syncthreads();
        if (threadIdx.x % stride == 0) {
            in_s[2 * threadIdx.x] += in_s[2 * threadIdx.x + stride];
        }
    }
#else
    /* Sequential addressing: each thread adds its two elements while
     * loading, then the upper half of the active range is folded onto the
     * lower half, halving the range every round. */
    __shared__ float in_s[BLOCK_SIZE];

    /* First of the two elements handled by this thread. Unsigned to match
     * `size` and avoid signed/unsigned mixing in the bounds checks. */
    unsigned int idx = 2 * blockIdx.x * blockDim.x + threadIdx.x;

    in_s[threadIdx.x] =
        ((idx < size) ? in[idx] : 0.0f) +
        ((idx + BLOCK_SIZE < size) ? in[idx + BLOCK_SIZE] : 0.0f);

    for (int stride = BLOCK_SIZE >> 1; stride > 0; stride >>= 1) {
        /* Barrier sits outside the divergent `if` so every thread reaches
         * it; it orders the previous round's writes before this round's
         * reads. */
        __syncthreads();
        if (threadIdx.x < stride) {
            in_s[threadIdx.x] += in_s[threadIdx.x + stride];
        }
    }
#endif

    /* Thread 0 performed the final accumulation into in_s[0] itself, so no
     * additional barrier is needed before it publishes the block's result. */
    if (threadIdx.x == 0) {
        out[blockIdx.x] = in_s[0];
    }
}
Step by Step Solution
There are 3 Steps involved in it
Step: 1
Get Instant Access to Expert-Tailored Solutions
See step-by-step solutions with expert insights and AI powered tools for academic success
Step: 2
Step: 3
Ace Your Homework with AI
Get the answers you need in no time with our AI-driven, step-by-step assistance
Get Started