Answered step by step
Verified Expert Solution
Link Copied!

Question

1 Approved Answer

Edit kernel.cu to complete the functionality of histogram; use a block size of 512. There are three modes of operation for the application. Check main()

Edit kernel.cu to complete the functionality of histogram; use a block size of 512.

There are three modes of operation for the application. Check main() for a description of the modes (repeated below). You will support each of these modes using a histogram with a privatization pattern.

kernel.cu

// Threads per block used for every histogram launch (fixed by the assignment).
static const unsigned int HISTO_BLOCK_SIZE = 512;

// Upper bound on resident-style blocks per multiprocessor used to size the
// grid. Each block pays one global atomic per non-empty bin when it merges its
// private copy, so the grid is kept small and a grid-stride loop covers the
// rest of the input.
static const unsigned int HISTO_BLOCKS_PER_SM = 4;

/*
 * Privatized histogram kernel.
 *
 * Launch contract:
 *   - 1D grid, 1D block (any sizes; the loops are stride-based).
 *   - Dynamic shared memory: num_bins * sizeof(unsigned int) bytes.
 *
 * Each block accumulates into its own shared-memory copy of the bins and then
 * adds that copy into `bins` with global atomics. `bins` is accumulated into,
 * not overwritten, so the caller must zero it beforehand.
 *
 * input        - device pointer to num_elements bin indices
 * bins         - device pointer to num_bins counters (accumulated into)
 * num_elements - number of entries in input
 * num_bins     - number of counters in bins
 *
 * Entries of `input` that are >= num_bins are skipped so that they can never
 * write outside the bin arrays.
 */
__global__ void histo_kernel(unsigned int* input, unsigned int* bins, unsigned int num_elements, unsigned int num_bins)
{
    extern __shared__ unsigned int bins_s[];

    // Shared memory starts uninitialized: clear this block's private bins.
    for (unsigned int b = threadIdx.x; b < num_bins; b += blockDim.x) {
        bins_s[b] = 0;
    }
    __syncthreads();

    // Grid-stride loop; 64-bit index so blockIdx.x * blockDim.x cannot wrap.
    const size_t stride = (size_t)gridDim.x * blockDim.x;
    for (size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
         i < num_elements; i += stride) {
        const unsigned int bin = input[i];
        if (bin < num_bins) {
            atomicAdd(&bins_s[bin], 1u);
        }
    }
    // All private updates must be complete before any thread reads them back.
    __syncthreads();

    // Merge the private copy into the global result, skipping empty bins to
    // avoid pointless global atomics.
    for (unsigned int b = threadIdx.x; b < num_bins; b += blockDim.x) {
        const unsigned int count = bins_s[b];
        if (count != 0) {
            atomicAdd(&bins[b], count);
        }
    }
}

/*
 * Fallback kernel used when the bins do not fit in one block's shared memory:
 * every element is counted with a global atomic directly on `bins`.
 * Same parameters and out-of-range handling as histo_kernel; needs no shared
 * memory.
 */
static __global__ void histo_global_kernel(const unsigned int* input, unsigned int* bins, unsigned int num_elements, unsigned int num_bins)
{
    const size_t stride = (size_t)gridDim.x * blockDim.x;
    for (size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
         i < num_elements; i += stride) {
        const unsigned int bin = input[i];
        if (bin < num_bins) {
            atomicAdd(&bins[bin], 1u);
        }
    }
}

/*
 * Host wrapper: launches the histogram computation on the default stream.
 *
 * input / bins are DEVICE pointers. `bins` must already be zeroed by the
 * caller; counts are added to whatever it holds. The launch is asynchronous:
 * the caller must synchronize before reading `bins`.
 *
 * Launch failures are reported with printf; the function itself returns void,
 * so execution errors surface at the caller's next synchronizing call.
 */
void histogram(unsigned int* input, unsigned int* bins, unsigned int num_elements, unsigned int num_bins) {

    // Nothing to count; also avoids an invalid zero-sized grid launch.
    if (num_elements == 0 || num_bins == 0) {
        return;
    }

    int device = 0;
    int sm_count = 0;
    int max_smem_bytes = 0;
    cudaError_t err = cudaGetDevice(&device);
    if (err == cudaSuccess) {
        err = cudaDeviceGetAttribute(&sm_count, cudaDevAttrMultiProcessorCount, device);
    }
    if (err == cudaSuccess) {
        err = cudaDeviceGetAttribute(&max_smem_bytes, cudaDevAttrMaxSharedMemoryPerBlock, device);
    }
    if (err != cudaSuccess) {
        printf("Unable to query device properties: %s\n", cudaGetErrorString(err));
        return;
    }

    // Ceil-divide without the (n + B - 1) overflow for n close to UINT_MAX.
    unsigned int grid_size = num_elements / HISTO_BLOCK_SIZE
                           + (num_elements % HISTO_BLOCK_SIZE != 0 ? 1u : 0u);
    const unsigned int max_grid = (sm_count > 0 ? (unsigned int)sm_count : 1u) * HISTO_BLOCKS_PER_SM;
    if (grid_size > max_grid) {
        grid_size = max_grid;
    }

    const size_t smem_bytes = (size_t)num_bins * sizeof(unsigned int);
    if (max_smem_bytes > 0 && smem_bytes <= (size_t)max_smem_bytes) {
        histo_kernel<<<grid_size, HISTO_BLOCK_SIZE, smem_bytes>>>(input, bins, num_elements, num_bins);
    } else {
        // Private copy would not fit in shared memory: count in global memory.
        histo_global_kernel<<<grid_size, HISTO_BLOCK_SIZE>>>(input, bins, num_elements, num_bins);
    }

    // Kernel launches do not return a status; configuration errors show up here.
    err = cudaGetLastError();
    if (err != cudaSuccess) {
        printf("Unable to launch histogram kernel: %s\n", cudaGetErrorString(err));
    }
}

main.cu

#include <stdio.h>
#include <stdlib.h>

#include "support.h"
#include "kernel.cu"

// Parses a non-negative decimal integer that fits in an unsigned int.
// Returns 1 and stores the value in *out on success; returns 0 (leaving *out
// untouched) for empty, signed, non-numeric, trailing-garbage or too-large text.
static int parseCount(const char* text, unsigned int* out)
{
    // strtoul would silently accept "-5" and wrap it; require a leading digit.
    if (text == NULL || text[0] < '0' || text[0] > '9') {
        return 0;
    }
    char* end = NULL;
    const unsigned long value = strtoul(text, &end, 10);
    if (*end != '\0' || value != (unsigned long)(unsigned int)value) {
        return 0;
    }
    *out = (unsigned int)value;
    return 1;
}

// Aborts the program with `what` and the CUDA error string when `status`
// is not cudaSuccess. Continuing after a failed CUDA call would only operate
// on invalid device pointers and produce misleading follow-up errors.
static void checkCuda(cudaError_t status, const char* what)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "\n%s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/*
 * Histogram driver.
 *
 * Modes of operation:
 *   ./histogram            Input: 1,000,000 elements, Bins: 4,096
 *   ./histogram <m>        Input: m elements,         Bins: 4,096
 *   ./histogram <m> <n>    Input: m elements,         Bins: n
 *
 * Builds the input with initVector(), copies it to the device, runs
 * histogram(), copies the bins back and hands them to verify(), timing each
 * phase. Returns 0 on success; exits with EXIT_FAILURE on invalid arguments,
 * host allocation failure, or any CUDA error.
 */
int main(int argc, char* argv[])
{
    Timer timer;

    // Initialize host variables ----------------------------------------------

    printf("Setting up the problem..."); fflush(stdout);
    startTime(&timer);

    unsigned int* in_h = NULL;
    unsigned int* bins_h = NULL;
    unsigned int* in_d = NULL;
    unsigned int* bins_d = NULL;
    unsigned int num_elements = 1000000;
    unsigned int num_bins = 4096;

    int args_ok = (argc >= 1 && argc <= 3);
    if (args_ok && argc >= 2) {
        args_ok = parseCount(argv[1], &num_elements);
    }
    if (args_ok && argc == 3) {
        // A histogram needs at least one bin.
        args_ok = parseCount(argv[2], &num_bins) && num_bins > 0;
    }
    if (!args_ok) {
        fprintf(stderr,
                "\n    Invalid input parameters!"
                "\n    Usage: ./histogram            # Input: 1,000,000, Bins: 4,096"
                "\n    Usage: ./histogram <m>        # Input: m, Bins: 4,096"
                "\n    Usage: ./histogram <m> <n>    # Input: m, Bins: n"
                "\n");
        exit(EXIT_FAILURE);
    }

    // initVector() comes from support.h; presumably it allocates in_h and
    // fills it with num_elements values below num_bins -- TODO confirm.
    initVector(&in_h, num_elements, num_bins);
    bins_h = (unsigned int*) malloc(num_bins * sizeof(unsigned int));
    if (bins_h == NULL) {
        fprintf(stderr, "\nUnable to allocate host memory\n");
        exit(EXIT_FAILURE);
    }

    stopTime(&timer); printf("%f s\n", elapsedTime(timer));
    printf("    Input size = %u\n    Number of bins = %u\n", num_elements,
           num_bins);

    // Allocate device variables ----------------------------------------------

    printf("Allocating device variables..."); fflush(stdout);
    startTime(&timer);

    checkCuda(cudaMalloc((void**)&in_d, num_elements * sizeof(unsigned int)),
              "Unable to allocate device memory");
    checkCuda(cudaMalloc((void**)&bins_d, num_bins * sizeof(unsigned int)),
              "Unable to allocate device memory");

    checkCuda(cudaDeviceSynchronize(), "Unable to synchronize device");
    stopTime(&timer); printf("%f s\n", elapsedTime(timer));

    // Copy host variables to device ------------------------------------------

    printf("Copying data from host to device..."); fflush(stdout);
    startTime(&timer);

    checkCuda(cudaMemcpy(in_d, in_h, num_elements * sizeof(unsigned int),
                         cudaMemcpyHostToDevice),
              "Unable to copy memory to the device");

    // The bins are accumulated into on the device, so they must start at zero.
    checkCuda(cudaMemset(bins_d, 0, num_bins * sizeof(unsigned int)),
              "Unable to set device memory");

    checkCuda(cudaDeviceSynchronize(), "Unable to synchronize device");
    stopTime(&timer); printf("%f s\n", elapsedTime(timer));

    // Launch kernel ----------------------------------------------------------

    printf("Launching kernel..."); fflush(stdout);
    startTime(&timer);

    histogram(in_d, bins_d, num_elements, num_bins);
    // Launch-configuration errors are only visible through cudaGetLastError();
    // faults during execution surface at the synchronize below.
    checkCuda(cudaGetLastError(), "Unable to launch/execute kernel");
    checkCuda(cudaDeviceSynchronize(), "Unable to launch/execute kernel");

    stopTime(&timer); printf("%f s\n", elapsedTime(timer));

    // Copy device variables to host ------------------------------------------

    printf("Copying data from device to host..."); fflush(stdout);
    startTime(&timer);

    checkCuda(cudaMemcpy(bins_h, bins_d, num_bins * sizeof(unsigned int),
                         cudaMemcpyDeviceToHost),
              "Unable to copy memory to host");

    checkCuda(cudaDeviceSynchronize(), "Unable to synchronize device");
    stopTime(&timer); printf("%f s\n", elapsedTime(timer));

    // Verify correctness -----------------------------------------------------

    printf("Verifying results..."); fflush(stdout);

    verify(in_h, bins_h, num_elements, num_bins);

    // Free memory ------------------------------------------------------------

    checkCuda(cudaFree(in_d), "Unable to free device memory");
    checkCuda(cudaFree(bins_d), "Unable to free device memory");
    free(in_h); free(bins_h);

    return 0;
}

Step by Step Solution

3.45 Rating (164 Votes )

There are 3 Steps involved in it

Step: 1

Answer Calculate the minimum number of jars of silver polish that would have to b... blur-text-image

Get Instant Access to Expert-Tailored Solutions

See step-by-step solutions with expert insights and AI powered tools for academic success

Step: 2

blur-text-image

Step: 3

blur-text-image

Ace Your Homework with AI

Get the answers you need in no time with our AI-driven, step-by-step assistance

Get Started

Recommended Textbook for

Derivatives Markets

Authors: Robert McDonald

3rd Edition

978-9332536746, 9789332536746

More Books

Students also viewed these Accounting questions