1. Examine the source code of matrix_mult.c. Which parallel computing library does this program use?
2. Compile the matrix_mult.c source code using the gcc compiler. Make sure to include the option for the correct parallel library when compiling your code. What command did you use to compile the source code?
3. Create a Slurm script to run a batch job for the binary produced by the compilation step in question 2. You should request exactly 4 cores and 2 GB of RAM to run your compiled binary. Set the partition to debug. Print out your complete Slurm script.
Source file:
matrix_mult.c
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#define nrow 2500 /* number of rows in matrix */
#define ncol 2500 /* number of columns in matrix */

/*
 * OpenMP matrix-multiplication benchmark.
 *
 * Fills a (nrow x ncol) with i + j and b (ncol x ncol) with i * j, zeroes
 * c (nrow x ncol), then computes c = a * b with the outer (row) loop shared
 * among the threads of one parallel region. Afterwards a serial pass sums
 * each column of c, and the elapsed wall time (measured with
 * omp_get_wtime) is printed.
 *
 * argc and argv are accepted but not used.
 * Returns EXIT_SUCCESS.
 */
int main (int argc, char *argv[]) {
    /*
     * Static storage on purpose: each matrix is nrow * ncol doubles
     * (50 MB at 2500 x 2500), so three automatic arrays would need about
     * 150 MB of stack and overflow a typical default stack limit.
     */
    static double a[nrow][ncol];
    static double b[ncol][ncol]; /* right-hand operand: ncol x ncol */
    static double c[nrow][ncol];
    int chunk = 10; /* set loop iteration chunk size */
    char buffer[256];
    double sum;
    double start_time;
    double end_time;

    (void)argc;
    (void)argv;

    start_time = omp_get_wtime();

    /* Fall back to a placeholder name if the host name cannot be read. */
    if (gethostname(buffer, sizeof(buffer)) != 0) {
        snprintf(buffer, sizeof(buffer), "%s", "unknown");
    }
    /* gethostname need not NUL-terminate a truncated name. */
    buffer[sizeof(buffer) - 1] = '\0';
    printf("Running benchmark on: %s\n", buffer);

    /*
     * Parallel region. tid and all loop indices are declared inside the
     * region, so every thread gets its own copy without a private clause.
     */
    #pragma omp parallel shared(a, b, c, chunk)
    {
        int tid = omp_get_thread_num();

        if (tid == 0) {
            int nthreads = omp_get_num_threads();
            printf("Starting with %d threads\n", nthreads);
            printf("Initializing matrices\n");
        }

        /* Initialize matrices */
        #pragma omp for schedule(static, chunk)
        for (int i = 0; i < nrow; i++) {
            for (int j = 0; j < ncol; j++) {
                a[i][j] = i + j;
            }
        }

        #pragma omp for schedule(static, chunk)
        for (int i = 0; i < ncol; i++) {
            for (int j = 0; j < ncol; j++) {
                b[i][j] = i * j;
            }
        }

        #pragma omp for schedule(static, chunk)
        for (int i = 0; i < nrow; i++) {
            for (int j = 0; j < ncol; j++) {
                c[i][j] = 0;
            }
        }

        /* Matrix multiply sharing iterations on outer loop */
        printf("Thread %d starting matrix multiplication\n", tid);
        #pragma omp for schedule(static, chunk)
        for (int i = 0; i < nrow; i++) {
            /* printf("thread %d has completed row %d\n", tid, i); */
            for (int j = 0; j < ncol; j++) {
                for (int k = 0; k < ncol; k++) {
                    c[i][j] += a[i][k] * b[k][j];
                }
            }
        }
    } /* End of parallel region */

    printf("Matrix multiplication is done. End of parallel task.\n");

    /*
     * Serial task: sum each column of c. The column index is the second
     * subscript, so walk the rows of a fixed column.
     */
    for (int col = 0; col < ncol; col++) {
        sum = 0.0;
        for (int row = 0; row < nrow; row++) {
            sum += c[row][col];
        }
        /* printf("Sum of column %d in product matrix is %lf\n", col, sum); */
    }
    printf("Matrix column sum is done. End of serial task.\n");

    end_time = omp_get_wtime();
    printf("Wall time was %lf seconds.\n", end_time - start_time);
    return EXIT_SUCCESS;
}
Step by Step Solution
There are 3 Steps involved in it
Step: 1
See step-by-step solutions with expert insights and AI powered tools for academic success
Step: 2
Step: 3
Ace Your Homework with AI
Get the answers you need in no time with our AI-driven, step-by-step assistance
Get Started