// Professional Documents
// Culture Documents
#include<stdio.h>
#include<conio.h>
#include<cuda.h>
void main()
{
float *a_h, *c_h;
float *a_d, *c_d;
int row, col;
printf("Enter the size of the matrix>>");
scanf("%d %d",&row,&col);
// Memory allocation on host
a_h = (float*)malloc(sizeof(float)*row*col);
c_h = (float*)malloc(sizeof(float)*row*col);
// Memory allocation on device
cudaMalloc((void**)&a_d,sizeof(float)*row*col);
cudaMalloc((void**)&c_d,sizeof(float)*row*col);
//Input from user
printf("Enter the matix\n");
for(int i =0 ; i < row*col ; i++)
{
scanf("%f",&a_h[i]);
}
// Display the matrix
for(int i= 0; i<row; i++)
{
for(int j =0 ; j< col ; j++)
{
printf("%f\t",a_h[i*col + j]);
}
printf("\n");
}
//copying the data to device
cudaMemcpy(a_d,a_h,sizeof(float)*row*col,cudaMemcpyHostToDevice);
//defining block size
dim3 dimBlock(col,row,1);
//kernel call
Matrix_Transpose<<< 1 , dimBlock >>>(a_d,c_d,col);