// Professional Documents
// Culture Documents
#include <bits/stdc++.h>
using namespace std;
/*
* Each cell in the matrix is assigned to a different thread.
* Each thread does O(N * number of assigned cells) computation.
* The cells assigned to different threads do not overlap with
* each other, so there is no need for synchronization.
*/
/*
 * Times the CPU matrix multiplication and validates its result.
 *
 * CPUmatmul(N, x, y, ans) is called four times. The processor time of each
 * call (as reported by clock()) is printed in milliseconds; the first call is
 * excluded from the average and the remaining three are averaged. Afterwards
 * check(N, ans) decides whether "RUN OK." or "RUN NOT OK." is printed.
 *
 * NOTE(review): x, y and ans are not declared anywhere in the visible code;
 * they are presumably allocated before the timing loop -- confirm against the
 * full file.
 *
 * Returns 0 unconditionally, even when the check fails.
 */
int main(void)
{
    //size of matrix (1<<9 == 512), passed to CPUmatmul and check
    int N = 1<<9;

    // Number of runs that count towards the average. One extra run (i == 0)
    // is executed first and excluded, so the loop and the divisor below both
    // derive from this single value.
    const int timedRuns = 3;

    clock_t t;
    double avg = 0;
    cout<<"Strting CPU computation"<<endl;
    for (int i = 0; i <= timedRuns; i++)
    {
        t = clock();
        CPUmatmul(N, x, y, ans);
        t = clock() - t;
        if (i) avg += t; //we will ignore the first run
        // The format string must stay on a single line: a raw line break
        // inside a string literal does not compile.
        printf("It took CPU-%d %f ms.\n", i, (((double)t)/CLOCKS_PER_SEC)*1000);
    }

    // Convert the accumulated clock ticks into the mean time in milliseconds.
    avg /= timedRuns;
    avg /= CLOCKS_PER_SEC;
    avg *= 1000;
    printf("It took %lf ms on avg.\n", avg);

    if (check(N, ans)) cout<<"RUN OK."<<endl;
    else cout<<"RUN NOT OK."<<endl;

    // Free memory
    // NOTE(review): ans is not released here, and the cudaFree return codes
    // are ignored -- verify how ans is allocated and whether it should be
    // freed alongside x and y.
    cudaFree(x);
    cudaFree(y);
    return 0;
}