//
// Code based on helloflops3 from lotsofcores.com
//
// A simple example that gets lots of Flops (Floating Point Operations) on
// Intel(r) Xeon Phi(tm) co-processors using openmp to scale
//

/* TODO: Change array of structures to structure of arrays */

// NOTE(review): the header names were missing from the five #include lines
// in the reviewed copy of this file. stdio.h and sys/time.h are required by
// the code below; stdlib.h, string.h and omp.h are presumed from the
// helloflops3 original -- confirm against version control.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#ifdef _OPENMP
#include <omp.h>   // only needed (and only linkable) when built with OpenMP
#endif

// dtime
//
// returns the current wall clock time, in seconds, as reported by
// gettimeofday() (seconds plus microseconds scaled by 1e-6)
//
double dtime(void)
{
    struct timeval mytime;

    gettimeofday(&mytime, NULL);
    return (double)mytime.tv_sec + (double)mytime.tv_usec * 1.0e-6;
}

#define FLOPS_ARRAY_SIZE (1024*1024)
#define MAXFLOPS_ITERS 100000000
#define LOOP_COUNT 128

// number of REAL pt ops per calculation
#define FLOPSPERCALC 2

#define BYTESPERFLOAT 4
#define BYTESPERDOUBLE 8

// TODO:change these to try double precision AFTER vectorization
#define REAL float
#define BYTESPERREAL (4)

#define BYTESPERCALC (3*BYTESPERREAL) //3 numbers * 4bytes per REAL

// define some arrays -
// make sure they are 64 byte aligned
// for best cache access

/* TODO: comment out array of structures */
typedef struct {
    REAL fa;
    REAL fb;
} pieceofdata;

pieceofdata array[FLOPS_ARRAY_SIZE];

// TODO uncomment 64-byte aligned arrays
/*
REAL fa[FLOPS_ARRAY_SIZE] __attribute__((aligned(64)));
REAL fb[FLOPS_ARRAY_SIZE] __attribute__((aligned(64)));
*/

//
// Main program - pedal to the metal...calculate using tons o'flops!
//
// Fills the global array, times a multiply-add kernel over it, and prints
// the operation count, elapsed time, rate, derived bandwidth and the
// arithmetic intensity. Always returns 0.
//
int main(int argc, char *argv[])
{
    int i, j, k;
    int numthreads = 1;   // stays 1 when the file is built without OpenMP
    double tstart, tstop, ttime;
    double gflops = 0.0;
    REAL a = 1.1;

    (void)argc;
    (void)argv;

    printf("Initializing\r\n");

    // flops per byte: FLOPSPERCALC operations for BYTESPERCALC bytes
    float arithmetic_intensity = (float)FLOPSPERCALC/BYTESPERCALC;

    // ---------------------------------------------------------------------
    // NOTE(review): in the reviewed copy of this file the text between
    // "for(i=0; i" and "> 0.0)" was lost (it looks like markup stripping ate
    // everything from the first '<' to the next '>'). The region down to the
    // matching "end of reconstructed region" marker is rebuilt from the
    // helloflops3 structure, adapted to the array-of-structures layout
    // declared above. Confirm it against the original before trusting any
    // numbers this program prints.
    // ---------------------------------------------------------------------
    for (i = 0; i < FLOPS_ARRAY_SIZE; i++) {
        array[i].fa = (REAL)i + 0.1;
        array[i].fb = (REAL)i + 0.2;
    }

#ifdef _OPENMP
    // ask the runtime how many threads a parallel region gets
#pragma omp parallel
#pragma omp master
    numthreads = omp_get_num_threads();
#endif

    printf("Starting Compute on %d threads\r\n", numthreads);

    tstart = dtime();

    // one outer iteration per thread; each works on its own LOOP_COUNT-sized
    // slice of the array
#pragma omp parallel for private(j,k)
    for (i = 0; i < numthreads; i++) {
        int offset = i * LOOP_COUNT;

        // loop many times to get lots of calculations
        for (j = 0; j < MAXFLOPS_ITERS; j++) {
            // scale fa and add in fb: FLOPSPERCALC flops per element
            for (k = 0; k < LOOP_COUNT; k++) {
                array[k + offset].fa =
                    a * array[k + offset].fa + array[k + offset].fb;
            }
        }
    }

    tstop = dtime();

    // number of gigaflops just calculated (done in double to avoid int
    // overflow of the product)
    gflops = 1.0e-9 * numthreads * LOOP_COUNT * MAXFLOPS_ITERS * FLOPSPERCALC;

    // elapsed time
    ttime = tstop - tstart;
    // ------------------- end of reconstructed region ---------------------

    //
    // Print the results
    //
    if (ttime > 0.0) {
        printf("GFlops = %10.3lf, Secs = %10.3lf, GFlops per sec = %10.3lf\r\n",
               gflops, ttime, gflops/ttime);

        // bandwidth is a rate: (GFlop/s) / (flop/B) = GB/s. This used to
        // print gflops/arithmetic_intensity, i.e. total GB moved, under a
        // GB/sec label.
        printf("Bandwidth = %g GB/sec\n", (gflops/ttime)/arithmetic_intensity);
    }

    // arithmetic_intensity is flops divided by bytes, so the unit is flop/B
    // (the label used to say B/flop)
    printf("Arithmetic intensity=%g flop/B\n", arithmetic_intensity);

    return 0;
}