diff options
-rw-r--r-- | Changelog.txt | 1 | ||||
-rw-r--r-- | driver/others/profile.c | 9 | ||||
-rwxr-xr-x[-rw-r--r--] | interface/create | 0 |
3 files changed, 6 insertions, 4 deletions
diff --git a/Changelog.txt b/Changelog.txt index c4e6a8fe2..cc90ee198 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -18,6 +18,7 @@ common: * Added openblas_set_num_threads for Fortran. * Fixed #25 a wrong result of rotmg. * Fixed a bug about detecting underscore prefix in c_check. + * Print the wall time (cycles) with enabling FUNCTION_PROFILE x86/x86_64: * Fixed #28 a wrong result of dsdot on x86_64. diff --git a/driver/others/profile.c b/driver/others/profile.c index f65550c9f..f464c0b6a 100644 --- a/driver/others/profile.c +++ b/driver/others/profile.c @@ -74,20 +74,21 @@ void gotoblas_profile_quit(void) { if (cycles > 0) { fprintf(stderr, "\n\t====== BLAS Profiling Result =======\n\n"); - fprintf(stderr, " Function No. of Calls Time Consumption Efficiency Bytes/cycle\n"); + fprintf(stderr, " Function No. of Calls Time Consumption Efficiency Bytes/cycle Wall Time(Cycles)\n"); for (i = 0; i < MAX_PROF_TABLE; i ++) { if (function_profile_table[i].calls) { #ifndef OS_WINDOWS - fprintf(stderr, "%-12s : %10Ld %8.2f%% %10.3f%% %8.2f\n", + fprintf(stderr, "%-12s : %10Ld %8.2f%% %10.3f%% %8.2f %Ld\n", #else - fprintf(stderr, "%-12s : %10lld %8.2f%% %10.3f%% %8.2f\n", + fprintf(stderr, "%-12s : %10lld %8.2f%% %10.3f%% %8.2f %lld\n", #endif func_table[i], function_profile_table[i].calls, (double)function_profile_table[i].cycles / (double)cycles * 100., (double)function_profile_table[i].fops / (double)function_profile_table[i].tcycles * 100., - (double)function_profile_table[i].area / (double)function_profile_table[i].cycles + (double)function_profile_table[i].area / (double)function_profile_table[i].cycles, + function_profile_table[i].cycles ); } } diff --git a/interface/create b/interface/create index b7be8ab6e..b7be8ab6e 100644..100755 --- a/interface/create +++ b/interface/create |