summary | refs | log | tree | commit | diff
path: root/caffe2/quantization
diff options
context:
space:
mode:
author Summer Deng <summerdeng@fb.com> 2019-03-08 19:00:43 -0800
committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com> 2019-03-08 19:03:54 -0800
commit c10c73f0473baf80305bb800dbee113ff1067a95 (patch)
tree b6b18cbbf1d0c4123ac3faeb9e64e869e9e0d970 /caffe2/quantization
parent 0fd1dc45c037ae806352e08a7b83eeacfad101a0 (diff)
download pytorch-c10c73f0473baf80305bb800dbee113ff1067a95.tar.gz
pytorch-c10c73f0473baf80305bb800dbee113ff1067a95.tar.bz2
pytorch-c10c73f0473baf80305bb800dbee113ff1067a95.zip
Int8 FC performance debugging (#17700)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/17700

Add performance debugging utilities to the DNNLOWP FC operator and the Python script.

Reviewed By: amylittleyang

Differential Revision: D14321299

fbshipit-source-id: 50dbd7b352a1da5d2ecb659d8003e71e70750063
Diffstat (limited to 'caffe2/quantization')
-rw-r--r-- caffe2/quantization/server/fully_connected_dnnlowp_op.cc 149
1 files changed, 102 insertions, 47 deletions
diff --git a/caffe2/quantization/server/fully_connected_dnnlowp_op.cc b/caffe2/quantization/server/fully_connected_dnnlowp_op.cc
index c19f43e975..5eee0a9604 100644
--- a/caffe2/quantization/server/fully_connected_dnnlowp_op.cc
+++ b/caffe2/quantization/server/fully_connected_dnnlowp_op.cc
@@ -1,6 +1,8 @@
#include "fully_connected_dnnlowp_op.h"
+#ifdef DNNLOWP_MEASURE_TIME_BREAKDOWN
#include <chrono>
+#endif
#include "caffe2/core/flags.h"
#include "caffe2/core/tensor_int8.h"
@@ -91,25 +93,30 @@ bool FullyConnectedDNNLowPOp<T>::RunOnDevice() {
return true;
}
+#ifdef DNNLOWP_MEASURE_TIME_BREAKDOWN
chrono::time_point<chrono::system_clock> t_very_begin, t_begin, t_end;
-
- if (VLOG_IS_ON(3)) {
+ /* if (VLOG_IS_ON(3)) */
+ {
t_begin = chrono::system_clock::now();
t_very_begin = t_begin;
}
+#endif
// Get quantization parameters
if (!GetQuantizationParameters_()) {
return false;
}
- if (VLOG_IS_ON(3)) {
+#ifdef DNNLOWP_MEASURE_TIME_BREAKDOWN
+ /* if (VLOG_IS_ON(3)) */
+ {
t_end = chrono::system_clock::now();
double dt = chrono::duration<double>(t_end - t_begin).count();
- VLOG(3) << "@PERF this=" << this << " get_quant_params: " << dt * 1e3
- << " ms";
+ LOG(INFO) << "@PERF this=" << this << " get_quant_params: " << dt * 1e3
+ << " ms";
t_begin = chrono::system_clock::now();
}
+#endif
const auto& X = InputTensorCPU_(0);
const auto& W = InputTensorCPU_(1);
@@ -130,13 +137,17 @@ bool FullyConnectedDNNLowPOp<T>::RunOnDevice() {
const T* Xdata = nullptr;
vector<T> X_temp;
- if (VLOG_IS_ON(3)) {
+#ifdef DNNLOWP_MEASURE_TIME_BREAKDOWN
+ /* if (VLOG_IS_ON(1)) */
+ {
t_end = chrono::system_clock::now();
double dt = chrono::duration<double>(t_end - t_begin).count();
- VLOG(3) << "@PERF this=" << this << " initialize parameters: " << dt * 1e3
- << " ms";
+ LOG(INFO) << "@PERF this=" << this << " initialize parameters: " << dt * 1e3
+ << " ms";
t_begin = chrono::system_clock::now();
}
+#endif
+
if (Wq_packed_) {
// fast path to use fbgemm
using namespace fbgemm;
@@ -144,22 +155,31 @@ bool FullyConnectedDNNLowPOp<T>::RunOnDevice() {
if (X.template IsType<T>() || !dequantize_output_) {
// Only when input and output are float, we don't need input to be
// quantized.
- if (VLOG_IS_ON(3)) {
- t_begin = chrono::system_clock::now();
- }
+
+#ifdef DNNLOWP_MEASURE_TIME_BREAKDOWN
+ /* if (VLOG_IS_ON(1)) */
+ { t_begin = chrono::system_clock::now(); }
+#endif
+
Xdata = QuantizeInputIfNeeded<T>(this, 0, in_qparams_[0], X_temp);
- if (VLOG_IS_ON(3)) {
+
+#ifdef DNNLOWP_MEASURE_TIME_BREAKDOWN
+ /* if (VLOG_IS_ON(1)) */
+ {
t_end = chrono::system_clock::now();
double dt = chrono::duration<double>(t_end - t_begin).count();
- VLOG(3) << "@PERF this=" << this << " input quantization: " << dt * 1e3
- << " ms";
+ LOG(INFO) << "@PERF this=" << this
+ << " input quantization: " << dt * 1e3 << " ms";
t_begin = chrono::system_clock::now();
}
+#endif
}
- if (VLOG_IS_ON(3)) {
- t_begin = chrono::system_clock::now();
- }
+#ifdef DNNLOWP_MEASURE_TIME_BREAKDOWN
+ /* if (VLOG_IS_ON(1)) */
+ { t_begin = chrono::system_clock::now(); }
+#endif
+
if (!dequantize_output_) {
Y_int32_.resize(Y->size());
DoNothing<> doNothingObj{};
@@ -396,17 +416,24 @@ bool FullyConnectedDNNLowPOp<T>::RunOnDevice() {
} // dequantize_output
} else {
// Quantize X
- if (VLOG_IS_ON(3)) {
- t_begin = chrono::system_clock::now();
- }
+
+#ifdef DNNLOWP_MEASURE_TIME_BREAKDOWN
+ /* if (VLOG_IS_ON(1)) */
+ { t_begin = chrono::system_clock::now(); }
+#endif
+
Xdata = QuantizeInputIfNeeded<T>(this, 0, in_qparams_[0], X_temp);
- if (VLOG_IS_ON(3)) {
+
+#ifdef DNNLOWP_MEASURE_TIME_BREAKDOWN
+ /* if (VLOG_IS_ON(1)) */
+ {
t_end = chrono::system_clock::now();
double dt = chrono::duration<double>(t_end - t_begin).count();
- VLOG(3) << "@PERF this=" << this << " input quantization: " << dt * 1e3
- << " ms";
+ LOG(INFO) << "@PERF this=" << this << " input quantization: " << dt * 1e3
+ << " ms";
t_begin = chrono::system_clock::now();
}
+#endif
Y_int32_.resize(Y->size());
for (int i = 0; i < M; ++i) {
@@ -429,13 +456,16 @@ bool FullyConnectedDNNLowPOp<T>::RunOnDevice() {
StoreMatrixInMatrixMarketFormat(N, K, Wdata, this->debug_def().input(1));
}
- if (VLOG_IS_ON(3)) {
+#ifdef DNNLOWP_MEASURE_TIME_BREAKDOWN
+ /* if (VLOG_IS_ON(1)) */
+ {
t_end = chrono::system_clock::now();
double dt = chrono::duration<double>(t_end - t_begin).count();
- VLOG(3) << "@PERF this=" << this << " gemm: " << dt * 1e3 << " ms";
+ LOG(INFO) << "@PERF this=" << this << " gemm: " << dt * 1e3 << " ms";
t_begin = chrono::system_clock::now();
}
+#endif
// Adjust with bias and zero_point and then requantize
// See batch_matmul_dnnlowp_op.cc to why we compute column_offsets,
@@ -495,20 +525,23 @@ bool FullyConnectedDNNLowPOp<T>::RunOnDevice() {
MeasureQuantizationError_();
- if (VLOG_IS_ON(3)) {
+#ifdef DNNLOWP_MEASURE_TIME_BREAKDOWN
+ /* if (VLOG_IS_ON(1)) */
+ {
t_end = chrono::system_clock::now();
double dt = chrono::duration<double>(t_end - t_begin).count();
- VLOG(3) << "@PERF this=" << this
- << " bias-offset-requantization: " << dt * 1e3 << " ms";
+ LOG(INFO) << "@PERF this=" << this
+ << " bias-offset-requantization: " << dt * 1e3 << " ms";
t_end = chrono::system_clock::now();
double ops = 2. * M * N * K;
dt = chrono::duration<double>(t_end - t_very_begin).count();
double gops = ops / dt / 1e9;
- VLOG(3) << "@PERF this=" << this
- << " output=" << this->debug_def().output(0) << " " << M << "x" << N
- << "x" << K << ": " << dt * 1e3 << " ms " << gops << " gops";
+ LOG(INFO) << "@PERF this=" << this
+ << " output=" << this->debug_def().output(0) << " " << M << "x"
+ << N << "x" << K << ": " << dt * 1e3 << " ms " << gops << " gops";
}
+#endif
return true;
}
@@ -517,20 +550,25 @@ template <typename T>
bool FullyConnectedDNNLowPOp<T>::GetQuantizationParameters_() {
using namespace dnnlowp;
+#ifdef DNNLOWP_MEASURE_TIME_BREAKDOWN
chrono::time_point<chrono::system_clock> t_begin, t_end;
- if (VLOG_IS_ON(3)) {
- t_begin = chrono::system_clock::now();
- }
+ /* if (VLOG_IS_ON(1)) */
+ { t_begin = chrono::system_clock::now(); }
+#endif
+
// Choose quantization for X
in_qparams_[0] = GetInputTensorQuantizationParamsOf(this, 0, qfactory_.get());
- if (VLOG_IS_ON(3)) {
+#ifdef DNNLOWP_MEASURE_TIME_BREAKDOWN
+ /* if (VLOG_IS_ON(1)) */
+ {
t_end = chrono::system_clock::now();
double dt = chrono::duration<double>(t_end - t_begin).count();
- VLOG(3) << "@PERF this=" << this << " GetInputTensorQuantizationParamsOf "
- << dt * 1e3 << " ms";
+ LOG(INFO) << "@PERF this=" << this << " GetInputTensorQuantizationParamsOf "
+ << dt * 1e3 << " ms";
t_begin = chrono::system_clock::now();
}
+#endif
// Quantize W
const auto& X = InputTensorCPU_(0);
@@ -618,12 +656,16 @@ bool FullyConnectedDNNLowPOp<T>::GetQuantizationParameters_() {
filter_qparams_[0]);
}
- if (VLOG_IS_ON(3)) {
+#ifdef DNNLOWP_MEASURE_TIME_BREAKDOWN
+ /* if (VLOG_IS_ON(1)) */
+ {
t_end = chrono::system_clock::now();
double dt = chrono::duration<double>(t_end - t_begin).count();
- VLOG(3) << "@PERF this=" << this << " Quantize W " << dt * 1e3 << " ms";
+ LOG(INFO) << "@PERF this=" << this << " Quantize W " << dt * 1e3 << " ms";
t_begin = chrono::system_clock::now();
}
+#endif
+
// Pre-compute column_offset
// If input tensor doesn't use dynamic quantization, we fold column_offsets_
// into bias.
@@ -641,13 +683,17 @@ bool FullyConnectedDNNLowPOp<T>::GetQuantizationParameters_() {
K, N, W_quantized_.data(), filter_qparams_, *column_offsets_);
}
}
- if (VLOG_IS_ON(3)) {
+
+#ifdef DNNLOWP_MEASURE_TIME_BREAKDOWN
+ /* if (VLOG_IS_ON(1)) */
+ {
t_end = chrono::system_clock::now();
double dt = chrono::duration<double>(t_end - t_begin).count();
- VLOG(3) << "@PERF this=" << this << " Calculate column offset " << dt * 1e3
- << " ms";
+ LOG(INFO) << "@PERF this=" << this << " Calculate column offset "
+ << dt * 1e3 << " ms";
t_begin = chrono::system_clock::now();
}
+#endif
// Quantize bias
if (!is_weight_constant_ || (!b_quantized_data_ && !b_dequantized_data_) ||
@@ -736,12 +782,16 @@ bool FullyConnectedDNNLowPOp<T>::GetQuantizationParameters_() {
vector<T_signed>().swap(W_quantized_);
}
- if (VLOG_IS_ON(3)) {
+#ifdef DNNLOWP_MEASURE_TIME_BREAKDOWN
+ /* if (VLOG_IS_ON(1)) */
+ {
t_end = chrono::system_clock::now();
double dt = chrono::duration<double>(t_end - t_begin).count();
- VLOG(3) << "@PERF this=" << this << " Quantize bias " << dt * 1e3 << " ms";
+ LOG(INFO) << "@PERF this=" << this << " Quantize bias " << dt * 1e3
+ << " ms";
t_begin = chrono::system_clock::now();
}
+#endif
if (!dequantize_output_ && !requantization_param_selected_) {
GetOutputQuantizationParams_();
@@ -762,13 +812,18 @@ bool FullyConnectedDNNLowPOp<T>::GetQuantizationParameters_() {
Fp32Op_()->Get()->RunOnDevice();
}
}
- if (VLOG_IS_ON(3)) {
+
+#ifdef DNNLOWP_MEASURE_TIME_BREAKDOWN
+ /* if (VLOG_IS_ON(1)) */
+ {
t_end = chrono::system_clock::now();
double dt = chrono::duration<double>(t_end - t_begin).count();
- VLOG(3) << "@PERF this=" << this << " GetOutputQuantizationParams "
- << dt * 1e3 << " ms";
+ LOG(INFO) << "@PERF this=" << this << " GetOutputQuantizationParams "
+ << dt * 1e3 << " ms";
t_begin = chrono::system_clock::now();
}
+#endif
+
return true;
}