-rw-r--r-- | aten/src/ATen/native/quantized/QTensor.cpp | 12
-rw-r--r-- | aten/src/ATen/quantized/QTensorImpl.h      |  7
-rw-r--r-- | aten/src/ATen/quantized/Quantizer.cpp      | 16
-rw-r--r-- | aten/src/ATen/quantized/Quantizer.h        | 20
-rw-r--r-- | aten/src/ATen/test/quantized_test.cpp      |  2
5 files changed, 30 insertions, 27 deletions
diff --git a/aten/src/ATen/native/quantized/QTensor.cpp b/aten/src/ATen/native/quantized/QTensor.cpp
index 84243b1d59..85c4481b74 100644
--- a/aten/src/ATen/native/quantized/QTensor.cpp
+++ b/aten/src/ATen/native/quantized/QTensor.cpp
@@ -7,32 +7,32 @@
 namespace at {
 namespace native {
 
-QTensor quantize_linear_cpu(const RealTensor& self, double scale, int64_t zero_point) {
+Tensor quantize_linear_cpu(const Tensor& self, double scale, int64_t zero_point) {
   auto quantizer = make_per_tensor_affine_quantizer(scale, zero_point);
   return quantizer->quantize(self);
 }
 
-RealTensor dequantize_quant(const QTensor& self) {
+Tensor dequantize_quant(const Tensor& self) {
   return get_qtensorimpl(self)->quantizer()->dequantize(self);
 }
 
-Scalar q_scale_quant(const QTensor& self) {
+Scalar q_scale_quant(const Tensor& self) {
   auto quantizer = get_qtensorimpl(self)->quantizer();
   AT_ASSERT(quantizer->qscheme() == kPerTensorAffine);
   return Scalar(static_cast<PerTensorAffineQuantizer*>(quantizer.get())->scale());
 }
 
-Scalar q_zero_point_quant(const QTensor& self) {
+Scalar q_zero_point_quant(const Tensor& self) {
   auto quantizer = get_qtensorimpl(self)->quantizer();
   AT_ASSERT(quantizer->qscheme() == kPerTensorAffine);
   return Scalar(static_cast<PerTensorAffineQuantizer*>(quantizer.get())->zero_point());
 }
 
-Quantizer* quantizer(const QTensor& self) {
+Quantizer* quantizer(const Tensor& self) {
   return get_qtensorimpl(self)->quantizer().get();
 }
 
-Tensor int_repr_quant(const QTensor& self) {
+Tensor int_repr_quant(const Tensor& self) {
   Tensor dst = at::empty(self.sizes(), self.options().dtype(at::kByte));
   uint8_t* self_data = reinterpret_cast<uint8_t *>(self.data<qint8>());
   uint8_t* dst_data = dst.data<uint8_t>();
diff --git a/aten/src/ATen/quantized/QTensorImpl.h b/aten/src/ATen/quantized/QTensorImpl.h
index 6e9af90e76..3aeeb40257 100644
--- a/aten/src/ATen/quantized/QTensorImpl.h
+++ b/aten/src/ATen/quantized/QTensorImpl.h
@@ -6,6 +6,13 @@
 
 namespace at {
 
+/**
+ * QTensorImpl is a TensorImpl for Quantized Tensors, it stores Quantizer which
+ * specifies the quantization scheme and parameters, for more information please
+ * see ATen/quantized/Quantizer.h
+ *
+ * We'll use QTensor in code or documentation to refer to a Tensor with QTensorImpl.
+ */
struct CAFFE2_API QTensorImpl : public c10::TensorImpl {
  public:
   QTensorImpl(
diff --git a/aten/src/ATen/quantized/Quantizer.cpp b/aten/src/ATen/quantized/Quantizer.cpp
index 1ec6ce9f8d..c0784a549e 100644
--- a/aten/src/ATen/quantized/Quantizer.cpp
+++ b/aten/src/ATen/quantized/Quantizer.cpp
@@ -18,7 +18,7 @@ QuantizerPtr make_per_tensor_affine_quantizer(
       static_cast<float>(scale), static_cast<uint8_t>(zero_point));
 }
 
-QTensorImpl* get_qtensorimpl(const QTensor& self) {
+QTensorImpl* get_qtensorimpl(const Tensor& self) {
   // TODO: remove this when Variable and Tensor are merged
   AT_ASSERTM(
       !self.is_variable(),
@@ -29,7 +29,7 @@ QTensorImpl* get_qtensorimpl(const QTensor& self) {
   return static_cast<QTensorImpl*>(self.unsafeGetTensorImpl());
 }
 
-inline QTensor new_qtensor_cpu(
+inline Tensor new_qtensor_cpu(
     IntArrayRef sizes,
     const TensorOptions& options,
     QuantizerPtr quantizer) {
@@ -40,7 +40,7 @@ inline QTensor new_qtensor_cpu(
   int64_t nelements = at::prod_intlist(sizes);
   auto dtype = options.dtype();
   AT_CHECK(isQIntType(typeMetaToScalarType(dtype)),
-           "ScalarType not supported for QTensor in new_qtensor_cpu.");
+           "ScalarType is not supported in new_qtensor_cpu.");
   auto storage = c10::make_intrusive<StorageImpl>(
       dtype,
       nelements,
@@ -76,14 +76,14 @@ qint8 quantize_uint8(float scale, uint8_t zero_point, float value) {
   return static_cast<qint8>(qvalue);
 }
 
-QTensor PerTensorAffineQuantizer::quantize(RealTensor tensor) {
+Tensor PerTensorAffineQuantizer::quantize(Tensor tensor) {
   IntArrayRef sizes = tensor.sizes();
   // Here we need a std::intrusive_ptr<Quantizer>.. but actually "this" is the
   // quantizer that can be reused, so I'm using intrusive_from_this here
   AT_CHECK(
       tensor.options().device() == kCPU,
       "quantize only works for CPU backend right now.");
-  QTensor qv = new_qtensor_cpu(
+  Tensor qv = new_qtensor_cpu(
       sizes,
       tensor.options().dtype(at::kQInt8),
       intrusive_from_this());
@@ -110,11 +110,11 @@ QTensor PerTensorAffineQuantizer::quantize(RealTensor tensor) {
   return qv;
 }
 
-RealTensor PerTensorAffineQuantizer::dequantize(QTensor tensor) {
+Tensor PerTensorAffineQuantizer::dequantize(Tensor tensor) {
   std::vector<int64_t> sizes = tensor.sizes().vec();
-  at::TensorOptions real_options = tensor.options().dtype(at::kFloat);
+  at::TensorOptions options = tensor.options().dtype(at::kFloat);
 
-  RealTensor rv = at::empty(sizes, real_options);
+  Tensor rv = at::empty(sizes, options);
   float* rvd = rv.data<float>();
   tensor = tensor.contiguous();
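[Aside: the per-tensor affine arithmetic that these renames leave untouched. The hunk above shows only the tail of quantize_uint8, so the self-contained sketch below is an assumption based on the standard affine scheme and the (scale, zero_point, value) signature, not code copied from the patch; the helper names affine_quantize and affine_dequantize are hypothetical.]

#include <algorithm>
#include <cmath>
#include <cstdint>

// Sketch of the per-tensor affine mapping behind quantize_uint8 above.
// The round-and-clamp details are assumed from the standard scheme, since
// the hunk only shows the signature and the final cast to qint8.
uint8_t affine_quantize(float scale, uint8_t zero_point, float value) {
  int32_t qvalue =
      zero_point + static_cast<int32_t>(std::nearbyint(value / scale));
  qvalue = std::max(0, std::min(qvalue, 255));  // clamp to the uint8 range
  return static_cast<uint8_t>(qvalue);
}

// Per-element inverse that PerTensorAffineQuantizer::dequantize applies
// across the whole tensor: (qvalue - zero_point) * scale.
float affine_dequantize(float scale, uint8_t zero_point, uint8_t qvalue) {
  return (static_cast<int32_t>(qvalue) - zero_point) * scale;
}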
diff --git a/aten/src/ATen/quantized/Quantizer.h b/aten/src/ATen/quantized/Quantizer.h
index f4badcfcf7..e735f8f33c 100644
--- a/aten/src/ATen/quantized/Quantizer.h
+++ b/aten/src/ATen/quantized/Quantizer.h
@@ -14,10 +14,6 @@
 
 namespace at {
 
 struct QTensorImpl;
-
-using QTensor = Tensor;
-using RealTensor = Tensor;
-
 struct Quantizer;
 using QuantizerPtr = c10::intrusive_ptr<Quantizer>;
@@ -42,7 +38,7 @@ using QuantizerPtr = c10::intrusive_ptr<Quantizer>;
  * they should have one to one mapping.
 *
 * Note about intrusive_ptr:
- * QTensor holds an intrusive_ptr to Quantizer, and multiple Tensor can
+ * Quantized Tensor holds an intrusive_ptr to Quantizer, and multiple Tensor can
 * share the same Quantizer. Quantizer should be immutable.
 */
struct CAFFE2_API Quantizer : public c10::intrusive_ptr_target {
@@ -66,12 +62,12 @@ struct CAFFE2_API Quantizer : public c10::intrusive_ptr_target {
   /**
    * quantize a float Tensor into a quantized Tensor.
    */
-  virtual QTensor quantize(RealTensor t) = 0;
+  virtual Tensor quantize(Tensor t) = 0;
 
   /**
    * dequantize a quantized Tensor into a float Tensor.
    */
-  virtual RealTensor dequantize(QTensor t) = 0;
+  virtual Tensor dequantize(Tensor t) = 0;
 };
@@ -173,8 +169,8 @@ struct CAFFE2_API PerTensorAffineQuantizer : public AffineQuantizer {
         scale_(scale),
         zero_point_(zero_point) {}
 
-  QTensor quantize(RealTensor tensor) override;
-  RealTensor dequantize(QTensor tensor) override;
+  Tensor quantize(Tensor tensor) override;
+  Tensor dequantize(Tensor tensor) override;
 
   float scale() const {
     return scale_;
@@ -231,7 +227,7 @@ struct CAFFE2_API PerChannelAffineQuantizer : public AffineQuantizer {
 // setters/getters for QTensorImpl fields; otherwise, you should use
 // the low level setters/getters that were implemented using this.
 // This may be called repeatedly, so make sure it's pretty cheap.
-CAFFE2_API QTensorImpl* get_qtensorimpl(const QTensor& self);
+CAFFE2_API QTensorImpl* get_qtensorimpl(const Tensor& self);
 
 // Quantize a float value into a uint8 value given scale and zero_point
 CAFFE2_API qint8 quantize_uint8(float scale, uint8_t zero_point, float value);
@@ -241,8 +237,8 @@ CAFFE2_API qint8 quantize_uint8(float scale, uint8_t zero_point, float value);
 CAFFE2_API QuantizerPtr make_per_tensor_affine_quantizer(double scale, int64_t zero_point);
 
-// Create a QTensor given arguments for normal Tensor and a quantizer
-CAFFE2_API QTensor new_qtensor_cpu(
+// Create a Quantized Tensor given arguments for normal Tensor and a quantizer
+CAFFE2_API Tensor new_qtensor_cpu(
     IntArrayRef sizes,
     const TensorOptions& options,
     QuantizerPtr quantizer);
diff --git a/aten/src/ATen/test/quantized_test.cpp b/aten/src/ATen/test/quantized_test.cpp
index 1c62698729..7163ef7250 100644
--- a/aten/src/ATen/test/quantized_test.cpp
+++ b/aten/src/ATen/test/quantized_test.cpp
@@ -60,7 +60,7 @@ TEST(TestQTensor, EmptyQuantized) {
   int zero_point = 10;
   int val = 100;
   int numel = 10;
-  QTensor q = at::_empty_affine_quantized({numel}, at::device(at::kCPU).dtype(kQInt8), scale, zero_point);
+  Tensor q = at::_empty_affine_quantized({numel}, at::device(at::kCPU).dtype(kQInt8), scale, zero_point);
   // Assigning to QTensor
   auto* q_data = q.data<qint8>();
   for (int i = 0; i < numel; ++i) {
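[Aside: a minimal sketch of the renamed surface in use, modeled on the TestQTensor.EmptyQuantized case above. The main wrapper and the literal values are illustrative, and exposing dequantize as a Tensor method is an assumption based on dequantize_quant in QTensor.cpp.]

#include <ATen/ATen.h>

int main() {
  double scale = 0.5;
  int64_t zero_point = 10;
  int numel = 10;

  // After this patch a quantized tensor is a plain at::Tensor whose
  // TensorImpl is QTensorImpl; the QTensor/RealTensor aliases are gone.
  at::Tensor q = at::_empty_affine_quantized(
      {numel}, at::device(at::kCPU).dtype(at::kQInt8), scale, zero_point);

  // Fill the underlying qint8 storage directly, as the test does.
  auto* q_data = q.data<at::qint8>();
  for (int i = 0; i < numel; ++i) {
    q_data[i] = static_cast<at::qint8>(100);
  }

  // Round-trip back to float: (100 - 10) * 0.5 == 45.0f per element
  // (assuming the dequantize op backed by dequantize_quant is a method).
  at::Tensor r = q.dequantize();
  return 0;
}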