Skip to content

Commit 63a5c55

Browse files
hsharma35 authored and facebook-github-bot committed
Add tests for op_quantize_per_tensor + add checks for quant_min/max (#10300)
Summary: Pull Request resolved: #10300. HiFi4's quantize_per_tensor does not consider quant_min/max. Add checks for when quant_min/max are out of bounds. Reviewed By: mcremon-meta. Differential Revision: D73268792
1 parent ad7cd2b commit 63a5c55

File tree

3 files changed

+189
-9
lines changed

3 files changed

+189
-9
lines changed

backends/cadence/hifi/operators/op_quantize_per_tensor.cpp

+28-9
Original file line numberDiff line numberDiff line change
@@ -6,30 +6,46 @@
66
* LICENSE file in the root directory of this source tree.
77
*/
88

9+
#include <xa_type_def.h>
10+
11+
#include <xa_nnlib_kernels_api.h>
12+
913
#include <executorch/backends/cadence/hifi/kernels/kernels.h>
14+
#include <executorch/runtime/core/exec_aten/exec_aten.h>
15+
#include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>
1016
#include <executorch/runtime/kernel/kernel_includes.h>
11-
#include <xa_nnlib_kernels_api.h>
17+
#include <executorch/runtime/kernel/kernel_runtime_context.h>
1218

1319
namespace cadence {
1420
namespace impl {
1521
namespace HiFi {
1622
namespace native {
1723

18-
using executorch::aten::ScalarType;
19-
using executorch::aten::Tensor;
20-
using executorch::runtime::KernelRuntimeContext;
24+
using ::executorch::aten::ScalarType;
25+
using ::executorch::aten::Tensor;
26+
using ::executorch::runtime::KernelRuntimeContext;
2127

2228
// Quantize the input tensor (PT2 version). Note that quant_<min,max> are not
2329
// used in any computation.
2430
void quantize_per_tensor_out(
2531
KernelRuntimeContext& ctx,
2632
const Tensor& input,
2733
double scale,
28-
int64_t zero_point,
34+
const int64_t zero_point,
2935
__ET_UNUSED int64_t quant_min,
3036
__ET_UNUSED int64_t quant_max,
31-
ScalarType dtype,
37+
const ScalarType dtype,
3238
Tensor& out) {
39+
// Add checks for dtype quant min/max bounds.
40+
ET_SWITCH_REALB_TYPES(
41+
out.scalar_type(), ctx, "quantize_per_tensor", OUT_DTYPE, [&]() {
42+
ET_KERNEL_CHECK(
43+
ctx,
44+
std::numeric_limits<OUT_DTYPE>::min() == quant_min &&
45+
std::numeric_limits<OUT_DTYPE>::max() == quant_max,
46+
InvalidArgument, );
47+
});
48+
3349
const float* input_data = input.const_data_ptr<float>();
3450
const size_t numel = out.numel();
3551
if (out.scalar_type() == ScalarType::Byte) {
@@ -55,10 +71,13 @@ void quantize_per_tensor_out(
5571
cadence::impl::HiFi::kernels::quantize<int32_t>(
5672
out_data, input_data, 1. / scale, zero_point, numel);
5773
} else {
58-
ET_CHECK_MSG(
74+
ET_KERNEL_CHECK_MSG(
75+
ctx,
5976
false,
60-
"Unhandled output dtype %hhd",
61-
static_cast<int8_t>(out.scalar_type()));
77+
InvalidType,
78+
,
79+
"Unhandled output dtype %s",
80+
::torch::executor::toString(out.scalar_type()));
6281
}
6382
}
6483

backends/cadence/hifi/operators/operators.h

+22
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,25 @@
1111
#define ET_FORALL_CADENCE_QUANTIZED_TYPES(_) \
1212
_(uint8_t, Byte) \
1313
_(int8_t, Char)
14+
15+
namespace cadence {
16+
namespace impl {
17+
namespace HiFi {
18+
namespace native {
19+
20+
// Quantize the input tensor (PT2 version). Note that quant_<min,max> are not
21+
// used in any computation.
22+
void quantize_per_tensor_out(
23+
::executorch::runtime::KernelRuntimeContext& ctx,
24+
const ::executorch::aten::Tensor& input,
25+
double scale,
26+
int64_t zero_point,
27+
int64_t quant_min,
28+
int64_t quant_max,
29+
::executorch::aten::ScalarType dtype,
30+
::executorch::aten::Tensor& out);
31+
32+
} // namespace native
33+
} // namespace HiFi
34+
} // namespace impl
35+
} // namespace cadence
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#include <gtest/gtest.h>
10+
#include <sys/times.h>
11+
#include <xtensa/sim.h>
12+
13+
#include <executorch/kernels/test/TestUtil.h>
14+
#include <executorch/runtime/core/error.h>
15+
#include <executorch/runtime/core/exec_aten/exec_aten.h>
16+
#include <executorch/runtime/core/exec_aten/testing_util/tensor_factory.h>
17+
#include <executorch/runtime/core/exec_aten/testing_util/tensor_util.h>
18+
#include <executorch/runtime/platform/runtime.h>
19+
20+
#include <executorch/backends/cadence/hifi/operators/operators.h>
21+
22+
namespace cadence {
23+
namespace impl {
24+
namespace HiFi {
25+
namespace native {
26+
namespace {
27+
28+
using ::executorch::aten::Scalar;
29+
using ::executorch::aten::ScalarType;
30+
using ::executorch::aten::Tensor;
31+
using ::executorch::aten::TensorImpl;
32+
using ::executorch::runtime::Error;
33+
using ::executorch::runtime::KernelRuntimeContext;
34+
using ::executorch::runtime::runtime_init;
35+
using ::executorch::runtime::testing::TensorFactory;
36+
37+
class HiFiQuantizePerTensorTest : public OperatorTest {
38+
public:
39+
protected:
40+
void quantize_per_tensor_out(
41+
const Tensor& input,
42+
double scale,
43+
int64_t zero_point,
44+
__ET_UNUSED int64_t quant_min,
45+
__ET_UNUSED int64_t quant_max,
46+
ScalarType dtype,
47+
Tensor& out) {
48+
::cadence::impl::HiFi::native::quantize_per_tensor_out(
49+
context_, input, scale, zero_point, quant_min, quant_max, dtype, out);
50+
}
51+
};
52+
53+
TEST_F(HiFiQuantizePerTensorTest, ThrowKernelFailureForQuantMinMoreThanLimit) {
  constexpr ScalarType kOutDtype = ScalarType::Int;
  TensorFactory<ScalarType::Float> input_factory;
  TensorFactory<kOutDtype> output_factory;
  const std::vector<int> sizes{4};
  Tensor out = output_factory.zeros(sizes);
  // Arbitrary scale/zero-point; their values are irrelevant to this failure.
  constexpr double kScale = 0.01;
  constexpr int64_t kZeroPoint = 32768;
  // quant_min/quant_max are not used in the quantization math, but the
  // kernel must still fail when quant_min is above the natural lower bound
  // of the output dtype (int32 here).
  constexpr int64_t kQuantMin = 10;
  constexpr int64_t kQuantMax = std::numeric_limits<int32_t>::max();

  ET_EXPECT_KERNEL_FAILURE(
      context_,
      quantize_per_tensor_out(
          input_factory.make(sizes, {1, 2, 3, 4}),
          kScale,
          kZeroPoint,
          kQuantMin,
          kQuantMax,
          kOutDtype,
          out));
}
80+
81+
TEST_F(HiFiQuantizePerTensorTest, ThrowKernelFailureForQuantMaxLessThanLimit) {
  constexpr ScalarType kOutDtype = ScalarType::Int;
  TensorFactory<ScalarType::Float> input_factory;
  TensorFactory<kOutDtype> output_factory;
  const std::vector<int> sizes{4};
  Tensor out = output_factory.zeros(sizes);
  // Arbitrary scale/zero-point; their values are irrelevant to this failure.
  constexpr double kScale = 0.01;
  constexpr int64_t kZeroPoint = 32768;
  // quant_min/quant_max are not used in the quantization math, but the
  // kernel must still fail when quant_max is below the natural upper bound
  // of the output dtype (int32 here).
  constexpr int64_t kQuantMin = std::numeric_limits<int32_t>::min();
  constexpr int64_t kQuantMax = 20;

  ET_EXPECT_KERNEL_FAILURE(
      context_,
      quantize_per_tensor_out(
          input_factory.make(sizes, {1, 2, 3, 4}),
          kScale,
          kZeroPoint,
          kQuantMin,
          kQuantMax,
          kOutDtype,
          out));
}
108+
109+
TEST_F(HiFiQuantizePerTensorTest, CheckSingleElementQuantize) {
  constexpr ScalarType kOutDtype = ScalarType::Int;
  TensorFactory<ScalarType::Float> input_factory;
  TensorFactory<kOutDtype> output_factory;
  const std::vector<int> sizes{1};
  Tensor out = output_factory.zeros(sizes);
  // Arbitrary but representative quantization parameters.
  constexpr double kScale = 0.01;
  constexpr int64_t kZeroPoint = 32768;
  // Use the full int32 range so the kernel's quant_min/max check passes.
  constexpr int64_t kQuantMin = std::numeric_limits<int32_t>::min();
  constexpr int64_t kQuantMax = std::numeric_limits<int32_t>::max();
  constexpr float kInputValue = 100.0f;
  // Expected affine quantization of the single element:
  // input / scale + zero_point, truncated to int32.
  constexpr int32_t kExpectedOutputValue =
      static_cast<int32_t>(kInputValue / kScale + kZeroPoint);

  quantize_per_tensor_out(
      input_factory.make(sizes, {kInputValue}),
      kScale,
      kZeroPoint,
      kQuantMin,
      kQuantMax,
      kOutDtype,
      out);
  EXPECT_TENSOR_EQ(out, output_factory.make(sizes, {kExpectedOutputValue}));
}
134+
135+
} // namespace
136+
} // namespace native
137+
} // namespace HiFi
138+
} // namespace impl
139+
} // namespace cadence

0 commit comments

Comments
 (0)