[not yet for review] migrate pt2e from torch.ao to torchao #10294

Open · wants to merge 1 commit into base: main
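The change itself is a mechanical import migration: symbols previously imported from torch.ao.quantization.* now come from torchao.quantization.pt2e.*, and the edge-dialect affine ops move from the pt2e_quant namespace to torchao. As a minimal sketch of the workflow these imports serve (the quantizer argument is a placeholder; any of the backend quantizers touched below would do):

    import torch
    from torch.export import export_for_training
    from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e

    def quantize_with_pt2e(model: torch.nn.Module, example_inputs, quantizer):
        # Capture a training-time graph, annotate it via the quantizer,
        # run a calibration pass, then convert to the quantized representation.
        exported = export_for_training(model, example_inputs).module()
        prepared = prepare_pt2e(exported, quantizer)
        prepared(*example_inputs)  # calibration
        return convert_pt2e(prepared)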
4 changes: 2 additions & 2 deletions backends/apple/coreml/test/test_coreml_quantizer.py
@@ -15,12 +15,12 @@
)

from executorch.backends.apple.coreml.quantizer import CoreMLQuantizer
from torch.ao.quantization.quantize_pt2e import (
from torch.export import export_for_training
from torchao.quantization.pt2e.quantize_pt2e import (
convert_pt2e,
prepare_pt2e,
prepare_qat_pt2e,
)
from torch.export import export_for_training


class TestCoreMLQuantizer:
12 changes: 6 additions & 6 deletions backends/arm/quantizer/arm_quantizer.py
@@ -34,25 +34,25 @@
is_ethosu,
) # usort: skip
from executorch.exir.backend.compile_spec_schema import CompileSpec
from torch.ao.quantization.fake_quantize import (
from torch.fx import GraphModule, Node
from torchao.quantization.pt2e import _ObserverOrFakeQuantizeConstructor

Check failure on line 38 in backends/arm/quantizer/arm_quantizer.py (GitHub Actions lintrunner, MYPY import-untyped): Skipping analyzing "torchao.quantization.pt2e": module is installed, but missing library stubs or py.typed marker. To disable, use `# type: ignore[import-untyped]`.
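Following the mypy hint, the new import could be silenced inline, for example:

    from torchao.quantization.pt2e import _ObserverOrFakeQuantizeConstructor  # type: ignore[import-untyped]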
Contributor: Correct me if I am wrong, but torchao isn't a mandatory dependency today; with this change it would be?

Contributor (author): How do we define mandatory dependencies? Isn't it installed by the install_requirements script?

Contributor: Seems like we pull in torchao from source (url = https://github.com/pytorch/ao.git), so the submodule is already updated, given the tests are passing here. Two things to check: (1) whether we run tests on ExecuTorch wheels that exercise quantization, and (2) if we do, whether they pass for this diff.
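Not part of this PR, just for the dependency discussion: if torchao were treated as optional rather than mandatory, the usual pattern would be a guarded import along these lines (the flag name is illustrative):

    try:
        from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e

        HAS_TORCHAO_PT2E = True
    except ImportError:
        # torchao is not installed; pt2e quantization paths are unavailable.
        HAS_TORCHAO_PT2E = False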

from torchao.quantization.pt2e.fake_quantize import (

Check failure on line 39 in backends/arm/quantizer/arm_quantizer.py (GitHub Actions lintrunner, MYPY import-untyped): Skipping analyzing "torchao.quantization.pt2e.fake_quantize": module is installed, but missing library stubs or py.typed marker. To disable, use `# type: ignore[import-untyped]`.
FakeQuantize,
FusedMovingAvgObsFakeQuantize,
)
from torch.ao.quantization.observer import (
from torchao.quantization.pt2e.observer import (
HistogramObserver,
MinMaxObserver,
MovingAverageMinMaxObserver,
MovingAveragePerChannelMinMaxObserver,
PerChannelMinMaxObserver,
PlaceholderObserver,
)
from torch.ao.quantization.qconfig import _ObserverOrFakeQuantizeConstructor
from torch.ao.quantization.quantizer import QuantizationSpec, Quantizer
from torch.ao.quantization.quantizer.utils import (
from torchao.quantization.pt2e.quantizer import QuantizationSpec, Quantizer
from torchao.quantization.pt2e.quantizer.utils import (
_annotate_input_qspec_map,
_annotate_output_qspec,
)
from torch.fx import GraphModule, Node

__all__ = [
"TOSAQuantizer",
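For reference, the objects built from these imports are unchanged by the move; a spec constructed from the new paths would look roughly like this (field names assumed to match the pre-migration torch.ao.quantization API, values illustrative rather than this PR's defaults):

    import torch
    from torchao.quantization.pt2e.observer import HistogramObserver
    from torchao.quantization.pt2e.quantizer import QuantizationSpec

    # Example 8-bit activation spec built from the new import roots.
    act_qspec = QuantizationSpec(
        dtype=torch.int8,
        quant_min=-128,
        quant_max=127,
        qscheme=torch.per_tensor_affine,
        observer_or_fake_quant_ctr=HistogramObserver.with_args(eps=2**-12),
    )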
4 changes: 2 additions & 2 deletions backends/arm/quantizer/arm_quantizer_utils.py
@@ -15,10 +15,10 @@

import torch
from torch._subclasses import FakeTensor

from torch.ao.quantization.quantizer import QuantizationAnnotation
from torch.fx import GraphModule, Node

from torchao.quantization.pt2e.quantizer import QuantizationAnnotation

Check failure on line 20 in backends/arm/quantizer/arm_quantizer_utils.py (GitHub Actions lintrunner, MYPY import-untyped): Skipping analyzing "torchao.quantization.pt2e.quantizer": module is installed, but missing library stubs or py.typed marker. To disable, use `# type: ignore[import-untyped]`.


def is_annotated(node: Node) -> bool:
"""Given a node return whether the node is annotated."""
9 changes: 6 additions & 3 deletions backends/arm/quantizer/quantization_annotator.py
@@ -13,12 +13,15 @@
from executorch.backends.arm.quantizer import arm_quantizer_utils
from executorch.backends.arm.quantizer.quantization_config import QuantizationConfig
from executorch.backends.arm.tosa_utils import get_node_debug_info
from torch.ao.quantization.quantizer import QuantizationSpecBase, SharedQuantizationSpec
from torch.ao.quantization.quantizer.utils import (
from torch.fx import Node
from torchao.quantization.pt2e.quantizer import (

Check failure on line 17 in backends/arm/quantizer/quantization_annotator.py (GitHub Actions lintrunner, MYPY import-untyped): Skipping analyzing "torchao.quantization.pt2e.quantizer": module is installed, but missing library stubs or py.typed marker. To disable, use `# type: ignore[import-untyped]`.
QuantizationSpecBase,
SharedQuantizationSpec,
)
from torchao.quantization.pt2e.quantizer.utils import (

Check failure on line 21 in backends/arm/quantizer/quantization_annotator.py (GitHub Actions lintrunner, MYPY import-untyped): Skipping analyzing "torchao.quantization.pt2e.quantizer.utils": module is installed, but missing library stubs or py.typed marker. To disable, use `# type: ignore[import-untyped]`.
_annotate_input_qspec_map,
_annotate_output_qspec,
)
from torch.fx import Node

logger = logging.getLogger(__name__)

4 changes: 2 additions & 2 deletions backends/arm/quantizer/quantization_config.py
@@ -9,9 +9,9 @@
from dataclasses import dataclass

import torch
from torch.ao.quantization import ObserverOrFakeQuantize
from torchao.quantization.pt2e import ObserverOrFakeQuantize

Check failure on line 12 in backends/arm/quantizer/quantization_config.py (GitHub Actions lintrunner, MYPY import-untyped): Skipping analyzing "torchao.quantization.pt2e": module is installed, but missing library stubs or py.typed marker. To disable, use `# type: ignore[import-untyped]`.

from torch.ao.quantization.quantizer import (
from torchao.quantization.pt2e.quantizer import (

Check failure on line 14 in backends/arm/quantizer/quantization_config.py (GitHub Actions lintrunner, MYPY import-untyped): Skipping analyzing "torchao.quantization.pt2e.quantizer": module is installed, but missing library stubs or py.typed marker. To disable, use `# type: ignore[import-untyped]`.
DerivedQuantizationSpec,
FixedQParamsQuantizationSpec,
QuantizationSpec,
4 changes: 2 additions & 2 deletions backends/arm/test/ops/test_add.py
@@ -18,8 +18,8 @@
TosaPipelineMI,
)
from executorch.backends.xnnpack.test.tester import Quantize
from torch.ao.quantization.observer import HistogramObserver
from torch.ao.quantization.quantizer import QuantizationSpec
from torchao.quantization.pt2e.observer import HistogramObserver
from torchao.quantization.pt2e.quantizer import QuantizationSpec


aten_op = "torch.ops.aten.add.Tensor"
4 changes: 2 additions & 2 deletions backends/arm/test/ops/test_sigmoid_16bit.py
@@ -18,8 +18,8 @@
TosaPipelineBI,
)
from executorch.backends.xnnpack.test.tester import Quantize
from torch.ao.quantization.observer import HistogramObserver
from torch.ao.quantization.quantizer import QuantizationSpec
from torchao.quantization.pt2e.observer import HistogramObserver
from torchao.quantization.pt2e.quantizer import QuantizationSpec


def _get_16_bit_quant_config():
4 changes: 2 additions & 2 deletions backends/arm/test/ops/test_sigmoid_32bit.py
@@ -14,8 +14,8 @@
TosaPipelineBI,
)
from executorch.backends.xnnpack.test.tester import Quantize
from torch.ao.quantization.observer import HistogramObserver
from torch.ao.quantization.quantizer import QuantizationSpec
from torchao.quantization.pt2e.observer import HistogramObserver
from torchao.quantization.pt2e.quantizer import QuantizationSpec


def _get_16_bit_quant_config():
2 changes: 1 addition & 1 deletion backends/cadence/aot/compiler.py
@@ -37,10 +37,10 @@
from executorch.exir.passes.sym_shape_eval_pass import HintBasedSymShapeEvalPass
from executorch.exir.program._program import to_edge_with_preserved_ops
from torch._inductor.decomposition import remove_decompositions
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e

from torch.export import export
from torch.export.exported_program import ExportedProgram
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e

from .passes import get_cadence_passes

2 changes: 1 addition & 1 deletion backends/cadence/aot/quantizer/patterns.py
@@ -15,7 +15,7 @@

from torch import fx
from torch._ops import OpOverload
from torch.ao.quantization.quantizer import (
from torchao.quantization.pt2e.quantizer import (
DerivedQuantizationSpec,
SharedQuantizationSpec,
)
6 changes: 3 additions & 3 deletions backends/cadence/aot/quantizer/quantizer.py
@@ -38,9 +38,9 @@

from torch import fx

from torch.ao.quantization.observer import HistogramObserver, MinMaxObserver
from torch.ao.quantization.quantizer import DerivedQuantizationSpec, Quantizer
from torch.ao.quantization.quantizer.composable_quantizer import ComposableQuantizer
from torchao.quantization.pt2e.observer import HistogramObserver, MinMaxObserver
from torchao.quantization.pt2e.quantizer import DerivedQuantizationSpec, Quantizer
from torchao.quantization.pt2e.quantizer.composable_quantizer import ComposableQuantizer


act_qspec_asym8s = QuantizationSpec(
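ComposableQuantizer keeps its role here; under the new import root it composes backend quantizers the same way. A sketch, with placeholder quantizer instances:

    from torchao.quantization.pt2e.quantizer.composable_quantizer import (
        ComposableQuantizer,
    )

    # quantizer_a / quantizer_b: any Quantizer instances (placeholders here).
    # Their annotate passes are applied in the order they are listed.
    composed = ComposableQuantizer([quantizer_a, quantizer_b])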
2 changes: 1 addition & 1 deletion backends/cadence/aot/quantizer/utils.py
@@ -14,13 +14,13 @@
import torch
from torch import fx
from torch._ops import OpOverload
from torch.ao.quantization import ObserverOrFakeQuantize

from torch.fx import GraphModule
from torch.fx.passes.utils.source_matcher_utils import (
check_subgraphs_connected,
SourcePartition,
)
from torchao.quantization.pt2e import ObserverOrFakeQuantize


def quantize_tensor_multiplier(
4 changes: 2 additions & 2 deletions backends/cadence/aot/tests/test_remove_ops_passes.py
@@ -41,11 +41,11 @@
from parameterized.parameterized import parameterized
from pyre_extensions import none_throws

from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e

from torch.export import export_for_training
from torch.fx.passes.infra.pass_base import PassResult

from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e


class TestRemoveOpsPasses(unittest.TestCase):
@parameterized.expand(
@@ -11,7 +11,7 @@
from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.dim_order_utils import get_dim_order
from executorch.exir.pass_base import ExportPass, PassResult
from torch.ao.quantization.pt2e.graph_utils import find_sequential_partitions
from torchao.quantization.pt2e.pt2e.graph_utils import find_sequential_partitions


class PermuteMemoryFormatsPass(ExportPass):
2 changes: 1 addition & 1 deletion backends/example/example_operators/utils.py
@@ -4,7 +4,7 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from torch.ao.quantization.quantizer.quantizer import QuantizationAnnotation
from torchao.quantization.pt2e.quantizer.quantizer import QuantizationAnnotation


def _nodes_are_annotated(node_list):
2 changes: 1 addition & 1 deletion backends/example/example_partitioner.py
@@ -19,9 +19,9 @@
)
from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.graph_module import get_control_flow_submodules
from torch.ao.quantization.pt2e.graph_utils import find_sequential_partitions
from torch.export import ExportedProgram
from torch.fx.passes.operator_support import OperatorSupportBase
from torchao.quantization.pt2e.pt2e.graph_utils import find_sequential_partitions


@final
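find_sequential_partitions is used by the example partitioner above and the example quantizer below; assuming it keeps its torch.ao signature under the path used in this diff, a call looks like this (gm is an exported GraphModule, assumed to exist):

    import torch.nn as nn
    from torchao.quantization.pt2e.pt2e.graph_utils import find_sequential_partitions

    # Returns a list of matches; each match is a list of SourcePartition objects,
    # one per element of the requested pattern (here Conv2d followed by ReLU).
    conv_relu_partitions = find_sequential_partitions(gm, [nn.Conv2d, nn.ReLU])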
6 changes: 3 additions & 3 deletions backends/example/example_quantizer.py
@@ -11,9 +11,9 @@
from executorch.backends.example.example_operators.ops import module_to_annotator
from executorch.backends.xnnpack.quantizer.xnnpack_quantizer_utils import OperatorConfig
from torch import fx
from torch.ao.quantization.observer import HistogramObserver, MinMaxObserver
from torch.ao.quantization.pt2e.graph_utils import find_sequential_partitions
from torch.ao.quantization.quantizer import QuantizationSpec, Quantizer
from torchao.quantization.pt2e.observer import HistogramObserver, MinMaxObserver
from torchao.quantization.pt2e.pt2e.graph_utils import find_sequential_partitions
from torchao.quantization.pt2e.quantizer import QuantizationSpec, Quantizer


def get_uint8_tensor_spec(observer_or_fake_quant_ctr):
4 changes: 2 additions & 2 deletions backends/example/test_example_delegate.py
@@ -17,10 +17,10 @@
DuplicateDequantNodePass,
)
from executorch.exir.delegate import executorch_call_delegate

from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.export import export

from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e

from torchvision.models.quantization import mobilenet_v2


12 changes: 6 additions & 6 deletions backends/mediatek/quantizer/annotator.py
@@ -10,18 +10,18 @@
from torch._ops import OpOverload
from torch._subclasses import FakeTensor

from torch.ao.quantization.quantizer import QuantizationAnnotation
from torch.ao.quantization.quantizer.utils import (
_annotate_input_qspec_map,
_annotate_output_qspec,
)

from torch.export import export_for_training
from torch.fx import Graph, Node
from torch.fx.passes.utils.matcher_with_name_node_map_utils import (
SubgraphMatcherWithNameNodeMap,
)

from torchao.quantization.pt2e.quantizer import QuantizationAnnotation
from torchao.quantization.pt2e.quantizer.utils import (
_annotate_input_qspec_map,
_annotate_output_qspec,
)

from .qconfig import QuantizationConfig


6 changes: 3 additions & 3 deletions backends/mediatek/quantizer/qconfig.py
@@ -10,9 +10,9 @@

import torch

from torch.ao.quantization.fake_quantize import FakeQuantize
from torch.ao.quantization.observer import MinMaxObserver, PerChannelMinMaxObserver
from torch.ao.quantization.quantizer import QuantizationSpec
from torchao.quantization.pt2e.fake_quantize import FakeQuantize
from torchao.quantization.pt2e.observer import MinMaxObserver, PerChannelMinMaxObserver
from torchao.quantization.pt2e.quantizer import QuantizationSpec


@unique
2 changes: 1 addition & 1 deletion backends/mediatek/quantizer/quantizer.py
@@ -4,8 +4,8 @@
# except in compliance with the License. See the license file in the root
# directory of this source tree for more details.

from torch.ao.quantization.quantizer import Quantizer
from torch.fx import GraphModule
from torchao.quantization.pt2e.quantizer import Quantizer

from .._passes.decompose_scaled_dot_product_attention import (
DecomposeScaledDotProductAttention,
6 changes: 3 additions & 3 deletions backends/nxp/quantizer/neutron_quantizer.py
@@ -35,9 +35,9 @@
QuantizationSpec,
)
from torch import fx
from torch.ao.quantization.observer import HistogramObserver, MinMaxObserver
from torch.ao.quantization.quantizer import DerivedQuantizationSpec, Quantizer
from torch.ao.quantization.quantizer.composable_quantizer import ComposableQuantizer
from torchao.quantization.pt2e.observer import HistogramObserver, MinMaxObserver
from torchao.quantization.pt2e.quantizer import DerivedQuantizationSpec, Quantizer
from torchao.quantization.pt2e.quantizer.composable_quantizer import ComposableQuantizer


class NeutronAtenQuantizer(Quantizer):
2 changes: 1 addition & 1 deletion backends/nxp/quantizer/patterns.py
@@ -14,7 +14,7 @@
from executorch.backends.nxp.quantizer.utils import get_bias_qparams
from torch import fx
from torch._ops import OpOverload
from torch.ao.quantization.quantizer import (
from torchao.quantization.pt2e.quantizer import (
DerivedQuantizationSpec,
FixedQParamsQuantizationSpec,
SharedQuantizationSpec,
2 changes: 1 addition & 1 deletion backends/nxp/quantizer/utils.py
@@ -14,11 +14,11 @@
import torch
from torch import fx
from torch._ops import OpOverload
from torch.ao.quantization import ObserverOrFakeQuantize
from torch.fx.passes.utils.source_matcher_utils import (
check_subgraphs_connected,
SourcePartition,
)
from torchao.quantization.pt2e import ObserverOrFakeQuantize


def is_annotated(nodes: List[fx.Node]) -> bool:
2 changes: 1 addition & 1 deletion backends/nxp/tests/test_quantizer.py
@@ -8,7 +8,7 @@
import executorch.backends.nxp.tests.models as models
import torch
from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e


def _get_target_name(node):
4 changes: 2 additions & 2 deletions backends/openvino/quantizer/quantizer.py
@@ -17,12 +17,12 @@
import torch.fx

from nncf.common.graph.graph import NNCFGraph # type: ignore[import-untyped]
from torch.ao.quantization.observer import (
from torchao.quantization.pt2e.observer import (

Check failure on line 20 in backends/openvino/quantizer/quantizer.py (GitHub Actions lintrunner, MYPY import-untyped): Skipping analyzing "torchao.quantization.pt2e.observer": module is installed, but missing library stubs or py.typed marker. To disable, use `# type: ignore[import-untyped]`.
HistogramObserver,
PerChannelMinMaxObserver,
UniformQuantizationObserverBase,
)
from torch.ao.quantization.quantizer.quantizer import (
from torchao.quantization.pt2e.quantizer.quantizer import (

Check failure on line 25 in backends/openvino/quantizer/quantizer.py (GitHub Actions lintrunner, MYPY import-untyped): Skipping analyzing "torchao.quantization.pt2e.quantizer.quantizer": module is installed, but missing library stubs or py.typed marker. To disable, use `# type: ignore[import-untyped]`.
EdgeOrNode,
QuantizationAnnotation,
QuantizationSpec,
4 changes: 2 additions & 2 deletions backends/qualcomm/_passes/annotate_quant_attrs.py
@@ -124,9 +124,9 @@ def _dequant_fold_params(self, n, quant_attrs, param):
offsets = self._expand(quant_attrs[QCOM_ZERO_POINTS], dim, axis)
param = param.sub(offsets).mul(scales).to(torch.float32).contiguous()
elif quant_attrs[QCOM_ENCODING] in [
exir_ops.edge.pt2e_quant.dequantize_affine.default
exir_ops.edge.torchao.dequantize_affine.default
]:
param = torch.ops.pt2e_quant.dequantize_affine(
param = torch.ops.torchao.dequantize_affine(
param,
block_size=quant_attrs[QCOM_BLOCK_SIZE],
scale=quant_attrs[QCOM_SCALE],
4 changes: 2 additions & 2 deletions backends/qualcomm/_passes/qnn_pass_manager.py
@@ -131,8 +131,8 @@ def get_to_edge_transform_passes(
from executorch.backends.qualcomm._passes import utils
from executorch.exir.dialects._ops import ops as exir_ops

utils.q_ops.add(exir_ops.edge.pt2e_quant.quantize_affine.default)
utils.dq_ops.add(exir_ops.edge.pt2e_quant.dequantize_affine.default)
utils.q_ops.add(exir_ops.edge.torchao.quantize_affine.default)
utils.dq_ops.add(exir_ops.edge.torchao.dequantize_affine.default)

passes_job = (
passes_job if passes_job is not None else get_capture_program_passes()
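Besides the import moves, the Qualcomm passes and builders rename the affine quantization custom ops from the pt2e_quant namespace to torchao. A quick sanity check that the renamed ops are registered (illustrative only; assumes importing torchao's quantization module performs the op registration):

    import torch
    import torchao.quantization  # op registration is assumed to happen on import

    assert hasattr(torch.ops.torchao, "quantize_affine")
    assert hasattr(torch.ops.torchao, "dequantize_affine")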
6 changes: 3 additions & 3 deletions backends/qualcomm/builders/node_visitor.py
@@ -242,8 +242,8 @@ def get_quant_encoding_conf(
)
# TODO: refactor this when target could be correctly detected
per_block_encoding = {
exir_ops.edge.pt2e_quant.quantize_affine.default,
exir_ops.edge.pt2e_quant.dequantize_affine.default,
exir_ops.edge.torchao.quantize_affine.default,
exir_ops.edge.torchao.dequantize_affine.default,
}
if quant_attrs[QCOM_ENCODING] in per_block_encoding:
return self.make_qnn_per_block_config(node, quant_attrs)
@@ -271,7 +271,7 @@ def get_quant_tensor_value(
axis_order.index(x) for x in range(len(axis_order))
)
tensor = tensor.permute(origin_order)
tensor = torch.ops.pt2e_quant.quantize_affine(
tensor = torch.ops.torchao.quantize_affine(
tensor,
block_size=quant_attrs[QCOM_BLOCK_SIZE],
scale=quant_attrs[QCOM_SCALE],