In tutorials/quantize_vit, extract common methods to torchao/utils.py (#238)

lancerts · msaroufim · web-flow · commit f0f00cef0251 · 2024-05-20T13:12:29.000-07:00
* Extract common methods to torchao/utils.py

* Update tutorials/quantize_vit/run_vit_b.py

Co-authored-by: Mark Saroufim <marksaroufim@gmail.com>

* Update tutorials/quantize_vit/run_vit_b_quant.py

Co-authored-by: Mark Saroufim <marksaroufim@gmail.com>

* amend

* amend

* Include the torchao utils

---------

Co-authored-by: Mark Saroufim <marksaroufim@gmail.com>
Co-authored-by: Mark Saroufim <marksaroufim@meta.com>
diff --git a/torchao/utils.py b/torchao/utils.py
@@ -0,0 +1,26 @@
+import torch
+
+
+def benchmark_model(model, num_runs, input_tensor):  # returns the CUDA-event time for num_runs calls of model(input_tensor), divided by num_runs
+    torch.cuda.synchronize()  # let previously queued GPU work finish so it is not counted in the timed span
+    start_event = torch.cuda.Event(enable_timing=True)  # enable_timing=True is needed to read elapsed_time later
+    end_event = torch.cuda.Event(enable_timing=True)
+    start_event.record()
+    
+    # benchmark: every call is wrapped in a profiler range labelled "timed region"; the model output is discarded
+    for _ in range(num_runs):
+        with torch.autograd.profiler.record_function("timed region"):
+            model(input_tensor)
+    
+    end_event.record()
+    torch.cuda.synchronize()  # wait until end_event has been reached before reading the elapsed time
+    return start_event.elapsed_time(end_event) / num_runs  # NOTE(review): milliseconds per the torch.cuda.Event docs; num_runs must be non-zero
+
+def profiler_runner(path, fn, *args, **kwargs):  # runs fn(*args, **kwargs) under torch.profiler, writes a Chrome trace to path, returns fn's result
+    with torch.profiler.profile(
+            activities=[torch.profiler.ProfilerActivity.CPU,
+                        torch.profiler.ProfilerActivity.CUDA],
+            record_shapes=True) as prof:  # profile both CPU and CUDA activity, with shape recording turned on
+        result = fn(*args, **kwargs)
+    prof.export_chrome_trace(path)  # export happens after the profiling context has exited
+    return result  # hand fn's return value back to the caller unchanged
diff --git a/tutorials/quantize_vit/run_vit_b.py b/tutorials/quantize_vit/run_vit_b.py
@@ -1,6 +1,8 @@
 import torch
 import torchvision.models.vision_transformer as models
 
+from torchao.utils import benchmark_model, profiler_runner
+torch.set_float32_matmul_precision("high")
 # Load Vision Transformer model
 model = models.vit_b_16(pretrained=True)
 
@@ -12,30 +14,6 @@
 
 model = torch.compile(model, mode='max-autotune')
 
-def benchmark_model(model, num_runs, input_tensor):
-    torch.cuda.synchronize()
-    start_event = torch.cuda.Event(enable_timing=True)
-    end_event = torch.cuda.Event(enable_timing=True)
-    start_event.record()
-    
-    # benchmark
-    for _ in range(num_runs):
-        with torch.autograd.profiler.record_function("timed region"):
-            model(input_tensor)
-    
-    end_event.record()
-    torch.cuda.synchronize()
-    return start_event.elapsed_time(end_event) / num_runs
-
-def profiler_runner(path, fn, *args, **kwargs):
-    with torch.profiler.profile(
-            activities=[torch.profiler.ProfilerActivity.CPU,
-                        torch.profiler.ProfilerActivity.CUDA],
-            record_shapes=True) as prof:
-        result = fn(*args, **kwargs)
-    prof.export_chrome_trace(path)
-    return result
-
 # Must run with no_grad when optimizing for inference
 with torch.no_grad():
     # warmup
diff --git a/tutorials/quantize_vit/run_vit_b_quant.py b/tutorials/quantize_vit/run_vit_b_quant.py
@@ -2,6 +2,8 @@
 import torchao
 import torchvision.models.vision_transformer as models
 
+from torchao.utils import benchmark_model, profiler_runner
+torch.set_float32_matmul_precision("high")
 # Load Vision Transformer model
 model = models.vit_b_16(pretrained=True)
 
@@ -19,30 +21,6 @@
 
 model = torch.compile(model, mode='max-autotune')
 
-def benchmark_model(model, num_runs, input_tensor):
-    torch.cuda.synchronize()
-    start_event = torch.cuda.Event(enable_timing=True)
-    end_event = torch.cuda.Event(enable_timing=True)
-    start_event.record()
-    
-    # benchmark
-    for _ in range(num_runs):
-        with torch.autograd.profiler.record_function("timed region"):
-            model(input_tensor)
-    
-    end_event.record()
-    torch.cuda.synchronize()
-    return start_event.elapsed_time(end_event) / num_runs
-
-def profiler_runner(path, fn, *args, **kwargs):
-    with torch.profiler.profile(
-            activities=[torch.profiler.ProfilerActivity.CPU,
-                        torch.profiler.ProfilerActivity.CUDA],
-            record_shapes=True) as prof:
-        result = fn(*args, **kwargs)
-    prof.export_chrome_trace(path)
-    return result
-
 # Must run with no_grad when optimizing for inference
 with torch.no_grad():
     # warmup