onnx==1.16.1
onnxruntime==1.19.2

import os
import multiprocessing

import numpy as np
from PIL import Image
import torch
from torchvision import models
from onnxruntime.quantization import quantize_static, CalibrationDataReader

image_height = 224
image_width = 224


def preprocess_image(image_path, height, width, channels=3):
    # Load the image, force 3-channel RGB, and resize to the model's input resolution.
    image = Image.open(image_path).convert('RGB')
    image = image.resize((width, height))
    image_data = np.asarray(image).astype(np.float32)
    image_data = image_data.transpose([2, 0, 1])  # HWC -> CHW
    # Standard ImageNet normalization.
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    for channel in range(image_data.shape[0]):
        image_data[channel, :, :] = (image_data[channel, :, :] / 255 - mean[channel]) / std[channel]
    image_data = np.expand_dims(image_data, 0)  # add batch dimension -> (1, 3, H, W)
    return image_data


def preprocess_func(images_folder, height, width, size_limit=0):
    # Preprocess up to size_limit images from the calibration folder (0 means use all of them).
    image_names = os.listdir(images_folder)
    if size_limit > 0 and len(image_names) >= size_limit:
        batch_filenames = [image_names[i] for i in range(size_limit)]
    else:
        batch_filenames = image_names
    unconcatenated_batch_data = []
    for image_name in batch_filenames:
        image_filepath = os.path.join(images_folder, image_name)
        image_data = preprocess_image(image_filepath, height, width)
        unconcatenated_batch_data.append(image_data)
    # Result has shape (N, 1, 3, H, W): each slice along axis 0 is a ready-to-feed batch of one image.
    batch_data = np.concatenate(np.expand_dims(unconcatenated_batch_data, axis=0), axis=0)
    return batch_data


class MobilenetDataReader(CalibrationDataReader):
    def __init__(self, calibration_image_folder):
        self.image_folder = calibration_image_folder
        self.preprocess_flag = True
        self.enum_data_dicts = []
        self.datasize = 0

    def get_next(self):
        # Lazily preprocess the calibration set on the first call, then yield one input dict per image.
        if self.preprocess_flag:
            self.preprocess_flag = False
            nchw_data_list = preprocess_func(self.image_folder, image_height, image_width, size_limit=0)
            self.datasize = len(nchw_data_list)
            self.enum_data_dicts = iter([{'input': nchw_data} for nchw_data in nchw_data_list])
        return next(self.enum_data_dicts, None)


if __name__ == "__main__":
    multiprocessing.freeze_support()

    x = torch.randn(1, 3, image_height, image_width, requires_grad=True)
    # Note: pretrained=True is deprecated in newer torchvision; the replacement is
    # weights=models.MobileNet_V3_Large_Weights.IMAGENET1K_V1.
    mobilenet_v3 = models.mobilenet_v3_large(pretrained=True)
    # mobilenet_v3.classifier[3] = torch.nn.Linear(1280, 2, bias=True)
    mobilenet_v3.eval()
    torch_out = mobilenet_v3(x)

    # Export the model
    torch.onnx.export(mobilenet_v3,                        # model being run
                      x,                                   # model input (or a tuple for multiple inputs)
                      "mobilenet_v3_large-5c1a4163.onnx",  # where to save the model (can be a file or file-like object)
                      export_params=True,                  # store the trained parameter weights inside the model file
                      opset_version=12,                    # the ONNX version to export the model to
                      do_constant_folding=True,            # whether to execute constant folding for optimization
                      input_names=['input'],               # the model's input names
                      output_names=['output'])             # the model's output names

    calibration_data_folder = "calibration_imagenet"
    dr = MobilenetDataReader(calibration_data_folder)

    quantize_static('mobilenet_v3_large-5c1a4163.onnx',
                    'mobilenet_v3_large-5c1a4163_uint8.onnx',
                    dr)
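
As an optional follow-up, a quick sanity check can confirm that the quantized model loads and produces outputs comparable to the FP32 export. This is a minimal sketch, not part of the original script: it assumes both .onnx files produced above are in the working directory, and it uses a random tensor only to exercise the graphs; for a meaningful accuracy comparison, feed a real preprocessed image instead.

import numpy as np
import onnxruntime as ort

# Random NCHW input just to exercise both graphs end to end;
# substitute a preprocessed real image for a meaningful comparison.
dummy = np.random.rand(1, 3, 224, 224).astype(np.float32)

fp32_sess = ort.InferenceSession("mobilenet_v3_large-5c1a4163.onnx",
                                 providers=["CPUExecutionProvider"])
int8_sess = ort.InferenceSession("mobilenet_v3_large-5c1a4163_uint8.onnx",
                                 providers=["CPUExecutionProvider"])

fp32_out = fp32_sess.run(None, {"input": dummy})[0]
int8_out = int8_sess.run(None, {"input": dummy})[0]

print("output shape:", fp32_out.shape)
print("max abs diff fp32 vs int8:", float(np.max(np.abs(fp32_out - int8_out))))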