ONNX Quantization

onnx==1.16.1
onnxruntime==1.19.2
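
The script below exports a pretrained MobileNetV3-Large to ONNX and then applies static (post-training) quantization with onnxruntime, calibrating on a local folder of images. A minimal environment setup might look like the following; note that the torch, torchvision, Pillow and numpy versions are not pinned in the original post, so treat those as assumptions:

pip install onnx==1.16.1 onnxruntime==1.19.2
pip install torch torchvision pillow numpy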


import multiprocessing
import os

import numpy as np
import torch
from PIL import Image
from onnxruntime.quantization import quantize_static, CalibrationDataReader
from torchvision import models

image_height = 224
image_width = 224

def preprocess_image(image_path, height, width, channels=3):
    image = Image.open(image_path).convert("RGB")  # force 3 channels (handles grayscale/RGBA inputs)
    image = image.resize((width, height))
    image_data = np.asarray(image).astype(np.float32)
    image_data = image_data.transpose([2, 0, 1])  # HWC -> CHW
    # standard ImageNet normalization
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    for channel in range(image_data.shape[0]):
        image_data[channel, :, :] = (image_data[channel, :, :] / 255 - mean[channel]) / std[channel]
    image_data = np.expand_dims(image_data, 0)  # add batch dimension -> (1, 3, H, W)
    return image_data

def preprocess_func(images_folder, height, width, size_limit=0):
    image_names = os.listdir(images_folder)
    if size_limit > 0 and len(image_names) >= size_limit:
        batch_filenames = [image_names[i] for i in range(size_limit)]
    else:
        batch_filenames = image_names
    unconcatenated_batch_data = []

    for image_name in batch_filenames:
        image_filepath = os.path.join(images_folder, image_name)
        image_data = preprocess_image(image_filepath, height, width)
        unconcatenated_batch_data.append(image_data)
    # stack the per-image arrays into a single array of shape (N, 1, 3, H, W)
    batch_data = np.asarray(unconcatenated_batch_data)
    return batch_data


class MobilenetDataReader(CalibrationDataReader):
    def __init__(self, calibration_image_folder):
        self.image_folder = calibration_image_folder
        self.preprocess_flag = True
        self.enum_data_dicts = []
        self.datasize = 0

    def get_next(self):
        # lazily preprocess the whole calibration folder on the first call, then
        # yield one {input_name: NCHW array} dict per call until exhausted
        if self.preprocess_flag:
            self.preprocess_flag = False
            nchw_data_list = preprocess_func(self.image_folder, image_height, image_width, size_limit=0)
            self.datasize = len(nchw_data_list)
            self.enum_data_dicts = iter([{'input': nchw_data} for nchw_data in nchw_data_list])
        return next(self.enum_data_dicts, None)


if __name__ == "__main__":
    multiprocessing.freeze_support()

    x = torch.randn(1, 3, image_height, image_width)
    # on newer torchvision, pretrained=True emits a deprecation warning; the
    # equivalent is weights=models.MobileNet_V3_Large_Weights.IMAGENET1K_V1
    mobilenet_v3 = models.mobilenet_v3_large(pretrained=True)
    # mobilenet_v3.classifier[3] = torch.nn.Linear(1280, 2, bias=True)
    mobilenet_v3.eval()
    torch_out = mobilenet_v3(x)  # sanity-check forward pass before export
    # Export the model
    torch.onnx.export(mobilenet_v3,  # model being run
                      x,  # model input (or a tuple for multiple inputs)
                      "mobilenet_v3_large-5c1a4163.onnx",  # where to save the model (can be a file or file-like object)
                      export_params=True,  # store the trained parameter weights inside the model file
                      opset_version=12,  # the ONNX version to export the model to
                      do_constant_folding=True,  # whether to execute constant folding for optimization
                      input_names=['input'],  # the model's input names
                      output_names=['output'])  # the model's output names

    calibration_data_folder = "calibration_imagenet"
    dr = MobilenetDataReader(calibration_data_folder)
    # static (post-training) quantization: activation ranges are calibrated on
    # the images in calibration_data_folder; the output is an int8 model
    quantize_static('mobilenet_v3_large-5c1a4163.onnx',
                    'mobilenet_v3_large-5c1a4163_uint8.onnx',
                    dr)
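
Before quantization, the onnxruntime documentation recommends running the model through its pre-processing step (shape inference plus graph optimization). The script above skips it; a minimal sketch is shown below, where the _infer.onnx output filename is just an example, not something from the original post:

from onnxruntime.quantization.shape_inference import quant_pre_process

# write an optimized, shape-inferred copy that quantize_static can consume
quant_pre_process('mobilenet_v3_large-5c1a4163.onnx',
                  'mobilenet_v3_large-5c1a4163_infer.onnx')

The same step is also exposed as a CLI (python -m onnxruntime.quantization.preprocess). quantize_static additionally accepts options such as per_channel and weight_type; the call above keeps the defaults.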

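To sanity-check the result, both models can be loaded with onnxruntime and compared on a single calibration image. This is a sketch rather than part of the original post; it reuses preprocess_image, image_height and image_width from the script above and assumes at least one image exists in calibration_imagenet:

import os
import numpy as np
import onnxruntime as ort

fp32_sess = ort.InferenceSession('mobilenet_v3_large-5c1a4163.onnx', providers=['CPUExecutionProvider'])
int8_sess = ort.InferenceSession('mobilenet_v3_large-5c1a4163_uint8.onnx', providers=['CPUExecutionProvider'])

# pick one calibration image and preprocess it to a (1, 3, H, W) float32 array
sample = os.path.join('calibration_imagenet', os.listdir('calibration_imagenet')[0])
data = preprocess_image(sample, image_height, image_width)

fp32_out = fp32_sess.run(None, {'input': data})[0]
int8_out = int8_sess.run(None, {'input': data})[0]

# compare top-1 predictions and file sizes
print('top-1 fp32:', int(np.argmax(fp32_out)), 'top-1 int8:', int(np.argmax(int8_out)))
print('size fp32: %.1f MB, int8: %.1f MB' % (
    os.path.getsize('mobilenet_v3_large-5c1a4163.onnx') / 1e6,
    os.path.getsize('mobilenet_v3_large-5c1a4163_uint8.onnx') / 1e6))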
