本文为Python实现;C++实现见链接:模型量化
若还没有配置环境(CUDA,CUDNN,TensorRT),请移至C++实现中查看环境配置方法
支持三种不同精度的量化
模型单精度量化 (FP32)
模型半精度量化 (FP16)
模型Int8量化 (INT8)
经测试yolov5,yolov6,yolov7,yolov8转化成功
yolov5: https://github.com/ultralytics/yolov5
yolov6: https://github.com/meituan/YOLOv6
yolov7: https://github.com/WongKinYiu/yolov7
yolov8: https://github.com/ultralytics/ultralytics
注意:若使用INT8量化,需要额外的文件 calibrator.py(见下文)。
文章来源:https://www.toymoban.com/news/detail-604342.html
main.py文件
import tensorrt as trt
import os
from calibrator import Calibrator
# Module-level TensorRT logger (VERBOSE severity) passed to both the builder and the ONNX parser.
LOGGER = trt.Logger(trt.Logger.VERBOSE)
def buildEngine(onnx_file, engine_file, quantification, batch_size, FP16_mode, INT8_mode,
                img_height, img_wdith, calibration_images, calibration_cache):
    """Parse an ONNX model, build a serialized TensorRT engine and write it to disk.

    Args:
        onnx_file: Path of the ONNX model to parse.
        engine_file: Path the serialized engine is written to.
        quantification: Number of calibration batches (forwarded to ``Calibrator``).
        batch_size: Calibration batch size (forwarded to ``Calibrator``).
        FP16_mode: When truthy, enable the FP16 builder flag.
        INT8_mode: When truthy (and ``FP16_mode`` is falsy), enable the INT8
            builder flag and attach a ``Calibrator``.
        img_height: Calibration image height (forwarded to ``Calibrator``).
        img_wdith: Calibration image width (forwarded to ``Calibrator``).
        calibration_images: Directory of calibration images.
        calibration_cache: Path of the calibration cache file.

    Raises:
        RuntimeError: If the ONNX file cannot be parsed or the engine build fails.
    """
    builder = trt.Builder(LOGGER)
    network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    parser = trt.OnnxParser(network, LOGGER)
    config = builder.create_builder_config()

    # parse_from_file reports failure through its return value; surface the
    # parser's own diagnostics instead of building from a broken network.
    if not parser.parse_from_file(onnx_file):
        details = "; ".join(str(parser.get_error(i)) for i in range(parser.num_errors))
        raise RuntimeError("Failed to parse ONNX file %s: %s" % (onnx_file, details))

    # Workspace memory pool limit: 16 MiB.
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 16 * (1 << 20))

    # FP16 takes precedence; with both flags falsy the engine stays FP32.
    if FP16_mode:
        config.set_flag(trt.BuilderFlag.FP16)
    elif INT8_mode:
        config.set_flag(trt.BuilderFlag.INT8)
        config.int8_calibrator = Calibrator(quantification, batch_size, img_height, img_wdith,
                                            calibration_images, calibration_cache)

    engine = builder.build_serialized_network(network, config)
    if engine is None:
        print("EXPORT ENGINE FAILED!")
        # Stop before opening the output file: writing None would raise a
        # confusing TypeError and leave an empty engine file behind.
        raise RuntimeError("TensorRT failed to build an engine from %s" % onnx_file)

    with open(engine_file, "wb") as f:
        f.write(engine)
def main():
    """Export the configured ONNX model to a TensorRT engine file.

    All settings are hard-coded below. If the ONNX file does not exist, an
    error is printed and the function returns without attempting the export.
    """
    quantification = 1  # number of calibration batches handed to the INT8 calibrator
    batch_size = 1
    img_height = 640
    img_wdith = 640
    calibration_images = "./images"
    onnx_file = "./weights/yolov7.onnx"
    engine_file = "./weights/yolov7.engine"
    calibration_cache = "./weights/yolov7_calibration.cache"

    # Precision selection:
    #   FP32: FP16_mode = False, INT8_mode = False
    #   FP16: FP16_mode = True,  INT8_mode = False
    #   INT8: FP16_mode = False, INT8_mode = True
    FP16_mode = False
    INT8_mode = False

    if not os.path.exists(onnx_file):
        print("LOAD ONNX FILE FAILED: ", onnx_file)
        # Nothing to convert; do not fall through to the build step.
        return

    print('Load ONNX file from:%s \nStart export, Please wait a moment...'%(onnx_file))
    buildEngine(onnx_file, engine_file, quantification, batch_size, FP16_mode, INT8_mode,
                img_height, img_wdith, calibration_images, calibration_cache)
    print('Export ENGINE success, Save as: ', engine_file)


if __name__ == '__main__':
    main()
calibrator.py文件
import os
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit
import logging
import numpy as np
import cv2
class Calibrator(trt.IInt8EntropyCalibrator):
    """INT8 entropy calibrator that feeds letterboxed images from a directory.

    Images are read with OpenCV, converted to RGB, resized with preserved
    aspect ratio, padded to ``(height, width)``, scaled to ``[0, 1]`` and laid
    out as float32 ``(batch_size, 3, height, width)`` before being copied to a
    device buffer.

    Args:
        quantification: Requested number of calibration batches.
        batch_size: Number of images per calibration batch.
        height: Network input height in pixels.
        width: Network input width in pixels.
        calibration_images: Directory containing the calibration images.
        cache_file: Path used to read/write the calibration cache.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """

    # Only files with these extensions are treated as calibration images, so
    # stray files in the directory (caches, labels, ...) are never decoded.
    _IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff", ".webp")

    def __init__(self, quantification=1, batch_size=1, height=640, width=640, calibration_images="", cache_file=""):
        # The TensorRT base class has to be initialised explicitly.
        trt.IInt8EntropyCalibrator.__init__(self)
        if batch_size < 1:
            raise ValueError("batch_size must be >= 1, got %r" % (batch_size,))
        self.index = 0
        self.batch_size = batch_size
        self.cache_file = cache_file
        self.height = height
        self.width = width
        # Sorted so the images used for calibration do not depend on the
        # arbitrary order returned by os.listdir.
        self.img_list = sorted(
            os.path.join(calibration_images, entry)
            for entry in os.listdir(calibration_images)
            if entry.lower().endswith(self._IMAGE_EXTENSIONS)
        )
        # Never promise more batches than the directory can fill; otherwise
        # next_batch would index past the end of img_list.
        available = len(self.img_list) // self.batch_size
        if available < quantification:
            logging.warning(
                "Requested %d calibration batch(es) of size %d but only %d image(s) "
                "found in %s; using %d batch(es).",
                quantification, self.batch_size, len(self.img_list),
                calibration_images, available,
            )
        self.length = min(quantification, available)
        # Host staging buffer, reused for every batch.
        self.calibration_data = np.zeros((self.batch_size, 3, self.height, self.width), dtype=np.float32)
        # Device buffer sized to hold exactly one batch.
        self.d_input = cuda.mem_alloc(self.calibration_data.nbytes)

    def next_batch(self):
        """Load and preprocess the next batch.

        Returns:
            A contiguous float32 array of shape ``(batch_size, 3, height,
            width)``, or an empty array once all batches have been served.

        Raises:
            ValueError: If an image file cannot be decoded by OpenCV.
        """
        if self.index >= self.length:
            return np.array([])
        start = self.index * self.batch_size
        for slot, path in enumerate(self.img_list[start:start + self.batch_size]):
            img = cv2.imread(path)
            if img is None:
                # cv2.imread signals failure by returning None, not by raising.
                raise ValueError("Failed to read calibration image: %s" % path)
            self.calibration_data[slot] = self.preprocess(img)
        self.index += 1
        return np.ascontiguousarray(self.calibration_data, dtype=np.float32)

    def __len__(self):
        """Return the number of calibration batches that will be served."""
        return self.length

    def get_batch_size(self):
        """Return the calibration batch size."""
        return self.batch_size

    def get_batch(self, name):
        """Copy the next batch to the device and return its pointer.

        Args:
            name: Input name(s) supplied by the caller; not used here.

        Returns:
            A one-element list holding the device buffer address, or ``None``
            when no batches remain.
        """
        batch = self.next_batch()
        if not batch.size:
            return None
        cuda.memcpy_htod(self.d_input, batch)
        return [int(self.d_input)]

    def read_calibration_cache(self):
        """Return the cached calibration data, or ``None`` if no cache file exists."""
        if os.path.exists(self.cache_file):
            with open(self.cache_file, "rb") as f:
                return f.read()
        return None

    def write_calibration_cache(self, cache):
        """Write the calibration data ``cache`` to the cache file."""
        with open(self.cache_file, "wb") as f:
            f.write(cache)

    def preprocess(self, img):
        """Letterbox a BGR image to the network input size.

        Args:
            img: Image array as returned by ``cv2.imread`` (height, width,
                channels; BGR order).

        Returns:
            float32 array of shape ``(3, height, width)`` in RGB order with
            values scaled to ``[0, 1]``; padding pixels are grey (128).
        """
        h, w = img.shape[:2]
        image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        r_w = self.width / w
        r_h = self.height / h
        if r_h > r_w:
            # Width is the limiting side: fill it and pad top/bottom.
            tw = self.width
            th = int(r_w * h)
            tx1 = tx2 = 0
            ty1 = int((self.height - th) / 2)
            ty2 = self.height - th - ty1
        else:
            # Height is the limiting side: fill it and pad left/right.
            tw = int(r_h * w)
            th = self.height
            tx1 = int((self.width - tw) / 2)
            tx2 = self.width - tw - tx1
            ty1 = ty2 = 0
        image = cv2.resize(image, (tw, th))
        # The fill colour must be passed as ``value=``: the seventh positional
        # parameter of cv2.copyMakeBorder is ``dst``, so a positional tuple
        # would not be used as the border colour.
        image = cv2.copyMakeBorder(image, ty1, ty2, tx1, tx2, cv2.BORDER_CONSTANT, value=(128, 128, 128))
        image = image.astype(np.float32)
        image /= 255.0
        # HWC -> CHW
        image = np.transpose(image, [2, 0, 1])
        return image
结束~
文章来源地址:https://www.toymoban.com/news/detail-604342.html
到了这里,关于模型量化!ONNX转TensorRT(FP32, FP16, INT8)的文章就介绍完了。如果您还想了解更多内容,请在右上角搜索TOY模板网以前的文章或继续浏览下面的相关文章,希望大家以后多多支持TOY模板网!