TensorRT之uff

# This sample (adapted from the TensorRT UFF MNIST sample) uses a UFF model to create a TensorRT Inference Engine
from random import randint
from PIL import Image
import numpy as np

import pycuda.driver as cuda
# This import causes pycuda to automatically manage CUDA context creation and cleanup.
import pycuda.autoinit

import tensorrt as trt
import time

import sys, os
sys.path.insert(1, os.path.join(sys.path[0], ".."))
import common

# You can set the logger severity higher to suppress messages (or lower to display more messages).
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

# Number of images per inference call. It is used as the builder's max batch size,
# as the number of copies of the test image written to the input buffer, and as
# the batch size passed to common.do_inference.
batch_size = 128

class ModelData(object):
    """Static description of the UFF model this sample builds an engine for."""

    # Serialized UFF graph, resolved relative to the directory containing this script.
    MODEL_FILE = os.path.join(
        os.path.dirname(__file__),
        "model2/frozen_model.uff",
    )

    # Tensor name registered with the UFF parser as the network input.
    INPUT_NAME = "input_1"

    # Input dimensions; unpacked as (channels, height, width) when the image is prepared.
    INPUT_SHAPE = (3, 256, 256)

    # Tensor name registered with the UFF parser as the network output.
    OUTPUT_NAME = "predictions/Softmax"

    # TensorRT data type; converted to a NumPy dtype via trt.nptype() before filling buffers.
    DTYPE = trt.float32

def build_engine(model_file):
    """Parse a UFF model file and build a TensorRT engine from it.

    The builder is configured with the module-level ``batch_size`` as its
    maximum batch size and a workspace of ``common.GiB(1)``. The input and
    output tensors are registered from ``ModelData``.

    Args:
        model_file: Path to the ``.uff`` file to parse.

    Returns:
        Whatever ``builder.build_cuda_engine(network)`` returns for the
        parsed network.

    Raises:
        RuntimeError: If the UFF parser reports that ``model_file`` could
            not be parsed.
    """
    # For more information on TRT basics, refer to the introductory samples.
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
        builder.max_batch_size = batch_size
        builder.max_workspace_size = common.GiB(1)
        # Parse the Uff Network
        parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)
        parser.register_output(ModelData.OUTPUT_NAME)
        # parse() signals failure through its return value; stop here with a
        # clear message instead of building from an incomplete network.
        if not parser.parse(model_file, network):
            raise RuntimeError("Failed to parse UFF model file: %s" % model_file)
        # Build and return an engine.
        return builder.build_cuda_engine(network)

# Loads a test case into the provided pagelocked_buffer.
def load_normalized_test_case(data_path, pagelocked_buffer, case_num=None):
    """Fill ``pagelocked_buffer`` with a batch made of one repeated test image.

    The image ``"lena.jpg"`` (relative to the current working directory) is
    resized to the height and width in ``ModelData.INPUT_SHAPE``, transposed
    from HWC to CHW, and cast to the NumPy dtype matching ``ModelData.DTYPE``.
    Pixel values are only cast, not rescaled. The result is repeated
    ``batch_size`` times, flattened, and copied into ``pagelocked_buffer``.

    Args:
        data_path: Unused; kept so existing callers keep working.
        pagelocked_buffer: Destination array that receives the flattened batch.
        case_num: Value returned to the caller. When omitted, a fresh random
            integer in [0, 9] is drawn on each call.

    Returns:
        ``case_num`` (the supplied value or the randomly drawn one).
    """
    # Draw the default per call; a randint() default in the signature would be
    # evaluated only once, when the function is defined.
    if case_num is None:
        case_num = randint(0, 9)

    _, height, width = ModelData.INPUT_SHAPE
    np_dtype = trt.nptype(ModelData.DTYPE)

    test_case_path = "lena.jpg"
    # The context manager closes the image file once the pixels are extracted.
    with Image.open(test_case_path) as image:
        # LANCZOS is the same filter as the older ANTIALIAS name, which newer
        # Pillow releases no longer provide.
        resized = image.resize((width, height), Image.LANCZOS)
        # Transpose HWC -> CHW; requires an image with a channel axis.
        chw = np.asarray(resized).transpose([2, 0, 1]).astype(np_dtype)

    # Repeating the flattened image is equivalent to stacking batch_size copies
    # and flattening the stack.
    np.copyto(pagelocked_buffer, np.tile(chw.ravel(), batch_size))
    return case_num

def main():
    """Build the engine, save it to disk, then time batched inference forever.

    The engine built from ``ModelData.MODEL_FILE`` is serialized to
    ``inception_batch.engine``. The input buffer is filled once, and inference
    is then run in an endless loop that prints the wall-clock duration of each
    call. The loop has no exit condition; interrupt the process to stop it.
    """
    # Fixed data directory, used in place of the common.find_sample_data() lookup.
    data_path = "/home/bjxiangboren/tools/TensorRT-5.0.2.6/data/mnist/"
    uff_path = ModelData.MODEL_FILE

    # NOTE: a previously saved "inception_batch.engine" could instead be loaded
    # with trt.Runtime(TRT_LOGGER).deserialize_cuda_engine(...) to skip the build.
    with build_engine(uff_path) as engine:
        # Persist the freshly built engine so later runs can reuse it.
        with open("inception_batch.engine", "wb") as engine_file:
            engine_file.write(engine.serialize())

        # Allocate buffers and create a stream.
        # For more information on buffer allocation, refer to the introductory samples.
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)

        with engine.create_execution_context() as context:
            # The input buffer is filled once and reused by every iteration below.
            case_num = load_normalized_test_case(data_path, pagelocked_buffer=inputs[0].host)

            # For more information on performing inference, refer to the introductory samples.
            # common.do_inference returns a list of outputs; exactly one is expected here.
            while True:
                tick = time.time()
                (output,) = common.do_inference(
                    context,
                    bindings=bindings,
                    inputs=inputs,
                    outputs=outputs,
                    stream=stream,
                    batch_size=batch_size,
                )
                elapsed = time.time() - tick
                print("time dis is %s" % elapsed)
                # To inspect predictions, apply np.argmax to `output` here.

# Run the sample only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()

1、首先将pb转为uff格式的模型

python /usr/lib/python3.5/dist-packages/uff/bin/convert_to_uff.py --input_file models/lenet5.pb

2、使用trt engine加速

这个加速还是挺明显的，但转换后的模型无法使用 TF Serving（tfserving）了，只能用 TensorRT 自己的 engine。

参考：https://devtalk.nvidia.com/default/topic/1044466/tensorrt/uff-inference-time-large-than-pb-time-when-process-vgg-19/

https://blog.csdn.net/zong596568821xp/article/details/86077553

https://blog.csdn.net/g11d111/article/details/92061884

https://mp.weixin.qq.com/s/Ps49ZTfJprcOYrc6xo-gLg?

Tensort之uff

sqlserver还原3101

为什么iOS13更新频率这么快，BUG还这么多

最新文章

realme 市场沟通总监：真我 GT7 Pro 核心部件全部来自国际顶尖供应链合作

ctf加载程序需要自启动吗(ctf加载程序开机启动能禁用吗)

c语言转义字符占几个字节(c语言转义字符的使用输出字符串)

有线中继插wan口还是lan口(有线桥接时接wan口还是lan口)

shell循环分为几种(shell的循环控制结构)

mt6771v处理器相当于骁龙多少(联发科mt6771v处理器怎么样)

删除数据库表的命令(删除表的内容sql命令是什么)

怎么练口才(怎么训练自己的说话能力)

俄罗斯人口是多少(2021年中国人口是多少)

火葬场特点介绍火葬场技术设备介绍

最新评论

标签

关注我们么么哒！

Tensort之uff

sqlserver还原3101

为什么iOS13更新频率这么快，BUG还这么多

最新文章

realme 市场沟通总监：真我 GT7 Pro 核心部件全部来自国际顶尖供应链合作

最新评论

标签

关注我们 么么哒！

关注我们的公众号

关注我们么么哒！