"IRuntime": undeclared identifier
2022-07-24 21:36:00 [AI vision netqi]
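This compiler error typically means that NvInferRuntime.h is not included in the translation unit (or the include path does not point at the TensorRT headers), or that the nvinfer1:: namespace qualifier was omitted. A minimal sketch of the correct usage (make_runtime is a hypothetical helper name):

// IRuntime and createInferRuntime are declared in NvInferRuntime.h,
// inside the nvinfer1 namespace. Without this include (or without the
// nvinfer1:: qualifier) the compiler reports "IRuntime": undeclared identifier.
#include <NvInferRuntime.h>

nvinfer1::IRuntime* make_runtime(nvinfer1::ILogger& logger)
{
return nvinfer1::createInferRuntime(logger);
}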
Complete usage:
Reference: TensorRT series (1) Model inference, from the original author's CSDN blog (link at the end).
// tensorRT include
#include <NvInfer.h>
#include <NvInferRuntime.h>
// cuda include
#include <cuda_runtime.h>
// system include
#include <stdio.h>
#include <math.h>
#include <iostream>
#include <fstream>
#include <vector>
using namespace std;
// The code from the previous section
class TRTLogger : public nvinfer1::ILogger
{
public:
virtual void log(Severity severity, nvinfer1::AsciiChar const* msg) noexcept override
{
// Severity is ordered kINTERNAL_ERROR = 0 through kVERBOSE = 4,
// so this prints every message at INFO level or more severe.
if(severity <= Severity::kINFO)
{
// Cast the scoped enum explicitly; passing it through printf's varargs without a cast is not portable.
printf("%d: %s\n", static_cast<int>(severity), msg);
}
}
} logger;
nvinfer1::Weights make_weights(float* ptr, int n)
{
nvinfer1::Weights w;
w.count = n;
w.type = nvinfer1::DataType::kFLOAT;
w.values = ptr;
return w;
}
bool build_model()
{
TRTLogger logger;
// These are the basic components
nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(logger);
nvinfer1::IBuilderConfig* config = builder->createBuilderConfig();
nvinfer1::INetworkDefinition* network = builder->createNetworkV2(1);
// Build a model
/*
Network definition:
image
|
linear (fully connected) input = 3, output = 2, bias = True w=[[1.0, 2.0, 0.5], [0.1, 0.2, 0.5]], b=[0.3, 0.8]
|
sigmoid
|
prob
*/
const int num_input = 3;
const int num_output = 2;
float layer1_weight_values[] = {1.0, 2.0, 0.5, 0.1, 0.2, 0.5};
float layer1_bias_values[] = {0.3, 0.8};
nvinfer1::ITensor* input = network->addInput("image", nvinfer1::DataType::kFLOAT, nvinfer1::Dims4(1, num_input, 1, 1));
nvinfer1::Weights layer1_weight = make_weights(layer1_weight_values, 6);
nvinfer1::Weights layer1_bias = make_weights(layer1_bias_values, 2);
auto layer1 = network->addFullyConnected(*input, num_output, layer1_weight, layer1_bias);
auto prob = network->addActivation(*layer1->getOutput(0), nvinfer1::ActivationType::kSIGMOID);
// Mark prob as the output we need
network->markOutput(*prob->getOutput(0));
printf("Workspace Size = %.2f MB\n", (1 << 28) / 1024.0f / 1024.0f);
config->setMaxWorkspaceSize(1 << 28);
builder->setMaxBatchSize(1);
nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
if(engine == nullptr)
{
printf("Build engine failed.\n");
return false;
}
// Serialize the model and save it to a file
nvinfer1::IHostMemory* model_data = engine->serialize();
FILE* f = fopen("engine.trtmodel", "wb");
fwrite(model_data->data(), 1, model_data->size(), f);
fclose(f);
// Release in the reverse order of construction
model_data->destroy();
engine->destroy();
network->destroy();
config->destroy();
builder->destroy();
printf("Done.\n");
return true;
}
vector<unsigned char> load_file(const string& file)
{
ifstream in(file, ios::in | ios::binary);
if (!in.is_open())
return {};
in.seekg(0, ios::end);
size_t length = in.tellg();
std::vector<uint8_t> data;
if (length > 0){
in.seekg(0, ios::beg);
data.resize(length);
in.read((char*)&data[0], length);
}
in.close();
return data;
}
void inference(){
// ------------------------------ 1. Prepare and load the model ----------------------------
TRTLogger logger;
auto engine_data = load_file("engine.trtmodel");
// Before running inference you need to create a runtime instance. Like the builder, the runtime needs a logger:
nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(logger);
// Deserialize engine_data to get the engine
nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine(engine_data.data(), engine_data.size());
if(engine == nullptr){
printf("Deserialize cuda engine failed.\n");
runtime->destroy();
return;
}
nvinfer1::IExecutionContext* execution_context = engine->createExecutionContext();
cudaStream_t stream = nullptr;
// Create a CUDA stream so that each batch's inference is independent
cudaStreamCreate(&stream);
/*
Network definition:
image
|
linear (fully connected) input = 3, output = 2, bias = True w=[[1.0, 2.0, 0.5], [0.1, 0.2, 0.5]], b=[0.3, 0.8]
|
sigmoid
|
prob
*/
// ------------------------------ 2. Prepare the input data and copy it to the GPU ----------------------------
float input_data_host[] = {1, 2, 3};
float* input_data_device = nullptr;
float output_data_host[2];
float* output_data_device = nullptr;
cudaMalloc(&input_data_device, sizeof(input_data_host));
cudaMalloc(&output_data_device, sizeof(output_data_host));
cudaMemcpyAsync(input_data_device, input_data_host, sizeof(input_data_host), cudaMemcpyHostToDevice, stream);
// Pass the GPU pointers of the input and output via an array of pointers.
float* bindings[] = {input_data_device, output_data_device};
// ------------------------------ 3. Run inference and copy the results back to the CPU ----------------------------
bool success = execution_context->enqueueV2((void**)bindings, stream, nullptr);
cudaMemcpyAsync(output_data_host, output_data_device, sizeof(output_data_host), cudaMemcpyDeviceToHost, stream);
cudaStreamSynchronize(stream);
printf("output_data_host = %f, %f\n", output_data_host[0], output_data_host[1]);
// ------------------------------ 4. Free memory ----------------------------
printf("Clean memory\n");
cudaStreamDestroy(stream);
execution_context->destroy();
engine->destroy();
runtime->destroy();
// ------------------------------ 5. Manual computation for verification ----------------------------
const int num_input = 3;
const int num_output = 2;
float layer1_weight_values[] = {1.0, 2.0, 0.5, 0.1, 0.2, 0.5};
float layer1_bias_values[] = {0.3, 0.8};
printf(" Manually verify the calculation results :\n");
for(int io = 0; io < num_output; ++io)
{
float output_host = layer1_bias_values[io];
for(int ii = 0; ii < num_input; ++ii)
{
output_host += layer1_weight_values[io * num_input + ii] * input_data_host[ii];
}
// sigmoid
float prob = 1 / (1 + exp(-output_host));
printf("output_prob[%d] = %f\n", io, prob);
}
}
int main()
{
if(!build_model())
{
return -1;
}
inference();
return 0;
}
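For the input {1, 2, 3}, the expected values can be computed by hand from the weights above:

output[0] = sigmoid(1.0*1 + 2.0*2 + 0.5*3 + 0.3) = sigmoid(6.8) ≈ 0.998887
output[1] = sigmoid(0.1*1 + 0.2*2 + 0.5*3 + 0.8) = sigmoid(2.8) ≈ 0.942676

The engine output printed in step 3 and the manual check in step 5 should both match these numbers.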
Makefile:
cc := g++
name := pro
workdir := workspace
srcdir := src
objdir := objs
stdcpp := c++11
cuda_home := /home/liuhongyuan/miniconda3/envs/trtpy/lib/python3.8/site-packages/trtpy/trt8cuda112cudnn8
syslib := /home/liuhongyuan/miniconda3/envs/trtpy/lib/python3.8/site-packages/trtpy/lib
cpp_pkg := /home/liuhongyuan/miniconda3/envs/trtpy/lib/python3.8/site-packages/trtpy/cpp-packages
cuda_arch :=
nvcc := $(cuda_home)/bin/nvcc -ccbin=$(cc)
# Define cpp source file lookup and the dependency .mk files
cpp_srcs := $(shell find $(srcdir) -name "*.cpp")
cpp_objs := $(cpp_srcs:.cpp=.cpp.o)
cpp_objs := $(cpp_objs:$(srcdir)/%=$(objdir)/%)
cpp_mk := $(cpp_objs:.cpp.o=.cpp.mk)
# Define cu source file lookup and the dependency .mk files
cu_srcs := $(shell find $(srcdir) -name "*.cu")
cu_objs := $(cu_srcs:.cu=.cu.o)
cu_objs := $(cu_objs:$(srcdir)/%=$(objdir)/%)
cu_mk := $(cu_objs:.cu.o=.cu.mk)
# Define the opencv and cuda libraries that are needed
link_cuda := cudart cudnn
link_trtpro :=
link_tensorRT := nvinfer
link_opencv :=
link_sys := stdc++ dl
link_librarys := $(link_cuda) $(link_tensorRT) $(link_sys) $(link_opencv)
# Define the header file paths; note there must be no space after the backslash
# Write only the paths, the -I is added later
include_paths := src \
$(cuda_home)/include/cuda \
$(cuda_home)/include/tensorRT \
$(cpp_pkg)/opencv4.2/include
# Define the library file paths; write only the paths, the -L is added later
library_paths := $(cuda_home)/lib64 $(syslib) $(cpp_pkg)/opencv4.2/lib
# Join the library paths into one string, e.g. a b c => a:b:c,
# which is then used as LD_LIBRARY_PATH=a:b:c
empty :=
library_path_export := $(subst $(empty) $(empty),:,$(library_paths))
# Expand the library and header paths, adding -I, -L and -l automatically
run_paths := $(foreach item,$(library_paths),-Wl,-rpath=$(item))
include_paths := $(foreach item,$(include_paths),-I$(item))
library_paths := $(foreach item,$(library_paths),-L$(item))
link_librarys := $(foreach item,$(link_librarys),-l$(item))
# For a different graphics card, change -gencode=arch=compute_75,code=sm_75 to that card's compute capability
# The compute capability of each card is listed here: https://developer.nvidia.com/zh-cn/cuda-gpus#compute
# On a Jetson Nano, if the -m64 option is reported as unknown, simply remove it; the result is unaffected
cpp_compile_flags := -std=$(stdcpp) -w -g -O0 -m64 -fPIC -fopenmp -pthread
cu_compile_flags := -std=$(stdcpp) -w -g -O0 -m64 $(cuda_arch) -Xcompiler "$(cpp_compile_flags)"
link_flags := -pthread -fopenmp -Wl,-rpath='$$ORIGIN'
cpp_compile_flags += $(include_paths)
cu_compile_flags += $(include_paths)
link_flags += $(library_paths) $(link_librarys) $(run_paths)
# If a header file changes, these includes make the dependent cpp or cu files recompile automatically
ifneq ($(MAKECMDGOALS), clean)
-include $(cpp_mk) $(cu_mk)
endif
$(name) : $(workdir)/$(name)
all : $(name)
run : $(name)
@cd $(workdir) && ./$(name) $(run_args)
$(workdir)/$(name) : $(cpp_objs) $(cu_objs)
	@echo Link $@
	@mkdir -p $(dir $@)
	@$(cc) $^ -o $@ $(link_flags)
$(objdir)/%.cpp.o : $(srcdir)/%.cpp
@echo Compile CXX $<
	@mkdir -p $(dir $@)
	@$(cc) -c $< -o $@ $(cpp_compile_flags)
$(objdir)/%.cu.o : $(srcdir)/%.cu
@echo Compile CUDA $<
	@mkdir -p $(dir $@)
	@$(nvcc) -c $< -o $@ $(cu_compile_flags)
# Compile cpp dependencies and generate the .mk files
$(objdir)/%.cpp.mk : $(srcdir)/%.cpp
@echo Compile depends C++ $<
	@mkdir -p $(dir $@)
	@$(cc) -M $< -MF $@ -MT $(@:.cpp.mk=.cpp.o) $(cpp_compile_flags)
# Compile cu file dependencies and generate the .mk files
$(objdir)/%.cu.mk : $(srcdir)/%.cu
@echo Compile depends CUDA $<
	@mkdir -p $(dir $@)
	@$(nvcc) -M $< -MF $@ -MT $(@:.cu.mk=.cu.o) $(cu_compile_flags)
# Define the clean target
clean :
@rm -rf $(objdir) $(workdir)/$(name) $(workdir)/*.trtmodel
# Prevent these names from being treated as files
.PHONY : clean run $(name)
# Export the dependent library path so the binary can run
export LD_LIBRARY_PATH:=$(library_path_export)
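With this makefile, "make" (or "make pro") links workspace/pro from the sources under src/, "make run" builds the binary and executes it from the workspace directory (so engine.trtmodel is written there), and "make clean" removes the objects, the binary, and any generated .trtmodel files. Note that the cuda_home, syslib and cpp_pkg paths above are specific to the original author's trtpy installation and must be adapted to your own environment.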
Key takeaways:
1. Use createNetworkV2 and pass 1 (meaning explicit batch). createNetwork is deprecated and implicit batch is officially discouraged; this choice determines whether inference goes through enqueue or enqueueV2.
2. Pointers such as builder and config must be released, otherwise memory leaks; release them with ptr->destroy().
3. markOutput marks an output node of the model: call it once per output. Likewise, each addInput call adds one input.
4. workspaceSize is the size of the workspace. When a layer needs extra storage it does not allocate the space itself; for memory reuse it requests workspace from TensorRT directly.
5. Remember: the saved engine only matches the TensorRT version and the device used at build time, and is only guaranteed optimal for that configuration. An engine may still run on a different device, but it will not be optimal, so this is not recommended.
6. bindings is TensorRT's description of the input and output tensors: bindings = input tensors + output tensors. For example, with input a and outputs b, c, d, bindings = [a, b, c, d], so bindings[0] = a and bindings[2] = c (see the sketch after this list).
7. enqueueV2 is asynchronous inference: it is added to the stream's queue and waits to execute. The bindings passed in are pointers to the tensors (note: device pointers).
8. createExecutionContext can be called multiple times, allowing one engine to own several execution contexts (also shown in the sketch below).
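A minimal sketch illustrating points 6, 7 and 8, assuming an engine deserialized as in inference() above and already-allocated device buffers; run_two_contexts and the in/out parameter names are hypothetical:

// Sketch for key points 6-8: binding layout and multiple execution contexts.
// in0/out0/in1/out1 are hypothetical device buffers
// (float[3] inputs, float[2] outputs for this network).
void run_two_contexts(nvinfer1::ICudaEngine* engine,
float* in0, float* out0,
float* in1, float* out1)
{
// Point 8: one engine can own several execution contexts, e.g. one per
// CUDA stream, so two inferences can be in flight at the same time.
nvinfer1::IExecutionContext* ctx0 = engine->createExecutionContext();
nvinfer1::IExecutionContext* ctx1 = engine->createExecutionContext();

cudaStream_t s0 = nullptr, s1 = nullptr;
cudaStreamCreate(&s0);
cudaStreamCreate(&s1);

// Point 6: bindings = input tensors followed by output tensors, in
// binding-index order. For this network bindings[0] is the input "image"
// and bindings[1] is the sigmoid output. All of them are device pointers.
void* bindings0[] = {in0, out0};
void* bindings1[] = {in1, out1};

// Point 7: enqueueV2 only queues the work on the stream;
// it returns before the inference has finished.
ctx0->enqueueV2(bindings0, s0, nullptr);
ctx1->enqueueV2(bindings1, s1, nullptr);

cudaStreamSynchronize(s0);
cudaStreamSynchronize(s1);

cudaStreamDestroy(s0);
cudaStreamDestroy(s1);
ctx0->destroy();
ctx1->destroy();
}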
————————————————
Copyright notice: this is an original CSDN article by the blogger 「torrent source」, released under the CC 4.0 BY-SA license. For reprints, please attach the original source link and this statement.
Link to the original text :https://blog.csdn.net/weicao1990/article/details/125034572