“IRuntime”: undeclared identifier
This error usually means that NvInferRuntime.h (the header that declares nvinfer1::IRuntime) is not included, or that the TensorRT include directory is missing from the compiler's search path. The complete, working example below includes both headers.

Complete usage, from "TensorRT Series (1): Model Inference" (https://blog.csdn.net/weicao1990/article/details/125034572):
// tensorRT include
#include <NvInfer.h>
#include <NvInferRuntime.h>
// cuda include
#include <cuda_runtime.h>
// system include
#include <stdio.h>
#include <math.h>
#include <iostream>
#include <fstream>
#include <vector>
using namespace std;
// Code carried over from the previous section: logger and weight helper
class TRTLogger : public nvinfer1::ILogger
{
public:
    virtual void log(Severity severity, nvinfer1::AsciiChar const* msg) noexcept override
    {
        if(severity <= Severity::kINFO)
        {
            printf("%d: %s\n", static_cast<int>(severity), msg);
        }
    }
} logger;
nvinfer1::Weights make_weights(float* ptr, int n)
{
    nvinfer1::Weights w;
    w.count = n;
    w.type = nvinfer1::DataType::kFLOAT;
    w.values = ptr;
    return w;
}
bool build_model()
{
    TRTLogger logger;

    // Basic builder components
    nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(logger);
    nvinfer1::IBuilderConfig* config = builder->createBuilderConfig();
    // The flag value 1 is 1U << kEXPLICIT_BATCH, i.e. an explicit-batch network
    nvinfer1::INetworkDefinition* network = builder->createNetworkV2(1);

    // Build the model
    /*
        Network definition:

        image
          |
        linear (fully connected)  input = 3, output = 2, bias = True
                                  w=[[1.0, 2.0, 0.5], [0.1, 0.2, 0.5]], b=[0.3, 0.8]
          |
        sigmoid
          |
        prob
    */
    const int num_input = 3;
    const int num_output = 2;
    float layer1_weight_values[] = {1.0, 2.0, 0.5, 0.1, 0.2, 0.5};
    float layer1_bias_values[]   = {0.3, 0.8};

    nvinfer1::ITensor* input = network->addInput("image", nvinfer1::DataType::kFLOAT, nvinfer1::Dims4(1, num_input, 1, 1));
    nvinfer1::Weights layer1_weight = make_weights(layer1_weight_values, 6);
    nvinfer1::Weights layer1_bias   = make_weights(layer1_bias_values, 2);
    auto layer1 = network->addFullyConnected(*input, num_output, layer1_weight, layer1_bias);
    auto prob   = network->addActivation(*layer1->getOutput(0), nvinfer1::ActivationType::kSIGMOID);

    // Mark prob as the model output
    network->markOutput(*prob->getOutput(0));

    printf("Workspace Size = %.2f MB\n", (1 << 28) / 1024.0f / 1024.0f);
    config->setMaxWorkspaceSize(1 << 28);
    builder->setMaxBatchSize(1);

    nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
    if(engine == nullptr)
    {
        printf("Build engine failed.\n");
        return false;
    }
    // Serialize the model and save it to a file
    nvinfer1::IHostMemory* model_data = engine->serialize();
    FILE* f = fopen("engine.trtmodel", "wb");
    fwrite(model_data->data(), 1, model_data->size(), f);
    fclose(f);

    // Destroy in reverse order of construction
    model_data->destroy();
    engine->destroy();
    network->destroy();
    config->destroy();
    builder->destroy();

    printf("Done.\n");
    return true;
}
vector<unsigned char> load_file(const string& file)
{
    ifstream in(file, ios::in | ios::binary);
    if (!in.is_open())
        return {};

    in.seekg(0, ios::end);
    size_t length = in.tellg();

    std::vector<uint8_t> data;
    if (length > 0)
    {
        in.seekg(0, ios::beg);
        data.resize(length);
        in.read((char*)&data[0], length);
    }
    in.close();
    return data;
}
void inference()
{
    // ------------------------------ 1. Prepare and load the model ----------------------------
    TRTLogger logger;
    auto engine_data = load_file("engine.trtmodel");

    // Before running inference, create a runtime instance; like the builder, the runtime needs a logger.
    nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(logger);

    // Deserialize engine_data to obtain the engine.
    nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine(engine_data.data(), engine_data.size());
    if(engine == nullptr)
    {
        printf("Deserialize cuda engine failed.\n");
        runtime->destroy();
        return;
    }

    nvinfer1::IExecutionContext* execution_context = engine->createExecutionContext();

    // Create a CUDA stream so that this batch's inference runs independently.
    cudaStream_t stream = nullptr;
    cudaStreamCreate(&stream);
    /*
        Network definition:

        image
          |
        linear (fully connected)  input = 3, output = 2, bias = True
                                  w=[[1.0, 2.0, 0.5], [0.1, 0.2, 0.5]], b=[0.3, 0.8]
          |
        sigmoid
          |
        prob
    */
    // ------------------------------ 2. Prepare the input data and move it to the GPU ----------------------------
    float input_data_host[] = {1, 2, 3};
    float* input_data_device = nullptr;

    float output_data_host[2];
    float* output_data_device = nullptr;

    cudaMalloc(&input_data_device, sizeof(input_data_host));
    cudaMalloc(&output_data_device, sizeof(output_data_host));
    cudaMemcpyAsync(input_data_device, input_data_host, sizeof(input_data_host), cudaMemcpyHostToDevice, stream);

    // Pass the GPU pointers for input and output via an array of bindings.
    float* bindings[] = {input_data_device, output_data_device};

    // ------------------------------ 3. Run inference and copy the result back to the CPU ----------------------------
    bool success = execution_context->enqueueV2((void**)bindings, stream, nullptr);
    if(!success)
        printf("Enqueue failed.\n");
    cudaMemcpyAsync(output_data_host, output_data_device, sizeof(output_data_host), cudaMemcpyDeviceToHost, stream);
    cudaStreamSynchronize(stream);

    printf("output_data_host = %f, %f\n", output_data_host[0], output_data_host[1]);
    // ------------------------------ 4. Free resources ----------------------------
    printf("Clean memory\n");
    cudaStreamDestroy(stream);
    cudaFree(input_data_device);
    cudaFree(output_data_device);
    execution_context->destroy();
    engine->destroy();
    runtime->destroy();

    // ------------------------------ 5. Manual computation for verification ----------------------------
    const int num_input = 3;
    const int num_output = 2;
    float layer1_weight_values[] = {1.0, 2.0, 0.5, 0.1, 0.2, 0.5};
    float layer1_bias_values[]   = {0.3, 0.8};

    printf("Manual verification of the results:\n");
    for(int io = 0; io < num_output; ++io)
    {
        float output_host = layer1_bias_values[io];
        for(int ii = 0; ii < num_input; ++ii)
        {
            output_host += layer1_weight_values[io * num_input + ii] * input_data_host[ii];
        }

        // sigmoid
        float prob = 1 / (1 + exp(-output_host));
        printf("output_prob[%d] = %f\n", io, prob);
    }
}
int main()
{
    if(!build_model())
    {
        return -1;
    }
    inference();
    return 0;
}
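For the input {1, 2, 3} the expected result can be worked out by hand: output0 = sigmoid(1*1.0 + 2*2.0 + 3*0.5 + 0.3) = sigmoid(6.8) ≈ 0.998890, and output1 = sigmoid(1*0.1 + 2*0.2 + 3*0.5 + 0.8) = sigmoid(2.8) ≈ 0.942676. Both the enqueueV2 result and the manual check at the end of inference() should print approximately these two values.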
makefile:
cc := g++
name := pro
workdir := workspace
srcdir := src
objdir := objs
stdcpp := c++11
cuda_home := /home/liuhongyuan/miniconda3/envs/trtpy/lib/python3.8/site-packages/trtpy/trt8cuda112cudnn8
syslib := /home/liuhongyuan/miniconda3/envs/trtpy/lib/python3.8/site-packages/trtpy/lib
cpp_pkg := /home/liuhongyuan/miniconda3/envs/trtpy/lib/python3.8/site-packages/trtpy/cpp-packages
cuda_arch :=
nvcc := $(cuda_home)/bin/nvcc -ccbin=$(cc)
# Find .cpp sources and derive their object (.o) and dependency (.mk) file paths
cpp_srcs := $(shell find $(srcdir) -name "*.cpp")
cpp_objs := $(cpp_srcs:.cpp=.cpp.o)
cpp_objs := $(cpp_objs:$(srcdir)/%=$(objdir)/%)
cpp_mk := $(cpp_objs:.cpp.o=.cpp.mk)
# Find .cu sources and derive their object (.o) and dependency (.mk) file paths
cu_srcs := $(shell find $(srcdir) -name "*.cu")
cu_objs := $(cu_srcs:.cu=.cu.o)
cu_objs := $(cu_objs:$(srcdir)/%=$(objdir)/%)
cu_mk := $(cu_objs:.cu.o=.cu.mk)
# Libraries needed from OpenCV and CUDA
link_cuda := cudart cudnn
link_trtpro :=
link_tensorRT := nvinfer
link_opencv :=
link_sys := stdc++ dl
link_librarys := $(link_cuda) $(link_tensorRT) $(link_sys) $(link_opencv)
# Header search paths. Note: no spaces may follow the trailing backslash
# Write only the path; -I is added automatically below
include_paths := src \
    $(cuda_home)/include/cuda \
    $(cuda_home)/include/tensorRT \
    $(cpp_pkg)/opencv4.2/include
# Library search paths; write only the path, -L is added automatically below
library_paths := $(cuda_home)/lib64 $(syslib) $(cpp_pkg)/opencv4.2/lib
# Join the library paths into a colon-separated string, e.g. a b c => a:b:c,
# which is then exported as LD_LIBRARY_PATH=a:b:c
empty :=
library_path_export := $(subst $(empty) $(empty),:,$(library_paths))
# Prefix the include paths, library paths and libraries with -I, -L and -l respectively
run_paths := $(foreach item,$(library_paths),-Wl,-rpath=$(item))
include_paths := $(foreach item,$(include_paths),-I$(item))
library_paths := $(foreach item,$(library_paths),-L$(item))
link_librarys := $(foreach item,$(link_librarys),-l$(item))
# For a different GPU, change -gencode=arch=compute_75,code=sm_75 to match its compute capability
# Compute capabilities per GPU are listed here: https://developer.nvidia.com/zh-cn/cuda-gpus#compute
# On Jetson Nano, if the -m64 option is reported as not found, delete it; this does not affect the result
cpp_compile_flags := -std=$(stdcpp) -w -g -O0 -m64 -fPIC -fopenmp -pthread
cu_compile_flags := -std=$(stdcpp) -w -g -O0 -m64 $(cuda_arch) -Xcompiler "$(cpp_compile_flags)"
link_flags := -pthread -fopenmp -Wl,-rpath='$$ORIGIN'
cpp_compile_flags += $(include_paths)
cu_compile_flags += $(include_paths)
link_flags += $(library_paths) $(link_librarys) $(run_paths)
# When a header file is modified, the included dependency (.mk) files below trigger
# recompilation of the affected .cpp or .cu files
ifneq ($(MAKECMDGOALS), clean)
-include $(cpp_mk) $(cu_mk)
endif
$(name) : $(workdir)/$(name)

all : $(name)

run : $(name)
	@cd $(workdir) && ./$(name) $(run_args)

$(workdir)/$(name) : $(cpp_objs) $(cu_objs)
	@echo Link $@
	@mkdir -p $(dir $@)
	@$(cc) $^ -o $@ $(link_flags)

$(objdir)/%.cpp.o : $(srcdir)/%.cpp
	@echo Compile CXX $<
	@mkdir -p $(dir $@)
	@$(cc) -c $< -o $@ $(cpp_compile_flags)

$(objdir)/%.cu.o : $(srcdir)/%.cu
	@echo Compile CUDA $<
	@mkdir -p $(dir $@)
	@$(nvcc) -c $< -o $@ $(cu_compile_flags)
# Generate .mk dependency files for .cpp sources
$(objdir)/%.cpp.mk : $(srcdir)/%.cpp
	@echo Compile depends C++ $<
	@mkdir -p $(dir $@)
	@$(cc) -M $< -MF $@ -MT $(@:.cpp.mk=.cpp.o) $(cpp_compile_flags)

# Generate .mk dependency files for .cu sources
$(objdir)/%.cu.mk : $(srcdir)/%.cu
	@echo Compile depends CUDA $<
	@mkdir -p $(dir $@)
	@$(nvcc) -M $< -MF $@ -MT $(@:.cu.mk=.cu.o) $(cu_compile_flags)
# Clean target
clean :
	@rm -rf $(objdir) $(workdir)/$(name) $(workdir)/*.trtmodel

# Prevent these target names from being treated as files
.PHONY : clean run $(name)

# Export the library path so the binary can find its shared libraries at run time
export LD_LIBRARY_PATH:=$(library_path_export)
Key points:
1. Use createNetworkV2 and pass 1 (an explicit-batch network). createNetwork is deprecated, and implicit batch is officially discouraged; this choice also determines whether inference uses enqueue or enqueueV2.
2. Pointers such as builder and config must be released with ptr->destroy(), otherwise memory leaks (see the RAII sketch after this list).
3. markOutput marks an output node of the model: mark N times and there are N outputs, just as calling addInput N times gives N inputs.
4. workspaceSize is the size of the workspace. Some layers need extra storage during execution; rather than allocating it themselves, they request workspace memory from TensorRT, which allows that memory to be reused.
5. A saved engine is tied to the TensorRT version and the device it was built on, and is only guaranteed optimal for that configuration. Running a TRT engine on a different device may sometimes work, but it is not optimal and is not recommended.
6. bindings is TensorRT's description of the input and output tensors: bindings = input tensors + output tensors. For example, with input a and outputs b, c, d, bindings = [a, b, c, d], so bindings[0] = a and bindings[2] = c (see the binding-index sketch after this list).
7. enqueueV2 performs asynchronous inference: the job is added to the stream's queue to await execution. The bindings argument holds pointers to the tensors (note: device pointers).
8. createExecutionContext can be called multiple times, allowing one engine to have several execution contexts (see the last sketch after this list).
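A sketch for point 2, not taken from the original article: wrapping TensorRT objects in std::unique_ptr with a deleter that calls destroy() makes the release order automatic. TRTDestroyer and TRTUnique are helper names introduced here, not TensorRT API.

#include <memory>

// Deleter that calls the TensorRT destroy() method instead of delete.
struct TRTDestroyer
{
    template<typename T>
    void operator()(T* obj) const { if (obj) obj->destroy(); }
};

template<typename T>
using TRTUnique = std::unique_ptr<T, TRTDestroyer>;

// Usage (inside build_model):
//   TRTUnique<nvinfer1::IBuilder> builder(nvinfer1::createInferBuilder(logger));
//   TRTUnique<nvinfer1::IBuilderConfig> config(builder->createBuilderConfig());
//   ...each object is destroyed automatically, in reverse order of declaration.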
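A minimal sketch for point 6: rather than hard-coding binding positions, the layout can be checked by name at run time. This assumes the engine built above, whose input tensor is named "image"; the calls are the TensorRT 8 ICudaEngine API.

// Query the binding layout instead of assuming it.
int input_index = engine->getBindingIndex("image");   // 0 for the engine above
int n_bindings  = engine->getNbBindings();            // number of inputs + outputs
for (int i = 0; i < n_bindings; ++i)
{
    printf("binding %d: name=%s, is_input=%d\n",
           i, engine->getBindingName(i), (int)engine->bindingIsInput(i));
}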
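And a sketch for point 8, again under the assumptions of the example above: one engine, two execution contexts, each with its own stream and its own device buffers. bindings_a and bindings_b are hypothetical arrays prepared the same way as bindings in inference().

// Two independent execution contexts created from one engine.
nvinfer1::IExecutionContext* ctx_a = engine->createExecutionContext();
nvinfer1::IExecutionContext* ctx_b = engine->createExecutionContext();

cudaStream_t stream_a, stream_b;
cudaStreamCreate(&stream_a);
cudaStreamCreate(&stream_b);

// Each context enqueues on its own stream with its own bindings.
ctx_a->enqueueV2((void**)bindings_a, stream_a, nullptr);
ctx_b->enqueueV2((void**)bindings_b, stream_b, nullptr);

cudaStreamSynchronize(stream_a);
cudaStreamSynchronize(stream_b);

ctx_a->destroy();
ctx_b->destroy();
cudaStreamDestroy(stream_a);
cudaStreamDestroy(stream_b);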
————————————————
Copyright notice: this is an original article by the CSDN blogger weicao1990, licensed under CC 4.0 BY-SA; reprints must include the original source link and this notice.
Original link: https://blog.csdn.net/weicao1990/article/details/125034572