“IRuntime”: undeclared identifier
This error usually means that NvInferRuntime.h (the header that declares nvinfer1::IRuntime) is not included, or that the TensorRT include directory is missing from the compiler's search path. The complete, working example below includes both headers.

Complete usage, from "TensorRT Series (1): Model Inference" (https://blog.csdn.net/weicao1990/article/details/125034572):
// tensorRT include
#include <NvInfer.h>
#include <NvInferRuntime.h>
// cuda include
#include <cuda_runtime.h>
// system include
#include <stdio.h>
#include <math.h>
#include <iostream>
#include <fstream>
#include <vector>
using namespace std;
// Code carried over from the previous section: logger and weight helper
class TRTLogger : public nvinfer1::ILogger
{
public:
    virtual void log(Severity severity, nvinfer1::AsciiChar const* msg) noexcept override
    {
        if(severity <= Severity::kINFO)
        {
            printf("%d: %s\n", static_cast<int>(severity), msg);
        }
    }
} logger;
nvinfer1::Weights make_weights(float* ptr, int n)
{
    nvinfer1::Weights w;
    w.count = n;
    w.type = nvinfer1::DataType::kFLOAT;
    w.values = ptr;
    return w;
}
bool build_model()
{
    TRTLogger logger;

    // Basic builder components
    nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(logger);
    nvinfer1::IBuilderConfig* config = builder->createBuilderConfig();
    // The flag value 1 is 1U << kEXPLICIT_BATCH, i.e. an explicit-batch network
    nvinfer1::INetworkDefinition* network = builder->createNetworkV2(1);

    // Build the model
    /*
        Network definition:

        image
          |
        linear (fully connected)  input = 3, output = 2, bias = True
                                  w=[[1.0, 2.0, 0.5], [0.1, 0.2, 0.5]], b=[0.3, 0.8]
          |
        sigmoid
          |
        prob
    */
    const int num_input = 3;
    const int num_output = 2;
    float layer1_weight_values[] = {1.0, 2.0, 0.5, 0.1, 0.2, 0.5};
    float layer1_bias_values[]   = {0.3, 0.8};

    nvinfer1::ITensor* input = network->addInput("image", nvinfer1::DataType::kFLOAT, nvinfer1::Dims4(1, num_input, 1, 1));
    nvinfer1::Weights layer1_weight = make_weights(layer1_weight_values, 6);
    nvinfer1::Weights layer1_bias   = make_weights(layer1_bias_values, 2);
    auto layer1 = network->addFullyConnected(*input, num_output, layer1_weight, layer1_bias);
    auto prob   = network->addActivation(*layer1->getOutput(0), nvinfer1::ActivationType::kSIGMOID);

    // Mark prob as the model output
    network->markOutput(*prob->getOutput(0));

    printf("Workspace Size = %.2f MB\n", (1 << 28) / 1024.0f / 1024.0f);
    config->setMaxWorkspaceSize(1 << 28);
    builder->setMaxBatchSize(1);

    nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
    if(engine == nullptr)
    {
        printf("Build engine failed.\n");
        return false;
    }
    // Serialize the model and save it to a file
    nvinfer1::IHostMemory* model_data = engine->serialize();
    FILE* f = fopen("engine.trtmodel", "wb");
    fwrite(model_data->data(), 1, model_data->size(), f);
    fclose(f);

    // Destroy in reverse order of construction
    model_data->destroy();
    engine->destroy();
    network->destroy();
    config->destroy();
    builder->destroy();

    printf("Done.\n");
    return true;
}
vector<unsigned char> load_file(const string& file)
{
    ifstream in(file, ios::in | ios::binary);
    if (!in.is_open())
        return {};

    in.seekg(0, ios::end);
    size_t length = in.tellg();

    std::vector<uint8_t> data;
    if (length > 0)
    {
        in.seekg(0, ios::beg);
        data.resize(length);
        in.read((char*)&data[0], length);
    }
    in.close();
    return data;
}
void inference()
{
    // ------------------------------ 1. Prepare and load the model ----------------------------
    TRTLogger logger;
    auto engine_data = load_file("engine.trtmodel");

    // Before running inference, create a runtime instance; like the builder, the runtime needs a logger.
    nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(logger);

    // Deserialize engine_data to obtain the engine.
    nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine(engine_data.data(), engine_data.size());
    if(engine == nullptr)
    {
        printf("Deserialize cuda engine failed.\n");
        runtime->destroy();
        return;
    }

    nvinfer1::IExecutionContext* execution_context = engine->createExecutionContext();

    // Create a CUDA stream so that this batch's inference runs independently.
    cudaStream_t stream = nullptr;
    cudaStreamCreate(&stream);
    /*
        Network definition:

        image
          |
        linear (fully connected)  input = 3, output = 2, bias = True
                                  w=[[1.0, 2.0, 0.5], [0.1, 0.2, 0.5]], b=[0.3, 0.8]
          |
        sigmoid
          |
        prob
    */
    // ------------------------------ 2. Prepare the input data and move it to the GPU ----------------------------
    float input_data_host[] = {1, 2, 3};
    float* input_data_device = nullptr;

    float output_data_host[2];
    float* output_data_device = nullptr;

    cudaMalloc(&input_data_device, sizeof(input_data_host));
    cudaMalloc(&output_data_device, sizeof(output_data_host));
    cudaMemcpyAsync(input_data_device, input_data_host, sizeof(input_data_host), cudaMemcpyHostToDevice, stream);

    // Pass the GPU pointers for input and output via an array of bindings.
    float* bindings[] = {input_data_device, output_data_device};

    // ------------------------------ 3. Run inference and copy the result back to the CPU ----------------------------
    bool success = execution_context->enqueueV2((void**)bindings, stream, nullptr);
    if(!success)
        printf("Enqueue failed.\n");
    cudaMemcpyAsync(output_data_host, output_data_device, sizeof(output_data_host), cudaMemcpyDeviceToHost, stream);
    cudaStreamSynchronize(stream);

    printf("output_data_host = %f, %f\n", output_data_host[0], output_data_host[1]);
    // ------------------------------ 4. Free resources ----------------------------
    printf("Clean memory\n");
    cudaStreamDestroy(stream);
    cudaFree(input_data_device);
    cudaFree(output_data_device);
    execution_context->destroy();
    engine->destroy();
    runtime->destroy();

    // ------------------------------ 5. Manual computation for verification ----------------------------
    const int num_input = 3;
    const int num_output = 2;
    float layer1_weight_values[] = {1.0, 2.0, 0.5, 0.1, 0.2, 0.5};
    float layer1_bias_values[]   = {0.3, 0.8};

    printf("Manual verification of the results:\n");
    for(int io = 0; io < num_output; ++io)
    {
        float output_host = layer1_bias_values[io];
        for(int ii = 0; ii < num_input; ++ii)
        {
            output_host += layer1_weight_values[io * num_input + ii] * input_data_host[ii];
        }

        // sigmoid
        float prob = 1 / (1 + exp(-output_host));
        printf("output_prob[%d] = %f\n", io, prob);
    }
}
int main()
{
    if(!build_model())
    {
        return -1;
    }
    inference();
    return 0;
}
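For the input {1, 2, 3} the expected result can be worked out by hand: output0 = sigmoid(1*1.0 + 2*2.0 + 3*0.5 + 0.3) = sigmoid(6.8) ≈ 0.998890, and output1 = sigmoid(1*0.1 + 2*0.2 + 3*0.5 + 0.8) = sigmoid(2.8) ≈ 0.942676. Both the enqueueV2 result and the manual check at the end of inference() should print approximately these two values.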
makefile:
cc := g++
name := pro
workdir := workspace
srcdir := src
objdir := objs
stdcpp := c++11
cuda_home := /home/liuhongyuan/miniconda3/envs/trtpy/lib/python3.8/site-packages/trtpy/trt8cuda112cudnn8
syslib := /home/liuhongyuan/miniconda3/envs/trtpy/lib/python3.8/site-packages/trtpy/lib
cpp_pkg := /home/liuhongyuan/miniconda3/envs/trtpy/lib/python3.8/site-packages/trtpy/cpp-packages
cuda_arch :=
nvcc := $(cuda_home)/bin/nvcc -ccbin=$(cc)
# Find .cpp sources and derive their object (.o) and dependency (.mk) file paths
cpp_srcs := $(shell find $(srcdir) -name "*.cpp")
cpp_objs := $(cpp_srcs:.cpp=.cpp.o)
cpp_objs := $(cpp_objs:$(srcdir)/%=$(objdir)/%)
cpp_mk := $(cpp_objs:.cpp.o=.cpp.mk)
# Find .cu sources and derive their object (.o) and dependency (.mk) file paths
cu_srcs := $(shell find $(srcdir) -name "*.cu")
cu_objs := $(cu_srcs:.cu=.cu.o)
cu_objs := $(cu_objs:$(srcdir)/%=$(objdir)/%)
cu_mk := $(cu_objs:.cu.o=.cu.mk)
# Libraries needed from OpenCV and CUDA
link_cuda := cudart cudnn
link_trtpro :=
link_tensorRT := nvinfer
link_opencv :=
link_sys := stdc++ dl
link_librarys := $(link_cuda) $(link_tensorRT) $(link_sys) $(link_opencv)
# Header search paths. Note: no spaces may follow the trailing backslash
# Write only the path; -I is added automatically below
include_paths := src \
    $(cuda_home)/include/cuda \
    $(cuda_home)/include/tensorRT \
    $(cpp_pkg)/opencv4.2/include
# Library search paths; write only the path, -L is added automatically below
library_paths := $(cuda_home)/lib64 $(syslib) $(cpp_pkg)/opencv4.2/lib
# Join the library paths into a colon-separated string, e.g. a b c => a:b:c,
# which is then exported as LD_LIBRARY_PATH=a:b:c
empty :=
library_path_export := $(subst $(empty) $(empty),:,$(library_paths))
# Prefix the include paths, library paths and libraries with -I, -L and -l respectively
run_paths := $(foreach item,$(library_paths),-Wl,-rpath=$(item))
include_paths := $(foreach item,$(include_paths),-I$(item))
library_paths := $(foreach item,$(library_paths),-L$(item))
link_librarys := $(foreach item,$(link_librarys),-l$(item))
# For a different GPU, change -gencode=arch=compute_75,code=sm_75 to match its compute capability
# Compute capabilities per GPU are listed here: https://developer.nvidia.com/zh-cn/cuda-gpus#compute
# On Jetson Nano, if the -m64 option is reported as not found, delete it; this does not affect the result
cpp_compile_flags := -std=$(stdcpp) -w -g -O0 -m64 -fPIC -fopenmp -pthread
cu_compile_flags := -std=$(stdcpp) -w -g -O0 -m64 $(cuda_arch) -Xcompiler "$(cpp_compile_flags)"
link_flags := -pthread -fopenmp -Wl,-rpath='$$ORIGIN'
cpp_compile_flags += $(include_paths)
cu_compile_flags += $(include_paths)
link_flags += $(library_paths) $(link_librarys) $(run_paths)
# When a header file is modified, the included dependency (.mk) files below trigger
# recompilation of the affected .cpp or .cu files
ifneq ($(MAKECMDGOALS), clean)
-include $(cpp_mk) $(cu_mk)
endif
$(name) : $(workdir)/$(name)

all : $(name)

run : $(name)
	@cd $(workdir) && ./$(name) $(run_args)

$(workdir)/$(name) : $(cpp_objs) $(cu_objs)
	@echo Link $@
	@mkdir -p $(dir $@)
	@$(cc) $^ -o $@ $(link_flags)

$(objdir)/%.cpp.o : $(srcdir)/%.cpp
	@echo Compile CXX $<
	@mkdir -p $(dir $@)
	@$(cc) -c $< -o $@ $(cpp_compile_flags)

$(objdir)/%.cu.o : $(srcdir)/%.cu
	@echo Compile CUDA $<
	@mkdir -p $(dir $@)
	@$(nvcc) -c $< -o $@ $(cu_compile_flags)
# Generate .mk dependency files for .cpp sources
$(objdir)/%.cpp.mk : $(srcdir)/%.cpp
	@echo Compile depends C++ $<
	@mkdir -p $(dir $@)
	@$(cc) -M $< -MF $@ -MT $(@:.cpp.mk=.cpp.o) $(cpp_compile_flags)

# Generate .mk dependency files for .cu sources
$(objdir)/%.cu.mk : $(srcdir)/%.cu
	@echo Compile depends CUDA $<
	@mkdir -p $(dir $@)
	@$(nvcc) -M $< -MF $@ -MT $(@:.cu.mk=.cu.o) $(cu_compile_flags)
# Clean target
clean :
	@rm -rf $(objdir) $(workdir)/$(name) $(workdir)/*.trtmodel

# Prevent these target names from being treated as files
.PHONY : clean run $(name)

# Export the library path so the binary can find its shared libraries at run time
export LD_LIBRARY_PATH:=$(library_path_export)
Key points:
1. Use createNetworkV2 and pass 1 (an explicit-batch network). createNetwork is deprecated, and implicit batch is officially discouraged; this choice also determines whether inference uses enqueue or enqueueV2.
2. Pointers such as builder and config must be released with ptr->destroy(), otherwise memory leaks (see the RAII sketch after this list).
3. markOutput marks an output node of the model: mark N times and there are N outputs, just as calling addInput N times gives N inputs.
4. workspaceSize is the size of the workspace. Some layers need extra storage during execution; rather than allocating it themselves, they request workspace memory from TensorRT, which allows that memory to be reused.
5. A saved engine is tied to the TensorRT version and the device it was built on, and is only guaranteed optimal for that configuration. Running a TRT engine on a different device may sometimes work, but it is not optimal and is not recommended.
6. bindings is TensorRT's description of the input and output tensors: bindings = input tensors + output tensors. For example, with input a and outputs b, c, d, bindings = [a, b, c, d], so bindings[0] = a and bindings[2] = c (see the binding-index sketch after this list).
7. enqueueV2 performs asynchronous inference: the job is added to the stream's queue to await execution. The bindings argument holds pointers to the tensors (note: device pointers).
8. createExecutionContext can be called multiple times, allowing one engine to have several execution contexts (see the last sketch after this list).
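A sketch for point 2, not taken from the original article: wrapping TensorRT objects in std::unique_ptr with a deleter that calls destroy() makes the release order automatic. TRTDestroyer and TRTUnique are helper names introduced here, not TensorRT API.

#include <memory>

// Deleter that calls the TensorRT destroy() method instead of delete.
struct TRTDestroyer
{
    template<typename T>
    void operator()(T* obj) const { if (obj) obj->destroy(); }
};

template<typename T>
using TRTUnique = std::unique_ptr<T, TRTDestroyer>;

// Usage (inside build_model):
//   TRTUnique<nvinfer1::IBuilder> builder(nvinfer1::createInferBuilder(logger));
//   TRTUnique<nvinfer1::IBuilderConfig> config(builder->createBuilderConfig());
//   ...each object is destroyed automatically, in reverse order of declaration.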
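A minimal sketch for point 6: rather than hard-coding binding positions, the layout can be checked by name at run time. This assumes the engine built above, whose input tensor is named "image"; the calls are the TensorRT 8 ICudaEngine API.

// Query the binding layout instead of assuming it.
int input_index = engine->getBindingIndex("image");   // 0 for the engine above
int n_bindings  = engine->getNbBindings();            // number of inputs + outputs
for (int i = 0; i < n_bindings; ++i)
{
    printf("binding %d: name=%s, is_input=%d\n",
           i, engine->getBindingName(i), (int)engine->bindingIsInput(i));
}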
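And a sketch for point 8, again under the assumptions of the example above: one engine, two execution contexts, each with its own stream and its own device buffers. bindings_a and bindings_b are hypothetical arrays prepared the same way as bindings in inference().

// Two independent execution contexts created from one engine.
nvinfer1::IExecutionContext* ctx_a = engine->createExecutionContext();
nvinfer1::IExecutionContext* ctx_b = engine->createExecutionContext();

cudaStream_t stream_a, stream_b;
cudaStreamCreate(&stream_a);
cudaStreamCreate(&stream_b);

// Each context enqueues on its own stream with its own bindings.
ctx_a->enqueueV2((void**)bindings_a, stream_a, nullptr);
ctx_b->enqueueV2((void**)bindings_b, stream_b, nullptr);

cudaStreamSynchronize(stream_a);
cudaStreamSynchronize(stream_b);

ctx_a->destroy();
ctx_b->destroy();
cudaStreamDestroy(stream_a);
cudaStreamDestroy(stream_b);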
————————————————
Copyright notice: this is an original article by the CSDN blogger weicao1990, licensed under CC 4.0 BY-SA; reprints must include the original source link and this notice.
Original link: https://blog.csdn.net/weicao1990/article/details/125034572