Browse Source

add depth

leon 1 month ago
parent
commit
466f816f7f
5 changed files with 162 additions and 23 deletions
  1. 3 1
      .vscode/settings.json
  2. 13 0
      src/infer/trt/affine.cu
  3. 116 1
      src/infer/trt/depth_any/depth.cu
  4. 22 5
      src/infer/trt/depth_any/depth.hpp
  5. 8 16
      src/main.cpp

+ 3 - 1
.vscode/settings.json

@@ -56,6 +56,8 @@
         "tuple": "cpp",
         "tuple": "cpp",
         "future": "cpp",
         "future": "cpp",
         "iomanip": "cpp",
         "iomanip": "cpp",
-        "fstream": "cpp"
+        "fstream": "cpp",
+        "iterator": "cpp",
+        "utility": "cpp"
     }
     }
 }
 }

+ 13 - 0
src/infer/trt/affine.cu

@@ -135,4 +135,17 @@ void warp_affine_bilinear_and_normalize_plane(uint8_t *src, int src_line_size, i
         matrix_2_3, norm));
         matrix_2_3, norm));
 }
 }
 
 
// Launch the single-channel bilinear warp-affine kernel on the given stream.
// Maps the float source plane (src_width x src_height, src_line_size floats
// per row) onto the float destination plane (dst_width x dst_height) through
// the 2x3 matrix; destination pixels that map outside the source receive
// const_value. Asynchronous with respect to the host.
void warp_affine_bilinear_single_channel_plane(float *src, int src_line_size, int src_width,
    int src_height, float *dst, int dst_width,
    int dst_height, float *matrix_2_3,
    float const_value, cudaStream_t stream)
{
    constexpr int kTile = 32;  // 32x32 = 1024 threads per block (hardware max)
    dim3 block_dim(kTile, kTile);
    // Ceil-divide so partial tiles at the right/bottom edges are covered.
    dim3 grid_dim((dst_width + kTile - 1) / kTile, (dst_height + kTile - 1) / kTile);

    checkKernel(warp_affine_bilinear_single_channel_kernel<<<grid_dim, block_dim, 0, stream>>>(
        src, src_line_size, src_width, src_height, dst, dst_width, dst_height, const_value,
        matrix_2_3));
}
+
 } // namespace affine
 } // namespace affine

+ 116 - 1
src/infer/trt/depth_any/depth.cu

@@ -17,7 +17,6 @@ bool DepthModelImpl::load(const std::string &engine_file, ModelType model_type,
     trt_->print();
     trt_->print();
 
 
     auto input_dim = trt_->static_dims(0);
     auto input_dim = trt_->static_dims(0);
-    bbox_head_dims_ = trt_->static_dims(1);
     network_input_width_ = input_dim[3];
     network_input_width_ = input_dim[3];
     network_input_height_ = input_dim[2];
     network_input_height_ = input_dim[2];
     isdynamic_model_ = trt_->has_dynamic_dim();
     isdynamic_model_ = trt_->has_dynamic_dim();
@@ -26,5 +25,121 @@ bool DepthModelImpl::load(const std::string &engine_file, ModelType model_type,
     return true;
     return true;
 }
 }
 
 
// Letterbox-resize the BGR input image into the network input tensor.
// Computes the letterbox affine (and its inverse, kept on the GPU for
// postprocess()), stages the image through a host buffer, uploads it
// asynchronously, then warps + normalizes it into input_buffer_.
// `stream` is an opaque cudaStream_t; all device work is enqueued on it.
void DepthModelImpl::preprocess(const tensor::Image &image, affine::LetterBoxMatrix &affine, void *stream)
{
    // Source size -> network input size letterbox transform.
    affine.compute(std::make_tuple(image.width, image.height),
                std::make_tuple(network_input_width_, network_input_height_));

    float *input_device = input_buffer_.gpu();
    size_t size_image = image.width * image.height * 3;  // BGR, 1 byte/channel

    // (Re)allocate the staging buffers to fit this image.
    preprocess_buffer_.gpu(size_image);
    preprocess_buffer_.cpu(size_image);

    uint8_t *image_device = preprocess_buffer_.gpu();
    uint8_t *image_host   = preprocess_buffer_.cpu();

    float *affine_matrix_device = affine_matrix_.gpu();
    float *affine_matrix_host = affine_matrix_.cpu();

    cudaStream_t stream_ = (cudaStream_t)stream;

    // Copy into the host staging buffer, then enqueue the H2D copy.
    // NOTE(review): cudaMemcpyAsync only overlaps if preprocess_buffer_.cpu()
    // is pinned (cudaMallocHost) — confirm tensor::Memory's host allocation.
    memcpy(image_host, image.bgrptr, size_image);
    checkRuntime(
        cudaMemcpyAsync(image_device, image_host, size_image, cudaMemcpyHostToDevice, stream_));

    // Upload the inverse (dst->src) affine matrix used by the warp kernel.
    memcpy(affine_matrix_host, affine.d2i, sizeof(affine.d2i));
    checkRuntime(cudaMemcpyAsync(affine_matrix_device, affine_matrix_host, sizeof(affine.d2i),
                                cudaMemcpyHostToDevice, stream_));

    // Warp into the network input plane; 114 is the letterbox padding value.
    affine::warp_affine_bilinear_and_normalize_plane(image_device, image.width * 3, image.width,
                                            image.height, input_device, network_input_width_,
                                            network_input_height_, affine_matrix_device, 114,
                                            normalize_, stream_);
}
+
+
// Warp the network-sized depth plane (output_buffer_) back to the original
// image resolution (width x height) into depth_map_buffer_, using the affine
// matrix already uploaded to the GPU by preprocess(). Out-of-range pixels
// are filled with 1000. Asynchronous on `stream`.
//
// Fix: removed `size_t size_matrix = sizeof(affine.d2i);` — `affine` is not
// in scope in this function (it does not compile) and the value was unused.
void DepthModelImpl::postprocess(int width, int height, void *stream)
{
    // Make sure the full-resolution depth buffers exist before writing.
    adjust_memory(width, height);

    cudaStream_t stream_ = (cudaStream_t)stream;
    float *affine_matrix_device = affine_matrix_.gpu();

    float *image_device = output_buffer_.gpu();   // network-sized depth plane
    float *dst_device = depth_map_buffer_.gpu();  // image-sized depth plane

    // src_line_size == network width: the depth plane is densely packed.
    affine::warp_affine_bilinear_single_channel_plane(
        image_device, network_input_width_, network_input_width_, network_input_height_,
        dst_device, width, height, affine_matrix_device, 1000,
        stream_);
}
+
+
// Run depth inference on a single image.
// Returns the depth map as a CV_32FC1 cv::Mat at the original image
// resolution, or an empty Mat on failure.
//
// Fixes:
//  - TRT10 branch bound `bbox_predict_`, a member removed in this commit;
//    the network output binding is `output_buffer_` (postprocess reads it).
//  - Non-TRT10 branch bound `depth_map_buffer_` as the output, but that
//    buffer is only sized later inside postprocess(); the raw network output
//    belongs in `output_buffer_` here too.
//  - The function copied the finished depth map to the host and then
//    returned an empty cv::Mat(), discarding the result.
cv::Mat DepthModelImpl::forward(const tensor::Image &image, void *stream)
{
    const int num_image = 1;  // single-image API

    auto input_dims = trt_->static_dims(0);
    int infer_batch_size = input_dims[0];
    if (infer_batch_size != num_image)
    {
        if (isdynamic_model_)
        {
            infer_batch_size = num_image;
            input_dims[0] = num_image;
            if (!trt_->set_run_dims(0, input_dims))
            {
                printf("Fail to set run dims\n");
                return {};
            }
        }
        else
        {
            if (infer_batch_size < num_image)
            {
                printf(
                    "When using static shape model, number of images[%d] must be "
                    "less than or equal to the maximum batch[%d].",
                    num_image, infer_batch_size);
                return {};
            }
        }
    }

    adjust_memory();  // allocate network input/output tensors

    affine::LetterBoxMatrix affine_matrix;
    cudaStream_t stream_ = (cudaStream_t)stream;
    preprocess(image, affine_matrix, stream);

#ifdef TRT10
    // Bind by tensor name; the raw output goes to output_buffer_, which
    // postprocess() warps back to image resolution.
    if (!trt_->forward(std::unordered_map<std::string, const void *>{
            { "input", input_buffer_.gpu() },
            { "output", output_buffer_.gpu() }
        }, stream_))
    {
        printf("Failed to tensorRT forward.");
        return {};
    }
#else
    // Bind by index (input, output).
    std::vector<void *> bindings{input_buffer_.gpu(), output_buffer_.gpu()};
    if (!trt_->forward(bindings, stream))
    {
        printf("Failed to tensorRT forward.");
        return {};
    }
#endif

    // Warp the network output to (image.width x image.height).
    postprocess(image.width, image.height, stream);

    checkRuntime(cudaMemcpyAsync(depth_map_buffer_.cpu(), depth_map_buffer_.gpu(),
                                 depth_map_buffer_.gpu_bytes(), cudaMemcpyDeviceToHost, stream_));
    checkRuntime(cudaStreamSynchronize(stream_));

    // Wrap the host buffer and clone so the returned Mat owns its pixels
    // (the member buffer is reused by the next call).
    cv::Mat depth_map(image.height, image.width, CV_32FC1, depth_map_buffer_.cpu());
    return depth_map.clone();
}
+
 
 
 }   // namespace depth
 }   // namespace depth

+ 22 - 5
src/infer/trt/depth_any/depth.hpp

@@ -19,11 +19,12 @@ namespace depth
     
     
         std::shared_ptr<TensorRT::Engine> trt_;
         std::shared_ptr<TensorRT::Engine> trt_;
         std::string engine_file_;
         std::string engine_file_;
-    
-        tensor::Memory<int> box_count_;
+        
+        tensor::Memory<unsigned char> preprocess_buffer_;
     
     
         tensor::Memory<float> affine_matrix_;
         tensor::Memory<float> affine_matrix_;
-        tensor::Memory<float>  input_buffer_, bbox_predict_, output_boxarray_;
+        tensor::Memory<float> input_buffer_, output_buffer_;
+        tensor::Memory<float> depth_map_buffer_;
     
     
         int network_input_width_, network_input_height_;
         int network_input_width_, network_input_height_;
         affine::Norm normalize_;
         affine::Norm normalize_;
@@ -34,9 +35,25 @@ namespace depth
     
     
         virtual ~DepthModelImpl() = default;
         virtual ~DepthModelImpl() = default;
     
     
-        void adjust_memory(int batch_size);
+        void adjust_memory(int width, int height)
+        {
+            depth_map_buffer_.gpu(width * height);
+            depth_map_buffer_.cpu(width * height);
+        }
+
+
+        void adjust_memory() 
+        {
+            // the inference batch_size
+            size_t input_numel = network_input_width_ * network_input_height_ * 3;
+            input_buffer_.gpu(batch_size * input_numel);
+
+            output_buffer_.gpu(batch_size * input_numel / 3);
+            output_buffer_.cpu(batch_size * input_numel / 3);
+        }
     
     
-        void preprocess(int ibatch, affine::LetterBoxMatrix &affine, void *stream = nullptr);
+        void preprocess(const tensor::Image &image, affine::LetterBoxMatrix &affine, void *stream = nullptr);
+        void postprocess(int width, int height, void *stream = nullptr)
         
         
     
     
         bool load(const std::string &engine_file, ModelType model_type, const std::vector<std::string>& names, float confidence_threshold, float nms_threshold);
         bool load(const std::string &engine_file, ModelType model_type, const std::vector<std::string>& names, float confidence_threshold, float nms_threshold);

+ 8 - 16
src/main.cpp

@@ -7,22 +7,6 @@
 
 
 int main()
 int main()
 {
 {
-    // create trt model
-    // std::vector<std::string> names = {
-    //     "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck",
-    //     "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
-    //     "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra",
-    //     "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
-    //     "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
-    //     "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
-    //     "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange",
-    //     "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa",
-    //     "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse",
-    //     "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
-    //     "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier",
-    //     "toothbrush"
-    // }; 
-
     std::vector<std::string> names = { "person", "clothes", "vest" };
     std::vector<std::string> names = { "person", "clothes", "vest" };
 
 
     std::shared_ptr<Node::StreamNode> src_node0   = std::make_shared<Node::StreamNode>("src0", "rtsp://admin:lww123456@172.16.22.16:554/Streaming/Channels/101", 0, Node::DecodeType::GPU);
     std::shared_ptr<Node::StreamNode> src_node0   = std::make_shared<Node::StreamNode>("src0", "rtsp://admin:lww123456@172.16.22.16:554/Streaming/Channels/101", 0, Node::DecodeType::GPU);
@@ -57,6 +41,14 @@ int main()
 
 
 
 
 // TODO
 // TODO
+// 硬解码、软解码    完成 软解使用opencv + ffmpeg,硬解使用nvcodec + ffmpeg
+// 模型复用         完成,基类加锁保证一致性
+// 画图节点         完成
+// 推送节点         基本完成
+
+// 分析节点         
+// depth-anything    进行中
+// YOLO11 seg 
 // 通过配置文件创建 pipeline
 // 通过配置文件创建 pipeline
 // 日志
 // 日志
 // 设置电子围栏
 // 设置电子围栏