@@ -17,7 +17,6 @@ bool DepthModelImpl::load(const std::string &engine_file, ModelType model_type,
trt_->print();

auto input_dim = trt_->static_dims(0);
- bbox_head_dims_ = trt_->static_dims(1);
network_input_width_ = input_dim[3];
network_input_height_ = input_dim[2];
isdynamic_model_ = trt_->has_dynamic_dim();
@@ -26,5 +25,121 @@ bool DepthModelImpl::load(const std::string &engine_file, ModelType model_type,
return true;
}

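+// Letterbox the BGR image on the GPU and write the normalized planar tensor into the
+// network input buffer.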
+void DepthModelImpl::preprocess(const tensor::Image &image, affine::LetterBoxMatrix &affine, void *stream)
+{
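+  // Work out the letterbox mapping between the source image size and the network input size.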
+  affine.compute(std::make_tuple(image.width, image.height),
+                 std::make_tuple(network_input_width_, network_input_height_));
+
+  size_t input_numel = network_input_width_ * network_input_height_ * 3;
+  float *input_device = input_buffer_.gpu();
+  size_t size_image = image.width * image.height * 3;
+
+  preprocess_buffer_.gpu(size_image);
+  preprocess_buffer_.cpu(size_image);
+
+  uint8_t *image_device = preprocess_buffer_.gpu();
+  uint8_t *image_host = preprocess_buffer_.cpu();
+
+  float *affine_matrix_device = affine_matrix_.gpu();
+  float *affine_matrix_host = affine_matrix_.cpu();
+
+  // Stage the image and the d2i affine matrix in host buffers, then copy both to the
+  // GPU asynchronously on the caller's stream.
+  cudaStream_t stream_ = (cudaStream_t)stream;
+  memcpy(image_host, image.bgrptr, size_image);
+  checkRuntime(
+      cudaMemcpyAsync(image_device, image_host, size_image, cudaMemcpyHostToDevice, stream_));
+
+  memcpy(affine_matrix_host, affine.d2i, sizeof(affine.d2i));
+  checkRuntime(cudaMemcpyAsync(affine_matrix_device, affine_matrix_host, sizeof(affine.d2i),
+                               cudaMemcpyHostToDevice, stream_));
+
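+  // Bilinear letterbox warp on the GPU: pads with 114 and normalizes directly into the
+  // network input tensor.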
+  affine::warp_affine_bilinear_and_normalize_plane(image_device, image.width * 3, image.width,
+                                                   image.height, input_device, network_input_width_,
+                                                   network_input_height_, affine_matrix_device, 114,
+                                                   normalize_, stream_);
+}
+
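+// Resize the raw network depth prediction back to the original image resolution.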
+void DepthModelImpl::postprocess(int width, int height, void *stream)
+{
+  adjust_memory(width, height);
+
+  cudaStream_t stream_ = (cudaStream_t)stream;
+  float *affine_matrix_device = affine_matrix_.gpu();
+
+  float *image_device = output_buffer_.gpu();
+  float *dst_device = depth_map_buffer_.gpu();
+
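+  // Single-channel bilinear warp from network resolution to the requested width x height.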
+  affine::warp_affine_bilinear_single_channel_plane(
+      image_device, network_input_width_, network_input_width_, network_input_height_,
+      dst_device, width, height, affine_matrix_device, 1000, stream_);
+}
+
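+// Run a single image through the engine: preprocess, inference, postprocess, then copy the
+// depth map back to the host.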
+cv::Mat DepthModelImpl::forward(const tensor::Image &image, void *stream)
+{
+  int num_image = 1;
+  if (num_image == 0) return {};
+
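+  // Match the engine's batch dimension: dynamic engines are re-dimensioned to the request,
+  // static engines only need enough capacity.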
+  auto input_dims = trt_->static_dims(0);
+  int infer_batch_size = input_dims[0];
+  if (infer_batch_size != num_image)
+  {
+    if (isdynamic_model_)
+    {
+      infer_batch_size = num_image;
+      input_dims[0] = num_image;
+      if (!trt_->set_run_dims(0, input_dims))
+      {
+        printf("Failed to set run dims\n");
+        return {};
+      }
+    }
+    else
+    {
+      if (infer_batch_size < num_image)
+      {
+        printf(
+            "When using a static shape model, the number of images[%d] must be "
+            "less than or equal to the maximum batch[%d].\n",
+            num_image, infer_batch_size);
+        return {};
+      }
+    }
+  }
+  adjust_memory();
+  affine::LetterBoxMatrix affine_matrix;
+  cudaStream_t stream_ = (cudaStream_t)stream;
+  preprocess(image, affine_matrix, stream);
+
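+  // TensorRT 10 takes named I/O tensors; older versions take an ordered binding list. Either
+  // way the raw network depth lands in output_buffer_, which postprocess reads from.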
+#ifdef TRT10
+  if (!trt_->forward(std::unordered_map<std::string, const void *>{
+          { "input", input_buffer_.gpu() },
+          { "output", output_buffer_.gpu() }
+      }, stream_))
+  {
+    printf("Failed to run TensorRT forward.\n");
+    return {};
+  }
+#else
+  std::vector<void *> bindings{input_buffer_.gpu(), output_buffer_.gpu()};
+  if (!trt_->forward(bindings, stream_))
+  {
+    printf("Failed to run TensorRT forward.\n");
+    return {};
+  }
+#endif
+
+  postprocess(image.width, image.height, stream);
+
+  checkRuntime(cudaMemcpyAsync(depth_map_buffer_.cpu(), depth_map_buffer_.gpu(),
+                               depth_map_buffer_.gpu_bytes(), cudaMemcpyDeviceToHost, stream_));
+  checkRuntime(cudaStreamSynchronize(stream_));
+  // Wrap the host-side depth buffer in a cv::Mat at the original image size and return a
+  // deep copy so the result outlives the internal buffer.
+  return cv::Mat(image.height, image.width, CV_32FC1, depth_map_buffer_.cpu()).clone();
+}
+
} // namespace depth