leon hai 2 meses
pai
achega
753d46366d
Modificáronse 2 ficheiros con 13 adicións e 2 borrados
  1. 9 2
      src/infer/trt/depth/depth.cu
  2. 4 0
      src/infer/trt/depth/depth.hpp

+ 9 - 2
src/infer/trt/depth/depth.cu

@@ -43,6 +43,9 @@ void DepthModelImpl::preprocess(const tensor::Image &image, affine::LetterBoxMat
     float *affine_matrix_device = affine_matrix_.gpu();
     float *affine_matrix_host = affine_matrix_.cpu();
 
+    float *invert_affine_matrix_device = invert_affine_matrix_.gpu();
+    float *invert_affine_matrix_host = invert_affine_matrix_.cpu();
+    
     // speed up
     cudaStream_t stream_ = (cudaStream_t)stream;
     memcpy(image_host, image.bgrptr, size_image);
@@ -52,6 +55,10 @@ void DepthModelImpl::preprocess(const tensor::Image &image, affine::LetterBoxMat
     memcpy(affine_matrix_host, affine.d2i, sizeof(affine.d2i));
     checkRuntime(cudaMemcpyAsync(affine_matrix_device, affine_matrix_host, sizeof(affine.d2i),
                                 cudaMemcpyHostToDevice, stream_));
+    
+    memcpy(invert_affine_matrix_host, affine.i2d, sizeof(affine.i2d));
+    checkRuntime(cudaMemcpyAsync(invert_affine_matrix_device, invert_affine_matrix_host, sizeof(affine.i2d),
+                                cudaMemcpyHostToDevice, stream_));
 
     affine::warp_affine_bilinear_and_normalize_plane(image_device, image.width * 3, image.width,
                                             image.height, input_device, network_input_width_,
@@ -65,14 +72,14 @@ void DepthModelImpl::postprocess(int width, int height, void *stream)
     adjust_memory(width, height);
     
     cudaStream_t stream_ = (cudaStream_t)stream;
-    float *affine_matrix_device = affine_matrix_.gpu();
+    float *invert_affine_matrix_device = invert_affine_matrix_.gpu();
 
     float *image_device = output_buffer_.gpu();
     float *dst_device = depth_map_buffer_.gpu();
 
     affine::warp_affine_bilinear_single_channel_plane(
         image_device, network_input_width_, network_input_width_, network_input_height_,
-        dst_device, width, height, affine_matrix_device, 1000,
+        dst_device, width, height, invert_affine_matrix_device, 0,
         stream_);
 }
 

+ 4 - 0
src/infer/trt/depth/depth.hpp

@@ -31,6 +31,7 @@ namespace depth
         tensor::Memory<unsigned char> preprocess_buffer_;
     
         tensor::Memory<float> affine_matrix_;
+        tensor::Memory<float> invert_affine_matrix_;
         tensor::Memory<float> input_buffer_, output_buffer_;
         tensor::Memory<float> depth_map_buffer_;
     
@@ -61,6 +62,9 @@ namespace depth
 
             affine_matrix_.gpu(6);
             affine_matrix_.cpu(6);
+
+            invert_affine_matrix_.gpu(6);
+            invert_affine_matrix_.cpu(6);
         }
     
         void preprocess(const tensor::Image &image, affine::LetterBoxMatrix &affine, void *stream = nullptr);