|
@@ -43,6 +43,9 @@ void DepthModelImpl::preprocess(const tensor::Image &image, affine::LetterBoxMat
|
|
|
float *affine_matrix_device = affine_matrix_.gpu();
|
|
|
float *affine_matrix_host = affine_matrix_.cpu();
|
|
|
|
|
|
+ float *invert_affine_matrix_device = invert_affine_matrix_.gpu();
|
|
|
+ float *invert_affine_matrix_host = invert_affine_matrix_.cpu();
|
|
|
+
|
|
|
// speed up
|
|
|
cudaStream_t stream_ = (cudaStream_t)stream;
|
|
|
memcpy(image_host, image.bgrptr, size_image);
|
|
@@ -52,6 +55,10 @@ void DepthModelImpl::preprocess(const tensor::Image &image, affine::LetterBoxMat
|
|
|
memcpy(affine_matrix_host, affine.d2i, sizeof(affine.d2i));
|
|
|
checkRuntime(cudaMemcpyAsync(affine_matrix_device, affine_matrix_host, sizeof(affine.d2i),
|
|
|
cudaMemcpyHostToDevice, stream_));
|
|
|
+
|
|
|
+ memcpy(invert_affine_matrix_host, affine.i2d, sizeof(affine.i2d));
|
|
|
+ checkRuntime(cudaMemcpyAsync(invert_affine_matrix_device, invert_affine_matrix_host, sizeof(affine.i2d),
|
|
|
+ cudaMemcpyHostToDevice, stream_));
|
|
|
|
|
|
affine::warp_affine_bilinear_and_normalize_plane(image_device, image.width * 3, image.width,
|
|
|
image.height, input_device, network_input_width_,
|
|
@@ -65,14 +72,14 @@ void DepthModelImpl::postprocess(int width, int height, void *stream)
|
|
|
adjust_memory(width, height);
|
|
|
|
|
|
cudaStream_t stream_ = (cudaStream_t)stream;
|
|
|
- float *affine_matrix_device = affine_matrix_.gpu();
|
|
|
+ float *invert_affine_matrix_device = invert_affine_matrix_.gpu();
|
|
|
|
|
|
float *image_device = output_buffer_.gpu();
|
|
|
float *dst_device = depth_map_buffer_.gpu();
|
|
|
|
|
|
affine::warp_affine_bilinear_single_channel_plane(
|
|
|
image_device, network_input_width_, network_input_width_, network_input_height_,
|
|
|
- dst_device, width, height, affine_matrix_device, 1000,
|
|
|
+ dst_device, width, height, invert_affine_matrix_device, 0,
|
|
|
stream_);
|
|
|
}
|
|
|
|