#include "infer/trt/depth/depth.hpp"

#include <cstdio>
#include <cstring>
#include <memory>
#include <unordered_map>
#include <vector>

#include "infer/trt/affine.hpp"
#include "common/check.hpp"
namespace depth
{
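// Load the serialized TensorRT engine, cache the network input geometry,
// and configure input normalization.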
bool DepthModelImpl::load(const std::string &engine_file)
{
    trt_ = TensorRT::load(engine_file);
    if (trt_ == nullptr) return false;

    trt_->print();

    // The input tensor is NCHW: dims[2] is height, dims[3] is width.
    auto input_dim = trt_->static_dims(0);
    network_input_width_ = input_dim[3];
    network_input_height_ = input_dim[2];
    isdynamic_model_ = trt_->has_dynamic_dim();

    // Scale pixels to [0, 1] and swap BGR -> RGB during preprocessing.
    normalize_ = affine::Norm::alpha_beta(1 / 255.0f, 0.0f, affine::ChannelType::SwapRB);
    return true;
}
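
// Letterbox the input image to the network resolution on the GPU: stage the
// BGR frame through a host-side buffer (typically pinned, which is what makes
// the async copy worthwhile), upload it together with the inverse affine
// matrix, then warp and normalize in a single kernel.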
void DepthModelImpl::preprocess(const tensor::Image &image, affine::LetterBoxMatrix &affine, void *stream)
{
    affine.compute(std::make_tuple(image.width, image.height),
                   std::make_tuple(network_input_width_, network_input_height_));

    float *input_device = input_buffer_.gpu();
    size_t size_image = image.width * image.height * 3;

    preprocess_buffer_.gpu(size_image);
    preprocess_buffer_.cpu(size_image);

    uint8_t *image_device = preprocess_buffer_.gpu();
    uint8_t *image_host = preprocess_buffer_.cpu();
    float *affine_matrix_device = affine_matrix_.gpu();
    float *affine_matrix_host = affine_matrix_.cpu();

    cudaStream_t stream_ = (cudaStream_t)stream;

    // Copy into the host staging buffer, then issue an async upload so the
    // transfer can overlap with other work on the stream.
    memcpy(image_host, image.bgrptr, size_image);
    checkRuntime(
        cudaMemcpyAsync(image_device, image_host, size_image, cudaMemcpyHostToDevice, stream_));

    memcpy(affine_matrix_host, affine.d2i, sizeof(affine.d2i));
    checkRuntime(cudaMemcpyAsync(affine_matrix_device, affine_matrix_host, sizeof(affine.d2i),
                                 cudaMemcpyHostToDevice, stream_));

    // Warp into the letterboxed network input; 114 is the gray padding value.
    affine::warp_affine_bilinear_and_normalize_plane(image_device, image.width * 3, image.width,
                                                     image.height, input_device, network_input_width_,
                                                     network_input_height_, affine_matrix_device, 114,
                                                     normalize_, stream_);
}
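
// Map the network's depth output back to the original image resolution by
// warping with the same inverse affine matrix computed during preprocessing.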
void DepthModelImpl::postprocess(int width, int height, void *stream)
{
    adjust_memory(width, height);

    cudaStream_t stream_ = (cudaStream_t)stream;
    float *affine_matrix_device = affine_matrix_.gpu();
    float *image_device = output_buffer_.gpu();
    float *dst_device = depth_map_buffer_.gpu();
    // 1000 appears to be the fill value for out-of-range pixels, mirroring
    // the 114 gray padding used in preprocess.
    affine::warp_affine_bilinear_single_channel_plane(
        image_device, network_input_width_, network_input_width_, network_input_height_,
        dst_device, width, height, affine_matrix_device, 1000,
        stream_);
}
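
// Sliced (tiled) inference is not implemented for the depth model; this
// overload only satisfies the Infer interface and returns an empty result.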
Result DepthModelImpl::forward(const tensor::Image &image, int slice_width, int slice_height,
                               float overlap_width_ratio, float overlap_height_ratio, void *stream)
{
    return cv::Mat();
}
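
// Full-image inference: preprocess, run the engine, then warp the raw depth
// map back to the input resolution and copy it to the host.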
Result DepthModelImpl::forward(const tensor::Image &image, void *stream)
{
    // This overload always processes a single image.
    int num_image = 1;

    auto input_dims = trt_->static_dims(0);
    int infer_batch_size = input_dims[0];
    if (infer_batch_size != num_image)
    {
        if (isdynamic_model_)
        {
            // Dynamic-shape engine: resize the batch dimension at runtime.
            infer_batch_size = num_image;
            input_dims[0] = num_image;
            if (!trt_->set_run_dims(0, input_dims))
            {
                printf("Failed to set run dims.\n");
                return {};
            }
        }
        else
        {
            if (infer_batch_size < num_image)
            {
                printf(
                    "When using a static shape model, the number of images[%d] must be "
                    "less than or equal to the maximum batch[%d].\n",
                    num_image, infer_batch_size);
                return {};
            }
        }
    }

    adjust_memory();
    affine::LetterBoxMatrix affine_matrix;
    cudaStream_t stream_ = (cudaStream_t)stream;
    preprocess(image, affine_matrix, stream);

#ifdef TRT10
    if (!trt_->forward(std::unordered_map<std::string, const void *>{
                           {"input", input_buffer_.gpu()},
                           {"output", output_buffer_.gpu()}},
                       stream_))
    {
        printf("Failed to run TensorRT forward.\n");
        return cv::Mat();
    }
#else
    std::vector<void *> bindings{input_buffer_.gpu(), output_buffer_.gpu()};
    if (!trt_->forward(bindings, stream))
    {
        printf("Failed to run TensorRT forward.\n");
        return cv::Mat();
    }
#endif

    postprocess(image.width, image.height, stream);

    checkRuntime(cudaMemcpyAsync(depth_map_buffer_.cpu(), depth_map_buffer_.gpu(),
                                 depth_map_buffer_.gpu_bytes(), cudaMemcpyDeviceToHost, stream_));
    checkRuntime(cudaStreamSynchronize(stream_));

    // Clone so the returned Mat owns its pixels; otherwise it would alias the
    // internal buffer, which is overwritten by the next forward() call.
    cv::Mat depth_mat(image.height, image.width, CV_32FC1, depth_map_buffer_.cpu());
    return depth_mat.clone();
}
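
// Raw factory: returns nullptr on failure so callers can detect a bad engine.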
Infer *loadraw(const std::string &engine_file)
{
    DepthModelImpl *impl = new DepthModelImpl();
    if (!impl->load(engine_file))
    {
        delete impl;
        impl = nullptr;
    }
    return impl;
}
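
// Public factory: selects the CUDA device, then wraps the raw pointer in a
// shared_ptr (which may hold nullptr if loading failed).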
std::shared_ptr<Infer> load_depth(const std::string &engine_file, int gpu_id)
{
    checkRuntime(cudaSetDevice(gpu_id));
    return std::shared_ptr<DepthModelImpl>((DepthModelImpl *)loadraw(engine_file));
}
}  // namespace depth
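
// Example usage (a sketch; assumes tensor::Image can be built from a raw BGR
// pointer plus width and height, which may differ in this codebase):
//
//   cv::Mat bgr = cv::imread("scene.jpg");
//   auto model = depth::load_depth("depth.engine", 0);
//   if (model != nullptr)
//   {
//       cv::Mat depth = model->forward(
//           tensor::Image(bgr.data, bgr.cols, bgr.rows), nullptr);
//   }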