|
@@ -78,6 +78,7 @@ static dim3 block_dims(int numJobs) {
|
|
return numJobs < GPU_BLOCK_THREADS ? numJobs : GPU_BLOCK_THREADS;
|
|
return numJobs < GPU_BLOCK_THREADS ? numJobs : GPU_BLOCK_THREADS;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+inline int upbound(int n, int align = 32) { return (n + align - 1) / align * align; }  // Rounds n up to the next multiple of align (default 32) using integer division; assumes n >= 0 and align > 0 (align == 0 would divide by zero) -- TODO confirm callers honor this.
|
|
|
|
|
|
static __global__ void warp_affine_bilinear_and_normalize_plane_kernel(
|
|
static __global__ void warp_affine_bilinear_and_normalize_plane_kernel(
|
|
uint8_t *src, int src_line_size, int src_width, int src_height, float *dst, int dst_width,
|
|
uint8_t *src, int src_line_size, int src_width, int src_height, float *dst, int dst_width,
|
|
@@ -161,6 +162,8 @@ static __global__ void warp_affine_bilinear_and_normalize_plane_kernel(
|
|
*pdst_c2 = c2;
|
|
*pdst_c2 = c2;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+
|
|
|
|
+
|
|
static void warp_affine_bilinear_and_normalize_plane(uint8_t *src, int src_line_size, int src_width,
|
|
static void warp_affine_bilinear_and_normalize_plane(uint8_t *src, int src_line_size, int src_width,
|
|
int src_height, float *dst, int dst_width,
|
|
int src_height, float *dst, int dst_width,
|
|
int dst_height, float *matrix_2_3,
|
|
int dst_height, float *matrix_2_3,
|
|
@@ -232,8 +235,8 @@ class InferImpl : public Infer {
|
|
// the inference batch_size
|
|
// the inference batch_size
|
|
size_t input_numel = network_input_width_ * network_input_height_ * 3;
|
|
size_t input_numel = network_input_width_ * network_input_height_ * 3;
|
|
input_buffer_.gpu(batch_size * input_numel);
|
|
input_buffer_.gpu(batch_size * input_numel);
|
|
- output_boxarray_.gpu(batch_size * num_classes_);
|
|
|
|
- output_boxarray_.cpu(batch_size * num_classes_);
|
|
|
|
|
|
+ output_array_.gpu(batch_size * num_classes_);
|
|
|
|
+ output_array_.cpu(batch_size * num_classes_);
|
|
|
|
|
|
|
|
|
|
if ((int)preprocess_buffers_.size() < batch_size) {
|
|
if ((int)preprocess_buffers_.size() < batch_size) {
|