leon 7 hónapja
szülő
commit
3114df9d5d
1 módosított fájl, 5 hozzáadás és 2 törlés
  1. 5 2
      src/resnet.cu

+ 5 - 2
src/resnet.cu

@@ -78,6 +78,7 @@ static dim3 block_dims(int numJobs) {
   return numJobs < GPU_BLOCK_THREADS ? numJobs : GPU_BLOCK_THREADS;
 }
 
// Rounds n up to the nearest multiple of align (default 32) using integer
// arithmetic: adding (align - 1) before the truncating division bumps any
// non-multiple into the next bucket, and the multiply scales it back.
// Intended for n >= 0 and align > 0: align == 0 divides by zero, and
// n + align - 1 can overflow int for values close to INT_MAX.
+inline int upbound(int n, int align = 32) { return (n + align - 1) / align * align; }
 
 static __global__ void warp_affine_bilinear_and_normalize_plane_kernel(
     uint8_t *src, int src_line_size, int src_width, int src_height, float *dst, int dst_width,
@@ -161,6 +162,8 @@ static __global__ void warp_affine_bilinear_and_normalize_plane_kernel(
   *pdst_c2 = c2;
 }
 
+
+
 static void warp_affine_bilinear_and_normalize_plane(uint8_t *src, int src_line_size, int src_width,
                                                      int src_height, float *dst, int dst_width,
                                                      int dst_height, float *matrix_2_3,
@@ -232,8 +235,8 @@ class InferImpl : public Infer {
     // the inference batch_size
     size_t input_numel = network_input_width_ * network_input_height_ * 3;
     input_buffer_.gpu(batch_size * input_numel);
-    output_boxarray_.gpu(batch_size * num_classes_);
-    output_boxarray_.cpu(batch_size * num_classes_);
+    output_array_.gpu(batch_size * num_classes_);
+    output_array_.cpu(batch_size * num_classes_);
 
 
     if ((int)preprocess_buffers_.size() < batch_size) {