Quellcode durchsuchen

继续完善代码

leon vor 7 Monaten
Ursprung
Commit
bae4f11342
1 geänderte Dateien mit 12 neuen und 4 gelöschten Zeilen
  1. 12 4
      src/resnet.cu

+ 12 - 4
src/resnet.cu

@@ -218,6 +218,11 @@ struct AffineMatrix {
 };
 
 
+static __global__ void classfier(float *predict, cudaStream_t stream)
+{
+  // Placeholder kernel: body does no work yet; `predict` and `stream` are currently unused.
+}
+
 class InferImpl : public Infer {
  public:
   shared_ptr<trt::Infer> trt_;
@@ -337,12 +342,15 @@ class InferImpl : public Infer {
       return {};
     }
 
+    
+    checkRuntime(cudaMemcpyAsync(output_array_.cpu(), output_array_.gpu(),
+                                 output_array_.gpu_bytes(), cudaMemcpyDeviceToHost, stream_));
+    checkRuntime(cudaStreamSynchronize(stream_));
+
+    printf("size : %d\n", output_array_.cpu_size());
     // for (int ib = 0; ib < num_image; ++ib) {
-    //   float *boxarray_device = output_array_.gpu();
+      
     // }
-    // checkRuntime(cudaMemcpyAsync(output_boxarray_.cpu(), output_boxarray_.gpu(),
-    //                              output_boxarray_.gpu_bytes(), cudaMemcpyDeviceToHost, stream_));
-    // checkRuntime(cudaStreamSynchronize(stream_));
 
     vector<Attribute> arrout(num_image);