7 月之前 · bae4f11342
--- a/src/resnet.cu
+++ b/src/resnet.cu
@@ -218,6 +218,11 @@ struct AffineMatrix {
 
															 };
														
 
															+static __global__ void classfier(float *predict, cudaStream_t stream)
														
 
															+{
														
 
															+  // Stub kernel: empty body, neither parameter is read yet. (__global__ functions must return void.)
														
 
															+}
														
 
															+
														
 
															 class InferImpl : public Infer {
														
 
															  public:
														
 
															   shared_ptr<trt::Infer> trt_;
														
@@ -337,12 +342,15 @@ class InferImpl : public Infer {
 
															       return {};
														
 
															     }
														
 
															+    
														
 
															+    checkRuntime(cudaMemcpyAsync(output_array_.cpu(), output_array_.gpu(),
														
 
															+                                 output_array_.gpu_bytes(), cudaMemcpyDeviceToHost, stream_));
														
 
															+    checkRuntime(cudaStreamSynchronize(stream_));
														
 
															+
														
 
															+    printf("size : %d\n", output_array_.cpu_size());
														
 
															     // for (int ib = 0; ib < num_image; ++ib) {
														
 
															-    //   float *boxarray_device = output_array_.gpu();
														
 
															+      
														
 
															     // }
														
 
															-    // checkRuntime(cudaMemcpyAsync(output_boxarray_.cpu(), output_boxarray_.gpu(),
														
 
															-    //                              output_boxarray_.gpu_bytes(), cudaMemcpyDeviceToHost, stream_));
														
 
															-    // checkRuntime(cudaStreamSynchronize(stream_));
														
 
															     vector<Attribute> arrout(num_image);