|
@@ -218,6 +218,11 @@ struct AffineMatrix {
|
|
};
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
+// Placeholder kernel: the body is empty, so launching it performs no work
+// and neither argument is read or written.
+//
+// predict: pointer argument; not dereferenced by the empty body.
+// stream:  stream handle passed by value; not used by the empty body.
+//          NOTE(review): a launch stream is normally supplied through the
+//          <<<grid, block, smem, stream>>> execution configuration rather
+//          than as a kernel argument - confirm this parameter is intended.
+//
+// __global__ functions are required to have a void return type.
+static __global__ void classfier(float *predict, cudaStream_t stream)
+{
+    // No body yet.
+}
|
|
|
|
+
|
|
class InferImpl : public Infer {
|
|
class InferImpl : public Infer {
|
|
public:
|
|
public:
|
|
shared_ptr<trt::Infer> trt_;
|
|
shared_ptr<trt::Infer> trt_;
|
|
@@ -337,12 +342,15 @@ class InferImpl : public Infer {
|
|
return {};
|
|
return {};
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+
|
|
|
|
+ checkRuntime(cudaMemcpyAsync(output_array_.cpu(), output_array_.gpu(),
|
|
|
|
+ output_array_.gpu_bytes(), cudaMemcpyDeviceToHost, stream_));
|
|
|
|
+ checkRuntime(cudaStreamSynchronize(stream_));
|
|
|
|
+
|
|
|
|
+ printf("size : %d\n", output_array_.cpu_size());
|
|
// for (int ib = 0; ib < num_image; ++ib) {
|
|
// for (int ib = 0; ib < num_image; ++ib) {
|
|
- // float *boxarray_device = output_array_.gpu();
|
|
|
|
|
|
+
|
|
// }
|
|
// }
|
|
- // checkRuntime(cudaMemcpyAsync(output_boxarray_.cpu(), output_boxarray_.gpu(),
|
|
|
|
- // output_boxarray_.gpu_bytes(), cudaMemcpyDeviceToHost, stream_));
|
|
|
|
- // checkRuntime(cudaStreamSynchronize(stream_));
|
|
|
|
|
|
|
|
vector<Attribute> arrout(num_image);
|
|
vector<Attribute> arrout(num_image);
|
|
|
|
|