|
@@ -245,7 +245,7 @@ static __global__ void softmax(float *predict, int length)
|
|
|
// 2. 计算指数并求和
|
|
|
float sum_exp = 0.0f;
|
|
|
for (int i = tid; i < length; i += blockDim.x) {
|
|
|
- predict[i] = expf(data[i] - max_val);
|
|
|
+ predict[i] = expf(predict[i] - max_val);
|
|
|
sum_exp += predict[i];
|
|
|
}
|
|
|
shared_data[tid] = sum_exp;
|
|
@@ -393,7 +393,7 @@ class InferImpl : public Infer {
|
|
|
|
|
|
for (int ib = 0; ib < num_image; ++ib) {
|
|
|
float *output_array_device = output_array_.gpu() + ib * num_classes_;
|
|
|
- checkRuntime(classfier_softmax(output_array_device, num_classes_));
|
|
|
+ checkRuntime(classfier_softmax(output_array_device, num_classes_, stream_));
|
|
|
}
|
|
|
|
|
|
|
|
@@ -405,7 +405,7 @@ class InferImpl : public Infer {
|
|
|
float *output_array_cpu = output_array_.cpu() + ib * num_classes_;
|
|
|
for (int i = 0; i < num_classes_; i++)
|
|
|
{
|
|
|
- printf("prob : %f\t", *(output_array_cpu+i))
|
|
|
+ printf("prob : %f\t", *(output_array_cpu+i));
|
|
|
}
|
|
|
}
|
|
|
|