未验证 提交 547a9ff8 编写于 作者: D dolphin8 提交者: GitHub

Merge pull request #1200 from dolphin8/opencl

Fix softmax: evaluate exp() in float precision (convert_float4) and convert the result back to half4
......@@ -33,17 +33,17 @@ __kernel void softmax(__read_only image2d_t input_image,
maxv = max(maxv, max(temp.x, max(temp.y, max(temp.z, temp.w))));
}
half4 rsum = (half4)(0.0f);
half4 rsum = (half4)(0.0f);
for (int i = 0; i < group; ++i) {
half4 r = read_imageh(input_image, sampler, (int2)(i, 0));
rsum += exp(r - maxv);
rsum += convert_half4(exp(convert_float4(r - maxv)));
}
float sum = rsum.x + rsum.y + rsum.z + rsum.w;
half4 rr = read_imageh(input_image, sampler, (int2)(out_w, out_nh));
half4 result = exp(rr - maxv) / sum;
half4 result = convert_half4(exp(convert_float4(rr - maxv)) / sum);
write_imageh(output_image, (int2)(out_w, out_nh), result);
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册