Commit e033569d authored by: W wanghaoshuang

Fix format

Parent eb3c774b
@@ -12,14 +12,18 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "hl_base.h"
 #include "SwitchOp.h"
+#include "hl_base.h"

 namespace paddle {

-__global__ void KeNCHW2NHWC(real* outputs, const real* inputs,
-                            int inC, int inH, int inW,
-                            int nthreads, int argType) {
+__global__ void KeNCHW2NHWC(real* outputs,
+                            const real* inputs,
+                            int inC,
+                            int inH,
+                            int inW,
+                            int nthreads,
+                            int argType) {
   const int idx = threadIdx.x + blockIdx.x * blockDim.x;
   if (idx < nthreads) {
     const int w = idx % inW;
@@ -27,7 +31,7 @@ __global__ void KeNCHW2NHWC(real* outputs, const real* inputs,
     const int c = (idx / inW / inH) % inC;
     const int n = idx / inW / inH / inC;
-    const int off = ((n * inH + h) * inW + w) * inC +c;
+    const int off = ((n * inH + h) * inW + w) * inC + c;
     if (argType == ADD_TO) {
       outputs[off] += inputs[idx];
     } else {
@@ -38,23 +42,27 @@ __global__ void KeNCHW2NHWC(real* outputs, const real* inputs,
 template <>
 void NCHW2NHWC<DEVICE_TYPE_GPU>(real* outputs,
                                 const real* inputs,
                                 const int num,
                                 const int inC,
                                 const int inH,
                                 const int inW,
                                 const int argType) {
   size_t nth = num * inC * inH * inW;
   int blockSize = 1024;
   int gridSize = (nth + 1024 - 1) / 1024;
-  KeNCHW2NHWC<<<gridSize, blockSize, 0, STREAM_DEFAULT>>>
-      (outputs, inputs, inC, inH, inW, nth, argType);
+  KeNCHW2NHWC<<<gridSize, blockSize, 0, STREAM_DEFAULT>>>(
+      outputs, inputs, inC, inH, inW, nth, argType);
   CHECK_SYNC("NCHW2NHWC");
 }

-__global__ void KeNHWC2NCHW(real* outputs, const real* inputs,
-                            int inH, int inW, int inC,
-                            int nthreads, int argType) {
+__global__ void KeNHWC2NCHW(real* outputs,
+                            const real* inputs,
+                            int inH,
+                            int inW,
+                            int inC,
+                            int nthreads,
+                            int argType) {
   const int idx = threadIdx.x + blockIdx.x * blockDim.x;
   if (idx < nthreads) {
     const int c = idx % inC;
@@ -73,17 +81,17 @@ __global__ void KeNHWC2NCHW(real* outputs, const real* inputs,
 template <>
 void NHWC2NCHW<DEVICE_TYPE_GPU>(real* outputs,
                                 const real* inputs,
                                 const int num,
                                 const int inH,
                                 const int inW,
                                 const int inC,
                                 const int argType) {
   int nth = num * inC * inH * inW;
   int blockSize = 1024;
   int gridSize = (nth + 1024 - 1) / 1024;
-  KeNHWC2NCHW<<<gridSize, blockSize, 0, STREAM_DEFAULT>>>
-      (outputs, inputs, inH, inW, inC, nth, argType);
+  KeNHWC2NCHW<<<gridSize, blockSize, 0, STREAM_DEFAULT>>>(
+      outputs, inputs, inH, inW, inC, nth, argType);
   CHECK_SYNC("NHWC2NCHW");
 }
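Both kernels implement the same one-to-one remapping between linear NCHW indices and NHWC offsets, and the launchers size the grid with a ceiling division, (nth + 1024 - 1) / 1024, so every element gets a thread. As an editorial illustration of the index arithmetic (not part of this commit), here is a minimal host-side C++ sketch of the NCHW-to-NHWC mapping; the function name transposeNCHW2NHWC, the test sizes, and the use of float instead of Paddle's real typedef are assumptions made for the example.

#include <cassert>
#include <vector>

// Host-side reference of the index mapping used by KeNCHW2NHWC:
// linear NCHW index -> offset ((n * H + h) * W + w) * C + c in the NHWC output.
// (Hypothetical helper for illustration only; not part of the commit.)
void transposeNCHW2NHWC(const std::vector<float>& in, std::vector<float>& out,
                        int num, int C, int H, int W) {
  for (int idx = 0; idx < num * C * H * W; ++idx) {
    const int w = idx % W;
    const int h = (idx / W) % H;
    const int c = (idx / W / H) % C;
    const int n = idx / W / H / C;
    const int off = ((n * H + h) * W + w) * C + c;
    out[off] = in[idx];  // ASSIGN_TO semantics; ADD_TO would use += instead
  }
}

int main() {
  const int n = 1, C = 2, H = 2, W = 3;
  std::vector<float> nchw(n * C * H * W), nhwc(nchw.size());
  for (size_t i = 0; i < nchw.size(); ++i) nchw[i] = static_cast<float>(i);
  transposeNCHW2NHWC(nchw, nhwc, n, C, H, W);
  // Element (n=0, c=1, h=0, w=2) sits at NCHW index 8
  // and at NHWC offset ((0 * 2 + 0) * 3 + 2) * 2 + 1 = 5.
  assert(nhwc[5] == nchw[8]);
  return 0;
}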
@@ -288,8 +288,8 @@ message PadConfig {
 }

 message ReshapeConfig {
   repeated uint32 heightAxis = 1;
   repeated uint32 widthAxis = 2;
 }

 message MultiBoxLossConfig {
@@ -344,7 +344,6 @@ message LayerInputConfig {
 }

-
 message LayerConfig {
   required string name = 1;
   required string type = 2;
   optional uint64 size = 3;
@@ -516,13 +515,13 @@ message LayerConfig {
   optional int32 axis = 54 [ default = 2 ];
   repeated uint32 offset = 55;
   repeated uint32 shape = 56;

   // for HuberRegressionLoss
   optional double delta = 57 [ default = 1.0 ];

   optional uint64 depth = 58 [ default = 1 ];

   // for switch order layer
   optional ReshapeConfig reshape_conf = 59;
 }