diff --git a/paddle/function/SwitchOpGpu.cu b/paddle/function/SwitchOpGpu.cu index 0b9401dea1feaad8f8d51024e6ca524a09bd434f..45390a56c3f776ec18a65a6ba2f7149a7a6ef6c3 100644 --- a/paddle/function/SwitchOpGpu.cu +++ b/paddle/function/SwitchOpGpu.cu @@ -12,14 +12,18 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "hl_base.h" #include "SwitchOp.h" +#include "hl_base.h" namespace paddle { -__global__ void KeNCHW2NHWC(real* outputs, const real* inputs, - int inC, int inH, int inW, - int nthreads, int argType) { +__global__ void KeNCHW2NHWC(real* outputs, + const real* inputs, + int inC, + int inH, + int inW, + int nthreads, + int argType) { const int idx = threadIdx.x + blockIdx.x * blockDim.x; if (idx < nthreads) { const int w = idx % inW; @@ -27,7 +31,7 @@ __global__ void KeNCHW2NHWC(real* outputs, const real* inputs, const int c = (idx / inW / inH) % inC; const int n = idx / inW / inH / inC; - const int off = ((n * inH + h) * inW + w) * inC +c; + const int off = ((n * inH + h) * inW + w) * inC + c; if (argType == ADD_TO) { outputs[off] += inputs[idx]; } else { @@ -38,23 +42,27 @@ __global__ void KeNCHW2NHWC(real* outputs, const real* inputs, template <> void NCHW2NHWC(real* outputs, - const real* inputs, - const int num, - const int inC, - const int inH, - const int inW, - const int argType) { + const real* inputs, + const int num, + const int inC, + const int inH, + const int inW, + const int argType) { size_t nth = num * inC * inH * inW; int blockSize = 1024; int gridSize = (nth + 1024 - 1) / 1024; - KeNCHW2NHWC<<>> - (outputs, inputs, inC, inH, inW, nth, argType); + KeNCHW2NHWC<<>>( + outputs, inputs, inC, inH, inW, nth, argType); CHECK_SYNC("NCHW2NHWC"); } -__global__ void KeNHWC2NCHW(real* outputs, const real* inputs, - int inH, int inW, int inC, - int nthreads, int argType) { +__global__ void KeNHWC2NCHW(real* outputs, + const real* inputs, + int inH, + int inW, + int inC, + int nthreads, + int argType) { const int idx = threadIdx.x + blockIdx.x * blockDim.x; if (idx < nthreads) { const int c = idx % inC; @@ -73,17 +81,17 @@ __global__ void KeNHWC2NCHW(real* outputs, const real* inputs, template <> void NHWC2NCHW(real* outputs, - const real* inputs, - const int num, - const int inH, - const int inW, - const int inC, - const int argType) { + const real* inputs, + const int num, + const int inH, + const int inW, + const int inC, + const int argType) { int nth = num * inC * inH * inW; int blockSize = 1024; int gridSize = (nth + 1024 - 1) / 1024; - KeNHWC2NCHW<<>> - (outputs, inputs, inH, inW, inC, nth, argType); + KeNHWC2NCHW<<>>( + outputs, inputs, inH, inW, inC, nth, argType); CHECK_SYNC("NHWC2NCHW"); } diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index f5b15c3adb96a6634141226bf81aa998805110b3..0f44d8cb8d78ed23cc1105ac7aff37de5faeffa1 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -288,8 +288,8 @@ message PadConfig { } message ReshapeConfig { - repeated uint32 heightAxis = 1; - repeated uint32 widthAxis = 2; + repeated uint32 heightAxis = 1; + repeated uint32 widthAxis = 2; } message MultiBoxLossConfig { @@ -344,7 +344,6 @@ message LayerInputConfig { } message LayerConfig { - required string name = 1; required string type = 2; optional uint64 size = 3; @@ -516,13 +515,13 @@ message LayerConfig { optional int32 axis = 54 [ default = 2 ]; repeated uint32 offset = 55; repeated uint32 shape = 56; - + // for HuberRegressionLoss optional double delta = 57 [ default = 1.0 ]; optional uint64 depth = 58 [ default = 1 ]; - - // for switch order layer + + // for switch order layer optional ReshapeConfig reshape_conf = 59; }