提交 07fd121d 编写于 作者: L liutuo

Remove the half-precision Transpose op and add a transpose before Shape() for the GPU runtime.

上级 2b820d8b
...@@ -50,15 +50,6 @@ std::string TransformedName(const std::string &input_name, ...@@ -50,15 +50,6 @@ std::string TransformedName(const std::string &input_name,
return ss.str(); return ss.str();
} }
#ifdef MACE_ENABLE_OPENCL
// Reports whether an op of type `op_type` should go through input
// transformation. Only the op types listed below are exempt; every other
// type (including an empty string) yields true. Matching is case-sensitive.
bool TransformRequiredOp(const std::string &op_type) {
  // Op types that are exempt from transformation.
  static const std::unordered_set<std::string> exempt_types{
      "Shape", "InferConv2dShape"};
  const bool is_exempt = exempt_types.find(op_type) != exempt_types.end();
  return !is_exempt;
}
#endif // MACE_ENABLE_OPENCL
void BuildTransposeOpDef( void BuildTransposeOpDef(
const std::string &input_name, const std::string &input_name,
const std::string &output_name, const std::string &output_name,
...@@ -514,8 +505,6 @@ MaceStatus NetDefAdapter::AdaptMemoryType( ...@@ -514,8 +505,6 @@ MaceStatus NetDefAdapter::AdaptMemoryType(
// (only support one kind of memory type for multiple outputs) // (only support one kind of memory type for multiple outputs)
op_registry_->GetInOutMemoryTypes(op_def->type(), context); op_registry_->GetInOutMemoryTypes(op_def->type(), context);
#ifdef MACE_ENABLE_OPENCL #ifdef MACE_ENABLE_OPENCL
// if op is memory-unused op, no transformation
if (TransformRequiredOp(op_def->type())) {
int input_size = op_def->input_size(); int input_size = op_def->input_size();
for (int i = 0; i < input_size; ++i) { for (int i = 0; i < input_size; ++i) {
if (output_map->count(op_def->input(i)) == 0) { if (output_map->count(op_def->input(i)) == 0) {
...@@ -585,7 +574,6 @@ MaceStatus NetDefAdapter::AdaptMemoryType( ...@@ -585,7 +574,6 @@ MaceStatus NetDefAdapter::AdaptMemoryType(
op_def->set_input(i, transformed_name); op_def->set_input(i, transformed_name);
} }
} }
}
#else #else
MACE_UNUSED(output_map); MACE_UNUSED(output_map);
MACE_UNUSED(tensor_shape_map); MACE_UNUSED(tensor_shape_map);
......
...@@ -27,7 +27,10 @@ namespace mace { ...@@ -27,7 +27,10 @@ namespace mace {
namespace ops { namespace ops {
template<DeviceType D, typename T> template<DeviceType D, typename T>
class TransposeOp : public Operation { class TransposeOp;
template<DeviceType D>
class TransposeOp<D, float> : public Operation {
public: public:
explicit TransposeOp(OpConstructContext *context) explicit TransposeOp(OpConstructContext *context)
: Operation(context), : Operation(context),
...@@ -49,8 +52,8 @@ class TransposeOp : public Operation { ...@@ -49,8 +52,8 @@ class TransposeOp : public Operation {
Tensor::MappingGuard input_guard(input); Tensor::MappingGuard input_guard(input);
Tensor::MappingGuard output_guard(output); Tensor::MappingGuard output_guard(output);
const T *input_data = input->data<T>(); const float *input_data = input->data<float>();
T *output_data = output->mutable_data<T>(); float *output_data = output->mutable_data<float>();
return Transpose(&context->device()->cpu_runtime()->thread_pool(), return Transpose(&context->device()->cpu_runtime()->thread_pool(),
input_data, input->shape(), dims_, output_data); input_data, input->shape(), dims_, output_data);
...@@ -63,8 +66,6 @@ class TransposeOp : public Operation { ...@@ -63,8 +66,6 @@ class TransposeOp : public Operation {
void RegisterTranspose(OpRegistryBase *op_registry) { void RegisterTranspose(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "Transpose", TransposeOp, MACE_REGISTER_OP(op_registry, "Transpose", TransposeOp,
DeviceType::CPU, float); DeviceType::CPU, float);
MACE_REGISTER_OP(op_registry, "Transpose", TransposeOp,
DeviceType::CPU, half);
} }
} // namespace ops } // namespace ops
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册