// Copyright 2018 Xiaomi, Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include #include #include #include "mace/core/macros.h" #include "mace/core/net.h" #include "mace/public/mace.h" #include "mace/utils/memory_logging.h" #include "mace/utils/timer.h" #include "mace/utils/utils.h" namespace mace { NetBase::NetBase(const std::shared_ptr op_registry, const std::shared_ptr net_def, Workspace *ws, Device *device) : name_(net_def->name()), op_registry_(op_registry) { MACE_UNUSED(ws); MACE_UNUSED(device); } SerialNet::SerialNet( const std::shared_ptr op_registry, const std::shared_ptr net_def, Workspace *ws, Device *device, const NetMode mode) : NetBase(op_registry, net_def, ws, device), device_(device), op_kernel_context_(new OpKernelContext(ws, device)) { MACE_LATENCY_LOGGER(1, "Constructing SerialNet ", net_def->name()); DeviceType device_type = device->device_type(); for (int idx = 0; idx < net_def->op_size(); ++idx) { const auto &operator_def = net_def->op(idx); // TODO(liuqi): refactor to add device_type to OperatorDef const int op_device = ProtoArgHelper::GetOptionalArg( operator_def, "device", static_cast(device_type)); if (op_device == device_type) { VLOG(3) << "Creating operator " << operator_def.name() << "(" << operator_def.type() << ")"; OperatorDef temp_def(operator_def); std::unique_ptr op( op_registry->CreateOperator(temp_def, op_kernel_context_.get(), device_type, mode)); if (op) { operators_.emplace_back(std::move(op)); } } } } MaceStatus SerialNet::Run(RunMetadata *run_metadata) { MACE_MEMORY_LOGGING_GUARD(); MACE_LATENCY_LOGGER(1, "Running net"); const DeviceType device_type = device_->device_type(); for (auto iter = operators_.begin(); iter != operators_.end(); ++iter) { auto &op = *iter; MACE_LATENCY_LOGGER(2, "Running operator ", op->debug_def().name(), "(", op->debug_def().type(), "), mem_id: ", MakeListString(op->debug_def().mem_id().data(), op->debug_def().mem_id().size())); bool future_wait = (device_type == DeviceType::GPU && (run_metadata != nullptr || std::distance(iter, operators_.end()) == 1)); CallStats call_stats; if (future_wait) { StatsFuture future; MACE_RETURN_IF_ERROR(op->Run(&future)); if (run_metadata != nullptr) { future.wait_fn(&call_stats); } else { future.wait_fn(nullptr); } #ifdef MACE_ENABLE_OPENCL device_->opencl_runtime()->command_queue().finish(); #endif } else if (run_metadata != nullptr) { call_stats.start_micros = NowMicros(); MACE_RETURN_IF_ERROR(op->Run(nullptr)); call_stats.end_micros = NowMicros(); } else { MACE_RETURN_IF_ERROR(op->Run(nullptr)); } if (run_metadata != nullptr) { std::vector strides; int padding_type = -1; std::vector paddings; std::vector dilations; std::vector kernels; std::string type = op->debug_def().type(); if (type.compare("Conv2D") == 0 || type.compare("FusedConv2D") == 0 || type.compare("DepthwiseConv2d") == 0 || type.compare("Pooling") == 0) { strides = op->GetRepeatedArgs("strides"); padding_type = op->GetOptionalArg("padding", -1); paddings = op->GetRepeatedArgs("padding_values"); dilations = op->GetRepeatedArgs("dilations"); if (type.compare("Pooling") == 0) { kernels = op->GetRepeatedArgs("kernels"); } else { kernels = op->Input(1)->shape(); } } std::vector> output_shapes; for (auto output : op->Outputs()) { output_shapes.push_back(output->shape()); } OperatorStats op_stats = {op->debug_def().name(), op->debug_def().type(), output_shapes, {strides, padding_type, paddings, dilations, kernels}, call_stats}; run_metadata->op_stats.emplace_back(op_stats); } VLOG(3) << "Operator " << op->debug_def().name() << " has shape: " << MakeString(op->Output(0)->shape()); if (EnvEnabled("MACE_LOG_TENSOR_RANGE") && device_type == CPU) { for (int i = 0; i < op->OutputSize(); ++i) { int data_type = op->GetOptionalArg("T", static_cast(DT_FLOAT)); if (data_type == static_cast(DT_FLOAT)) { float max_v = std::numeric_limits::lowest(); float min_v = std::numeric_limits::max(); Tensor::MappingGuard guard(op->Output(i)); const float *output_data = op->Output(i)->data(); for (index_t j = 0; j < op->Output(i)->size(); ++j) { max_v = std::max(max_v, output_data[j]); min_v = std::min(min_v, output_data[j]); } LOG(INFO) << "Tensor range @@" << op->debug_def().output(i) << "@@" << min_v << "," << max_v; } } } } return MACE_SUCCESS; } std::unique_ptr CreateNet( const std::shared_ptr op_registry, const NetDef &net_def, Workspace *ws, Device *device, const NetMode mode) { std::shared_ptr tmp_net_def(new NetDef(net_def)); return CreateNet(op_registry, tmp_net_def, ws, device, mode); } std::unique_ptr CreateNet( const std::shared_ptr op_registry, const std::shared_ptr net_def, Workspace *ws, Device *device, const NetMode mode) { std::unique_ptr net( new SerialNet(op_registry, net_def, ws, device, mode)); return net; } } // namespace mace