diff --git a/.gitignore b/.gitignore index 22ca82834ccdd50d55ac79c807cc378e33066e1c..babbc03dca3cf85475f0b4ff2b4f77bf16d72db4 100644 --- a/.gitignore +++ b/.gitignore @@ -41,6 +41,7 @@ cmake-build-debug *.pb.h *.pb.cc *.pb +*_grpc.py # Object files *.o diff --git a/include/inference.h b/include/inference.h index b8665db1fa4dc03afb4d2bfaa283f74fbb3acf39..082cae5d5c8d3d92686a498e852f66973ad05d07 100644 --- a/include/inference.h +++ b/include/inference.h @@ -24,20 +24,20 @@ namespace mindspore { namespace inference { - +enum Status { SUCCESS = 0, FAILED, INVALID_INPUTS }; class MS_API InferSession { public: InferSession() = default; virtual ~InferSession() = default; - virtual bool InitEnv(const std::string &device_type, uint32_t device_id) = 0; - virtual bool FinalizeEnv() = 0; - virtual bool LoadModelFromFile(const std::string &file_name, uint32_t &model_id) = 0; - virtual bool UnloadModel(uint32_t model_id) = 0; + virtual Status InitEnv(const std::string &device_type, uint32_t device_id) = 0; + virtual Status FinalizeEnv() = 0; + virtual Status LoadModelFromFile(const std::string &file_name, uint32_t &model_id) = 0; + virtual Status UnloadModel(uint32_t model_id) = 0; // override this method to avoid request/reply data copy - virtual bool ExecuteModel(uint32_t model_id, const RequestBase &request, ReplyBase &reply) = 0; + virtual Status ExecuteModel(uint32_t model_id, const RequestBase &request, ReplyBase &reply) = 0; - virtual bool ExecuteModel(uint32_t model_id, const std::vector &inputs, - std::vector &outputs) { + virtual Status ExecuteModel(uint32_t model_id, const std::vector &inputs, + std::vector &outputs) { VectorInferTensorWrapRequest request(inputs); VectorInferTensorWrapReply reply(outputs); return ExecuteModel(model_id, request, reply); diff --git a/mindspore/ccsrc/backend/session/infer_session.cc b/mindspore/ccsrc/backend/session/infer_session.cc index 5de815b8f747015fd80fa2d403b822bcc2a8b4ad..cec532fc8cc802eec2b7fcfd2340d72929d1cdac 100644 --- a/mindspore/ccsrc/backend/session/infer_session.cc +++ b/mindspore/ccsrc/backend/session/infer_session.cc @@ -37,8 +37,8 @@ namespace mindspore::inference { std::shared_ptr InferSession::CreateSession(const std::string &device, uint32_t device_id) { try { auto session = std::make_shared(); - bool ret = session->InitEnv(device, device_id); - if (!ret) { + Status ret = session->InitEnv(device, device_id); + if (ret != SUCCESS) { return nullptr; } return session; @@ -84,21 +84,21 @@ std::shared_ptr> MSInferSession::ReadFile(const std::string &f return buf; } -bool MSInferSession::LoadModelFromFile(const std::string &file_name, uint32_t &model_id) { +Status MSInferSession::LoadModelFromFile(const std::string &file_name, uint32_t &model_id) { auto graphBuf = ReadFile(file_name); if (graphBuf == nullptr) { MS_LOG(ERROR) << "Read model file failed, file name is " << file_name.c_str(); - return false; + return FAILED; } auto graph = LoadModel(graphBuf->data(), graphBuf->size(), device_type_); if (graph == nullptr) { MS_LOG(ERROR) << "Load graph model failed, file name is " << file_name.c_str(); - return false; + return FAILED; } - bool ret = CompileGraph(graph, model_id); - if (!ret) { + Status ret = CompileGraph(graph, model_id); + if (ret != SUCCESS) { MS_LOG(ERROR) << "Compile graph model failed, file name is " << file_name.c_str(); - return false; + return FAILED; } MS_LOG(INFO) << "Load model from file " << file_name << " success"; @@ -107,14 +107,14 @@ bool MSInferSession::LoadModelFromFile(const std::string &file_name, uint32_t &m rtError_t rt_ret = rtCtxGetCurrent(&context_); if (rt_ret != RT_ERROR_NONE || context_ == nullptr) { MS_LOG(ERROR) << "the ascend device context is null"; - return false; + return FAILED; } #endif - return true; + return SUCCESS; } -bool MSInferSession::UnloadModel(uint32_t model_id) { return true; } +Status MSInferSession::UnloadModel(uint32_t model_id) { return SUCCESS; } tensor::TensorPtr ServingTensor2MSTensor(const InferTensorBase &out_tensor) { std::vector shape; @@ -170,16 +170,16 @@ void MSTensor2ServingTensor(tensor::TensorPtr ms_tensor, InferTensorBase &out_te out_tensor.set_data(ms_tensor->data_c(), ms_tensor->Size()); } -bool MSInferSession::ExecuteModel(uint32_t model_id, const RequestBase &request, ReplyBase &reply) { +Status MSInferSession::ExecuteModel(uint32_t model_id, const RequestBase &request, ReplyBase &reply) { #ifdef ENABLE_D if (context_ == nullptr) { MS_LOG(ERROR) << "rtCtx is nullptr"; - return false; + return FAILED; } rtError_t rt_ret = rtCtxSetCurrent(context_); if (rt_ret != RT_ERROR_NONE) { MS_LOG(ERROR) << "set Ascend rtCtx failed"; - return false; + return FAILED; } #endif @@ -187,47 +187,47 @@ bool MSInferSession::ExecuteModel(uint32_t model_id, const RequestBase &request, for (size_t i = 0; i < request.size(); i++) { if (request[i] == nullptr) { MS_LOG(ERROR) << "Execute Model " << model_id << " Failed, input tensor is null, index " << i; - return false; + return FAILED; } auto input = ServingTensor2MSTensor(*request[i]); if (input == nullptr) { MS_LOG(ERROR) << "Tensor convert failed"; - return false; + return FAILED; } inputs.push_back(input); } if (!CheckModelInputs(model_id, inputs)) { MS_LOG(ERROR) << "Check Model " << model_id << " Inputs Failed"; - return false; + return INVALID_INPUTS; } vector outputs = RunGraph(model_id, inputs); if (outputs.empty()) { MS_LOG(ERROR) << "Execute Model " << model_id << " Failed"; - return false; + return FAILED; } reply.clear(); for (const auto &tensor : outputs) { auto out_tensor = reply.add(); if (out_tensor == nullptr) { MS_LOG(ERROR) << "Execute Model " << model_id << " Failed, add output tensor failed"; - return false; + return FAILED; } MSTensor2ServingTensor(tensor, *out_tensor); } - return true; + return SUCCESS; } -bool MSInferSession::FinalizeEnv() { +Status MSInferSession::FinalizeEnv() { auto ms_context = MsContext::GetInstance(); if (ms_context == nullptr) { MS_LOG(ERROR) << "Get Context failed!"; - return false; + return FAILED; } if (!ms_context->CloseTsd()) { MS_LOG(ERROR) << "Inference CloseTsd failed!"; - return false; + return FAILED; } - return true; + return SUCCESS; } std::shared_ptr MSInferSession::LoadModel(const char *model_buf, size_t size, const std::string &device) { @@ -292,16 +292,16 @@ void MSInferSession::RegAllOp() { return; } -bool MSInferSession::CompileGraph(std::shared_ptr funcGraphPtr, uint32_t &model_id) { +Status MSInferSession::CompileGraph(std::shared_ptr funcGraphPtr, uint32_t &model_id) { MS_ASSERT(session_impl_ != nullptr); try { auto graph_id = session_impl_->CompileGraph(NOT_NULL(funcGraphPtr)); py::gil_scoped_release gil_release; model_id = graph_id; - return true; + return SUCCESS; } catch (std::exception &e) { MS_LOG(ERROR) << "Inference CompileGraph failed"; - return false; + return FAILED; } } @@ -327,31 +327,31 @@ string MSInferSession::AjustTargetName(const std::string &device) { } } -bool MSInferSession::InitEnv(const std::string &device, uint32_t device_id) { +Status MSInferSession::InitEnv(const std::string &device, uint32_t device_id) { RegAllOp(); auto ms_context = MsContext::GetInstance(); ms_context->set_execution_mode(kGraphMode); ms_context->set_device_id(device_id); auto ajust_device = AjustTargetName(device); if (ajust_device == "") { - return false; + return FAILED; } ms_context->set_device_target(device); session_impl_ = session::SessionFactory::Get().Create(ajust_device); if (session_impl_ == nullptr) { MS_LOG(ERROR) << "Session create failed!, please make sure target device:" << device << " is available."; - return false; + return FAILED; } session_impl_->Init(device_id); if (ms_context == nullptr) { MS_LOG(ERROR) << "Get Context failed!"; - return false; + return FAILED; } if (!ms_context->OpenTsd()) { MS_LOG(ERROR) << "Session init OpenTsd failed!"; - return false; + return FAILED; } - return true; + return SUCCESS; } bool MSInferSession::CheckModelInputs(uint32_t graph_id, const std::vector &inputs) const { diff --git a/mindspore/ccsrc/backend/session/infer_session.h b/mindspore/ccsrc/backend/session/infer_session.h index edebd8834ffefadbe9f28f78e847ad9b7a257e14..62ed5f9ff4a052285018f1693a8c269b0cb02267 100644 --- a/mindspore/ccsrc/backend/session/infer_session.h +++ b/mindspore/ccsrc/backend/session/infer_session.h @@ -38,11 +38,11 @@ class MSInferSession : public InferSession { MSInferSession(); ~MSInferSession(); - bool InitEnv(const std::string &device_type, uint32_t device_id) override; - bool FinalizeEnv() override; - bool LoadModelFromFile(const std::string &file_name, uint32_t &model_id) override; - bool UnloadModel(uint32_t model_id) override; - bool ExecuteModel(uint32_t model_id, const RequestBase &inputs, ReplyBase &outputs) override; + Status InitEnv(const std::string &device_type, uint32_t device_id) override; + Status FinalizeEnv() override; + Status LoadModelFromFile(const std::string &file_name, uint32_t &model_id) override; + Status UnloadModel(uint32_t model_id) override; + Status ExecuteModel(uint32_t model_id, const RequestBase &inputs, ReplyBase &outputs) override; private: std::shared_ptr session_impl_ = nullptr; @@ -57,7 +57,7 @@ class MSInferSession : public InferSession { std::shared_ptr> ReadFile(const std::string &file); static void RegAllOp(); string AjustTargetName(const std::string &device); - bool CompileGraph(std::shared_ptr funcGraphPtr, uint32_t &model_id); + Status CompileGraph(std::shared_ptr funcGraphPtr, uint32_t &model_id); bool CheckModelInputs(uint32_t graph_id, const std::vector &inputs) const; std::vector RunGraph(uint32_t graph_id, const std::vector &inputs); }; diff --git a/serving/README.md b/serving/README_CN.md similarity index 100% rename from serving/README.md rename to serving/README_CN.md diff --git a/serving/acl/acl_session.cc b/serving/acl/acl_session.cc index dcbb43689ddb0868ed671cc763a13343ec5636e7..d154cc7164d053561a55fcb1f7100ca0e9c5c567 100644 --- a/serving/acl/acl_session.cc +++ b/serving/acl/acl_session.cc @@ -35,53 +35,53 @@ std::shared_ptr InferSession::CreateSession(const std::string &dev } } -bool AclSession::LoadModelFromFile(const std::string &file_name, uint32_t &model_id) { - return model_process_.LoadModelFromFile(file_name, model_id); +Status AclSession::LoadModelFromFile(const std::string &file_name, uint32_t &model_id) { + return model_process_.LoadModelFromFile(file_name, model_id) ? SUCCESS : FAILED; } -bool AclSession::UnloadModel(uint32_t model_id) { +Status AclSession::UnloadModel(uint32_t model_id) { model_process_.UnLoad(); - return true; + return SUCCESS; } -bool AclSession::ExecuteModel(uint32_t model_id, const RequestBase &request, - ReplyBase &reply) { // set d context +Status AclSession::ExecuteModel(uint32_t model_id, const RequestBase &request, + ReplyBase &reply) { // set d context aclError rt_ret = aclrtSetCurrentContext(context_); if (rt_ret != ACL_ERROR_NONE) { MSI_LOG_ERROR << "set the ascend device context failed"; - return false; + return FAILED; } - return model_process_.Execute(request, reply); + return model_process_.Execute(request, reply) ? SUCCESS : FAILED; } -bool AclSession::InitEnv(const std::string &device_type, uint32_t device_id) { +Status AclSession::InitEnv(const std::string &device_type, uint32_t device_id) { device_type_ = device_type; device_id_ = device_id; auto ret = aclInit(nullptr); if (ret != ACL_ERROR_NONE) { MSI_LOG_ERROR << "Execute aclInit Failed"; - return false; + return FAILED; } MSI_LOG_INFO << "acl init success"; ret = aclrtSetDevice(device_id_); if (ret != ACL_ERROR_NONE) { MSI_LOG_ERROR << "acl open device " << device_id_ << " failed"; - return false; + return FAILED; } MSI_LOG_INFO << "open device " << device_id_ << " success"; ret = aclrtCreateContext(&context_, device_id_); if (ret != ACL_ERROR_NONE) { MSI_LOG_ERROR << "acl create context failed"; - return false; + return FAILED; } MSI_LOG_INFO << "create context success"; ret = aclrtCreateStream(&stream_); if (ret != ACL_ERROR_NONE) { MSI_LOG_ERROR << "acl create stream failed"; - return false; + return FAILED; } MSI_LOG_INFO << "create stream success"; @@ -89,17 +89,17 @@ bool AclSession::InitEnv(const std::string &device_type, uint32_t device_id) { ret = aclrtGetRunMode(&run_mode); if (ret != ACL_ERROR_NONE) { MSI_LOG_ERROR << "acl get run mode failed"; - return false; + return FAILED; } bool is_device = (run_mode == ACL_DEVICE); model_process_.SetIsDevice(is_device); MSI_LOG_INFO << "get run mode success is device input/output " << is_device; MSI_LOG_INFO << "Init acl success, device id " << device_id_; - return true; + return SUCCESS; } -bool AclSession::FinalizeEnv() { +Status AclSession::FinalizeEnv() { aclError ret; if (stream_ != nullptr) { ret = aclrtDestroyStream(stream_); @@ -129,7 +129,7 @@ bool AclSession::FinalizeEnv() { MSI_LOG_ERROR << "finalize acl failed"; } MSI_LOG_INFO << "end to finalize acl"; - return true; + return SUCCESS; } AclSession::AclSession() = default; diff --git a/serving/acl/acl_session.h b/serving/acl/acl_session.h index 206cf4b7fcf3f0523e717f380f493609b39574d6..2b44f9e1d58f0361b1744601bbce92c4cf1d8205 100644 --- a/serving/acl/acl_session.h +++ b/serving/acl/acl_session.h @@ -32,11 +32,11 @@ class AclSession : public InferSession { public: AclSession(); - bool InitEnv(const std::string &device_type, uint32_t device_id) override; - bool FinalizeEnv() override; - bool LoadModelFromFile(const std::string &file_name, uint32_t &model_id) override; - bool UnloadModel(uint32_t model_id) override; - bool ExecuteModel(uint32_t model_id, const RequestBase &request, ReplyBase &reply) override; + Status InitEnv(const std::string &device_type, uint32_t device_id) override; + Status FinalizeEnv() override; + Status LoadModelFromFile(const std::string &file_name, uint32_t &model_id) override; + Status UnloadModel(uint32_t model_id) override; + Status ExecuteModel(uint32_t model_id, const RequestBase &request, ReplyBase &reply) override; private: std::string device_type_; diff --git a/serving/core/server.cc b/serving/core/server.cc index 8b587896557422a3ff0fe75345fa609f591f14ed..180e07bb0cda0cb5a381fb9927c1721a25aa00ed 100644 --- a/serving/core/server.cc +++ b/serving/core/server.cc @@ -31,6 +31,7 @@ #include "core/version_control/version_controller.h" #include "core/util/file_system_operation.h" #include "core/serving_tensor.h" +#include "util/status.h" using ms_serving::MSService; using ms_serving::PredictReply; @@ -79,9 +80,9 @@ Status Session::Predict(const PredictRequest &request, PredictReply &reply) { auto ret = session_->ExecuteModel(graph_id_, serving_request, serving_reply); MSI_LOG(INFO) << "run Predict finished"; - if (!ret) { + if (Status(ret) != SUCCESS) { MSI_LOG(ERROR) << "execute model return failed"; - return FAILED; + return Status(ret); } return SUCCESS; } @@ -97,9 +98,9 @@ Status Session::Warmup(const MindSporeModelPtr model) { MSI_TIME_STAMP_START(LoadModelFromFile) auto ret = session_->LoadModelFromFile(file_name, graph_id_); MSI_TIME_STAMP_END(LoadModelFromFile) - if (!ret) { + if (Status(ret) != SUCCESS) { MSI_LOG(ERROR) << "Load graph model failed, file name is " << file_name.c_str(); - return FAILED; + return Status(ret); } model_loaded_ = true; MSI_LOG(INFO) << "Session Warmup finished"; @@ -119,12 +120,22 @@ namespace { static const uint32_t uint32max = 0x7FFFFFFF; std::promise exit_requested; -void ClearEnv() { - Session::Instance().Clear(); - // inference::ExitInference(); -} +void ClearEnv() { Session::Instance().Clear(); } void HandleSignal(int sig) { exit_requested.set_value(); } +grpc::Status CreatGRPCStatus(Status status) { + switch (status) { + case SUCCESS: + return grpc::Status::OK; + case FAILED: + return grpc::Status::CANCELLED; + case INVALID_INPUTS: + return grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, "The Predict Inputs do not match the Model Request!"); + default: + return grpc::Status::CANCELLED; + } +} + } // namespace // Service Implement @@ -134,8 +145,8 @@ class MSServiceImpl final : public MSService::Service { MSI_TIME_STAMP_START(Predict) auto res = Session::Instance().Predict(*request, *reply); MSI_TIME_STAMP_END(Predict) - if (res != SUCCESS) { - return grpc::Status::CANCELLED; + if (res != inference::SUCCESS) { + return CreatGRPCStatus(res); } MSI_LOG(INFO) << "Finish call service Eval"; return grpc::Status::OK; diff --git a/serving/core/util/status.h b/serving/core/util/status.h index 5f97f9b0b798460663c9972b94bc5f4dc63cc661..d416b8c88591afe715ea229db9fc9222098d05bc 100644 --- a/serving/core/util/status.h +++ b/serving/core/util/status.h @@ -18,7 +18,7 @@ namespace mindspore { namespace serving { using Status = uint32_t; -enum ServingStatus { SUCCESS = 0, FAILED }; +enum ServingStatus { SUCCESS = 0, FAILED, INVALID_INPUTS }; } // namespace serving } // namespace mindspore diff --git a/serving/example/cpp_client/ms_client.cc b/serving/example/cpp_client/ms_client.cc index a689971943601be7a75e2695059800a6ffc8a4de..720f1cf7044417f807cea6d19d1bf111acbc2a63 100644 --- a/serving/example/cpp_client/ms_client.cc +++ b/serving/example/cpp_client/ms_client.cc @@ -31,51 +31,51 @@ using ms_serving::TensorShape; class MSClient { public: - explicit MSClient(std::shared_ptr channel) : stub_(MSService::NewStub(channel)) {} + explicit MSClient(std::shared_ptr channel) : stub_(MSService::NewStub(channel)) {} - ~MSClient() = default; + ~MSClient() = default; - std::string Predict() { - // Data we are sending to the server. - PredictRequest request; + std::string Predict() { + // Data we are sending to the server. + PredictRequest request; - Tensor data; - TensorShape shape; - shape.add_dims(4); - *data.mutable_tensor_shape() = shape; - data.set_tensor_type(ms_serving::MS_FLOAT32); - std::vector input_data{1, 2, 3, 4}; - data.set_data(input_data.data(), input_data.size() * sizeof(float)); - *request.add_data() = data; - *request.add_data() = data; - std::cout << "intput tensor size is " << request.data_size() << std::endl; - // Container for the data we expect from the server. - PredictReply reply; + Tensor data; + TensorShape shape; + shape.add_dims(4); + *data.mutable_tensor_shape() = shape; + data.set_tensor_type(ms_serving::MS_FLOAT32); + std::vector input_data{1, 2, 3, 4}; + data.set_data(input_data.data(), input_data.size() * sizeof(float)); + *request.add_data() = data; + *request.add_data() = data; + std::cout << "intput tensor size is " << request.data_size() << std::endl; + // Container for the data we expect from the server. + PredictReply reply; - // Context for the client. It could be used to convey extra information to - // the server and/or tweak certain RPC behaviors. - ClientContext context; + // Context for the client. It could be used to convey extra information to + // the server and/or tweak certain RPC behaviors. + ClientContext context; - // The actual RPC. - Status status = stub_->Predict(&context, request, &reply); - std::cout << "Compute [1, 2, 3, 4] + [1, 2, 3, 4]" << std::endl; + // The actual RPC. + Status status = stub_->Predict(&context, request, &reply); + std::cout << "Compute [1, 2, 3, 4] + [1, 2, 3, 4]" << std::endl; + + // Act upon its status. + if (status.ok()) { std::cout << "Add result is"; for (size_t i = 0; i < reply.result(0).data().size() / sizeof(float); i++) { std::cout << " " << (reinterpret_cast(reply.mutable_result(0)->mutable_data()->data()))[i]; } std::cout << std::endl; - - // Act upon its status. - if (status.ok()) { - return "RPC OK"; - } else { - std::cout << status.error_code() << ": " << status.error_message() << std::endl; - return "RPC failed"; - } + return "RPC OK"; + } else { + std::cout << status.error_code() << ": " << status.error_message() << std::endl; + return "RPC failed"; } + } private: - std::unique_ptr stub_; + std::unique_ptr stub_; }; int main(int argc, char **argv) { diff --git a/serving/example/python_client/ms_client.py b/serving/example/python_client/ms_client.py index 82c8944fc7f8b4f0c48a2dbddfe390d21a403d67..8ea64916d9862306405b3b0e077e0c0e86b1fc92 100644 --- a/serving/example/python_client/ms_client.py +++ b/serving/example/python_client/ms_client.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ +import sys import grpc import numpy as np import ms_service_pb2 @@ -19,7 +20,19 @@ import ms_service_pb2_grpc def run(): - channel = grpc.insecure_channel('localhost:5500') + if len(sys.argv) > 2: + sys.exit("input error") + channel_str = "" + if len(sys.argv) == 2: + split_args = sys.argv[1].split('=') + if len(split_args) > 1: + channel_str = split_args[1] + else: + channel_str = 'localhost:5500' + else: + channel_str = 'localhost:5500' + + channel = grpc.insecure_channel(channel_str) stub = ms_service_pb2_grpc.MSServiceStub(channel) request = ms_service_pb2.PredictRequest() @@ -33,11 +46,17 @@ def run(): y.tensor_type = ms_service_pb2.MS_FLOAT32 y.data = (np.ones([4]).astype(np.float32)).tobytes() - result = stub.Predict(request) - print(result) - result_np = np.frombuffer(result.result[0].data, dtype=np.float32).reshape(result.result[0].tensor_shape.dims) - print("ms client received: ") - print(result_np) + try: + result = stub.Predict(request) + print(result) + result_np = np.frombuffer(result.result[0].data, dtype=np.float32).reshape(result.result[0].tensor_shape.dims) + print("ms client received: ") + print(result_np) + except grpc.RpcError as e: + print(e.details()) + status_code = e.code() + print(status_code.name) + print(status_code.value) if __name__ == '__main__': run()