__init__.py

#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .serving_client import PredictorClient
from .proto import sdk_configure_pb2 as sdk
from .proto import general_model_config_pb2 as m_config
import google.protobuf.text_format
import time

int_type = 0
float_type = 1


class SDKConfig(object):
    def __init__(self):
        self.sdk_desc = sdk.SDKConf()
        self.endpoints = []

    def set_server_endpoints(self, endpoints):
        self.endpoints = endpoints

    def gen_desc(self):
        predictor_desc = sdk.Predictor()
        predictor_desc.name = "general_model"
        predictor_desc.service_name = \
            "baidu.paddle_serving.predictor.general_model.GeneralModelService"
        predictor_desc.endpoint_router = "WeightedRandomRender"
        predictor_desc.weighted_random_render_conf.variant_weight_list = "100"

        variant_desc = sdk.VariantConf()
        variant_desc.tag = "var1"
        variant_desc.naming_conf.cluster = "list://{}".format(":".join(
            self.endpoints))

        predictor_desc.variants.extend([variant_desc])

        self.sdk_desc.predictors.extend([predictor_desc])
        self.sdk_desc.default_variant_conf.tag = "default"
        self.sdk_desc.default_variant_conf.connection_conf.connect_timeout_ms = 2000
        self.sdk_desc.default_variant_conf.connection_conf.rpc_timeout_ms = 20000
        self.sdk_desc.default_variant_conf.connection_conf.connect_retry_count = 2
        self.sdk_desc.default_variant_conf.connection_conf.max_connection_per_host = 100
        self.sdk_desc.default_variant_conf.connection_conf.hedge_request_timeout_ms = -1
        self.sdk_desc.default_variant_conf.connection_conf.hedge_fetch_retry_count = 2
        self.sdk_desc.default_variant_conf.connection_conf.connection_type = "pooled"

        self.sdk_desc.default_variant_conf.naming_conf.cluster_filter_strategy = "Default"
        self.sdk_desc.default_variant_conf.naming_conf.load_balance_strategy = "la"

        self.sdk_desc.default_variant_conf.rpc_parameter.compress_type = 0
        self.sdk_desc.default_variant_conf.rpc_parameter.package_size = 20
        self.sdk_desc.default_variant_conf.rpc_parameter.protocol = "baidu_std"
        self.sdk_desc.default_variant_conf.rpc_parameter.max_channel_per_request = 3

        return self.sdk_desc


class Client(object):
    def __init__(self):
        self.feed_names_ = []
        self.fetch_names_ = []
        self.client_handle_ = None
        self.feed_shapes_ = []
        self.feed_types_ = {}
        self.feed_names_to_idx_ = {}

    def load_client_config(self, path):
        model_conf = m_config.GeneralModelConfig()
        f = open(path, 'r')
        model_conf = google.protobuf.text_format.Merge(
            str(f.read()), model_conf)

        # load configuraion here
        # get feed vars, fetch vars
        # get feed shapes, feed types
        # map feed names to index
        self.client_handle_ = PredictorClient()
        self.client_handle_.init(path)
        self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
        self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
        self.feed_shapes_ = [var.shape for var in model_conf.feed_var]
        self.feed_names_to_idx_ = {}
        for i, var in enumerate(model_conf.feed_var):
            self.feed_names_to_idx_[var.alias_name] = i
            self.feed_types_[var.alias_name] = var.feed_type

        return

    def connect(self, endpoints):
        # check whether current endpoint is available
        # init from client config
        # create predictor here
        predictor_sdk = SDKConfig()
        predictor_sdk.set_server_endpoints(endpoints)
        sdk_desc = predictor_sdk.gen_desc()
        self.client_handle_.create_predictor_by_desc(sdk_desc.SerializeToString(
        ))

    def get_feed_names(self):
        return self.feed_names_

    def get_fetch_names(self):
        return self.fetch_names_

    def predict(self, feed={}, fetch=[], profile=False):
        int_slot = []
        float_slot = []
        int_feed_names = []
        float_feed_names = []
        fetch_names = []
        for key in feed:
            if key not in self.feed_names_:
                continue
            if self.feed_types_[key] == int_type:
                int_feed_names.append(key)
                int_slot.append(feed[key])
            elif self.feed_types_[key] == float_type:
                float_feed_names.append(key)
                float_slot.append(feed[key])

        for key in fetch:
            if key in self.fetch_names_:
                fetch_names.append(key)

        result = self.client_handle_.predict(
            float_slot, float_feed_names, int_slot, int_feed_names, fetch_names)

        # TODO(guru4elephant): the order of fetch var name should be consistent with
        #                      general_model_config, this is not friendly
        #                      In the future, we need make the number of fetched variable changable
        result_map = {}
        for i, name in enumerate(fetch_names):
            result_map[name] = result[i]

        if profile:
            result_map["infer_time"] = result[-1][0]

        return result_map

    def batch_predict(self, feed_batch=[], fetch=[], profile=False):
        int_slot_batch = []
        float_slot_batch = []
        int_feed_names = []
        float_feed_names = []
        fetch_names = []
        counter = 0
        for feed in feed_batch:
            int_slot = []
            float_slot = []
            for key in feed:
                if key not in self.feed_names_:
                    continue
                if self.feed_types_[key] == int_type:
                    if counter == 0:
                        int_feed_names.append(key)
                    int_slot.append(feed[key])
                elif self.feed_types_[key] == float_type:
                    if counter == 0:
                        float_feed_names.append(key)
                    float_slot.append(feed[key])
            counter += 1
            int_slot_batch.append(int_slot)
            float_slot_batch.append(float_slot)

        for key in fetch:
            if key in self.fetch_names_:
                fetch_names.append(key)

        result_batch = self.client_handle_.batch_predict(
            float_slot_batch, float_feed_names, int_slot_batch, int_feed_names,
            fetch_names)

        result_map_batch = []
        for result in result_batch[:-1]:
            result_map = {}
            for i, name in enumerate(fetch_names):
                result_map[name] = result[i]
            result_map_batch.append(result_map)

        infer_time = result_batch[-1][0][0]

        if profile:
            return result_map_batch, infer_time
        else:
            return result_map_batch

    def release(self):
        self.client_handle_.destroy_predictor()