# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Ranking-based AUC metric (python/paddle_serving_client/metric/auc.py)."""


def tied_rank(x):
    """
    Computes the tied rank of elements in x.

    Ranks are 1-based. Elements that compare equal all receive the average
    of the ranks they would occupy in sorted order, e.g. ``[1, 1, 2]`` is
    ranked ``[1.5, 1.5, 3.0]``.

    Parameters
    ----------
    x : list of numbers, numpy array

    Returns
    -------
    score : list of floats
            The tied rank of each element in x, in the original order of x.
            An empty input yields an empty list.
    """
    n = len(x)
    ranks = [0] * n
    if n == 0:
        # Nothing to rank; the original indexed sorted_x[0] and crashed here.
        return ranks

    # Pair each value with its original position so ranks can be written
    # back in input order after sorting.
    ordered = sorted(zip(x, range(n)))

    start = 0  # index (in sorted order) where the current tie group begins
    for end in range(1, n + 1):
        # A tie group closes at the end of the data or when the value changes.
        if end == n or ordered[end][0] != ordered[start][0]:
            # The group occupies 1-based ranks start+1 .. end; use their mean.
            average_rank = (start + 1 + end) / 2.0
            for _, original_index in ordered[start:end]:
                ranks[original_index] = average_rank
            start = end
    return ranks


def auc(actual, posterior):
    """
    Computes the area under the receiver-operator characteristic (AUC).

    This function computes the AUC metric for binary classification from
    the tied ranks of the scores: the rank sum of the positive examples,
    minus its minimum possible value, divided by the number of
    (positive, negative) pairs.

    Parameters
    ----------
    actual : list of binary numbers, numpy array
             The ground truth value; entries equal to 1 are positives,
             everything else is counted as negative.
    posterior : same type as actual
                Defines a ranking on the binary numbers, from most likely to
                be positive to least likely to be positive.

    Returns
    -------
    score : double
            The AUC of posterior with respect to actual, in [0, 1].

    Raises
    ------
    ValueError
        If actual and posterior have different lengths.
    ZeroDivisionError
        If actual does not contain at least one positive and one negative
        example (the AUC is undefined in that case).
    """
    if len(actual) != len(posterior):
        # A longer `actual` used to be accepted silently and inflated the
        # negative count, producing a wrong score.
        raise ValueError(
            "actual and posterior must have the same length, got %d and %d"
            % (len(actual), len(posterior)))

    ranks = tied_rank(posterior)
    num_positive = sum(1 for label in actual if label == 1)
    num_negative = len(actual) - num_positive
    if num_positive == 0 or num_negative == 0:
        # Same exception type the bare division raised, with a usable message.
        raise ZeroDivisionError(
            "AUC is undefined: actual must contain at least one positive "
            "and one negative example")

    sum_positive = sum(
        rank for rank, label in zip(ranks, actual) if label == 1)
    # num_positive * (num_positive + 1) / 2 is the smallest rank sum the
    # positives can have (all of them ranked below every negative).
    return ((sum_positive - num_positive * (num_positive + 1) / 2.0) /
            (num_negative * num_positive))


# Packaging note (from the accompanying change to python/setup.py.client.in):
# the new subpackage is registered by appending 'paddle_serving_client.metric'
# to `packages` and by mapping it in `package_dir` to
# '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/metric'.