Merge remote-tracking branch 'upstream/develop' into develop

85948602 · MRXLT · 64742be8 · 4dba4f99 · 85948602 · 85948602
Showing 3 changed files with 115 additions and 48 deletions

README.md README.md +12 -11

cube/cube-transfer/src/transfer/deployer.go cube/cube-transfer/src/transfer/deployer.go +35 -30

elastic-ctr/client/demo/elastic_ctr.py elastic-ctr/client/demo/elastic_ctr.py +68 -7

未找到文件。
--- a/README.md
+++ b/README.md
 # 概述
-PaddlePaddle是百度开源的机器学习框架，广泛支持各种深度学习模型的定制化开发; Paddle serving是Paddle的在线预测部分，与Paddle模型训练环节无缝衔接，提供机器学习预测云服务。
+PaddlePaddle是百度开源的机器学习框架，广泛支持各种深度学习模型的定制化开发; Paddle serving是PaddlePaddle的在线预估服务框架，通过加载PaddlePaddle训练得到的模型，利用PaddlePaddle的预测库，提供机器学习预测云服务。
+
+# 文档
+
+[设计文档](doc/DESIGN.md)
+
+[从零开始写一个预测服务](doc/CREATING.md)
+
+[编译安装](doc/INSTALL.md)
+
+[FAQ](doc/FAQ.md)
+

 # 框架简介

@@ -80,13 +91,3 @@ Paddle serving框架为策略工程师提供以下三层面的功能性扩展：
 `-- tools                               # CI工具
    `-- codestyle
 ```
-
-# 文档
-
-[设计文档](doc/DESIGN.md)
-
-[从零开始写一个预测服务](doc/CREATING.md)
-
-[编译安装](doc/INSTALL.md)
-
-[FAQ](doc/FAQ.md)
--- a/cube/cube-transfer/src/transfer/deployer.go
+++ b/cube/cube-transfer/src/transfer/deployer.go
@@ -81,16 +81,17 @@ func CmdInstsDownload() {
 			}
 		}
 		for i, inst := range Dict.Instances {
-			err := <-chs[i]
-			logex.Noticef("[instance resp]download:%v", Dict.Instances)
-			if err != nil || keyAndRespSlice[i].Success != "0" {
-				logex.Warningf("cmd cube online downlaod of %v:%v, shard:%v failed", inst.AgentIp, inst.AgentPort, inst.Shard)
-				continue
-			}
-			if inst.Status < dict.Instance_Status_Download_Succ {
-				Dict.Instances[i].Status = dict.Instance_Status_Download_Succ
-				Dict.Instances[i].DownloadedTime = int(time.Now().Unix())
-				Dict.DownloadSuccInsts++
+			if inst.Status != dict.Instance_Status_Download_Succ {
+				err := <-chs[i]
+				if err != nil || keyAndRespSlice[i].Success != "0" {
+					logex.Warningf("cmd cube online downlaod of %v:%v, shard:%v failed", inst.AgentIp, inst.AgentPort, inst.Shard)
+					continue
+				}
+				if inst.Status < dict.Instance_Status_Download_Succ {
+					Dict.Instances[i].Status = dict.Instance_Status_Download_Succ
+					Dict.Instances[i].DownloadedTime = int(time.Now().Unix())
+					Dict.DownloadSuccInsts++
+				}
 			}
 		}
 		if Dict.DownloadSuccInsts == Dict.InstancesNum {
@@ -130,16 +131,18 @@ func CmdInstsReload() {
 			}
 		}
 		for i, inst := range Dict.Instances {
-			err := <-chs[i]
-			logex.Noticef("[instance resp]reload:%v", Dict.Instances)
-			if err != nil || keyAndRespSlice[i].Success != "0" {
-				logex.Warningf("cmd cube online reload of %v:%v, shard:%v failed", inst.AgentIp, inst.AgentPort, inst.Shard)
-				continue
-			}
-			if inst.Status < dict.Instance_Status_Reload_Succ {
-				Dict.Instances[i].Status = dict.Instance_Status_Reload_Succ
-				Dict.Instances[i].ReloadedTime = int(time.Now().Unix())
-				Dict.ReloadSuccInsts++
+			if inst.Status != dict.Instance_Status_Reload_Succ {
+				err := <-chs[i]
+				logex.Noticef("[instance resp]reload:%v", Dict.Instances)
+				if err != nil || keyAndRespSlice[i].Success != "0" {
+					logex.Warningf("cmd cube online reload of %v:%v, shard:%v failed", inst.AgentIp, inst.AgentPort, inst.Shard)
+					continue
+				}
+				if inst.Status < dict.Instance_Status_Reload_Succ {
+					Dict.Instances[i].Status = dict.Instance_Status_Reload_Succ
+					Dict.Instances[i].ReloadedTime = int(time.Now().Unix())
+					Dict.ReloadSuccInsts++
+				}
 			}
 		}
 		if Dict.ReloadSuccInsts == Dict.InstancesNum {
@@ -179,16 +182,18 @@ func CmdInstsEnable() {
 			}
 		}
 		for i, inst := range Dict.Instances {
-			err := <-chs[i]
-			logex.Noticef("[instance resp]enable:%v", Dict.Instances)
-			if err != nil || keyAndRespSlice[i].Success != "0" {
-				logex.Warningf("cmd cube online enable of %v:%v, shard:%v failed", inst.AgentIp, inst.AgentPort, inst.Shard)
-				continue
-			}
-			if inst.Status < dict.Instance_Status_Enable_Succ {
-				Dict.Instances[i].Status = dict.Instance_Status_Enable_Succ
-				Dict.Instances[i].EnabledTime = int(time.Now().Unix())
-				Dict.EnableSuccInsts++
+			if inst.Status != dict.Instance_Status_Enable_Succ {
+				err := <-chs[i]
+				logex.Noticef("[instance resp]enable:%v", Dict.Instances)
+				if err != nil || keyAndRespSlice[i].Success != "0" {
+					logex.Warningf("cmd cube online enable of %v:%v, shard:%v failed", inst.AgentIp, inst.AgentPort, inst.Shard)
+					continue
+				}
+				if inst.Status < dict.Instance_Status_Enable_Succ {
+					Dict.Instances[i].Status = dict.Instance_Status_Enable_Succ
+					Dict.Instances[i].EnabledTime = int(time.Now().Unix())
+					Dict.EnableSuccInsts++
+				}
 			}
 		}
 		if Dict.EnableSuccInsts == Dict.InstancesNum {

--- a/elastic-ctr/client/demo/elastic_ctr.py
+++ b/elastic-ctr/client/demo/elastic_ctr.py
@@ -19,7 +19,7 @@ import os

 from elastic_ctr_api import ElasticCTRAPI

-BATCH_SIZE = 3
+BATCH_SIZE = 10
 SERVING_IP = "127.0.0.1"
 SLOT_CONF_FILE = "./conf/slot.conf"
 CTR_EMBEDDING_TABLE_SIZE = 100000001
@@ -33,6 +33,59 @@ def str2long(str):
        return int(str)


def tied_rank(x):
    """
    Computes the tied rank of elements in x.
    Ranks are 1-based and ascending; elements that compare equal all
    receive the average of the ranks they jointly occupy.
    Parameters
    ----------
    x : list of numbers, numpy array
    Returns
    -------
    score : list of numbers
            The tied rank of each element in x, in the original order of x.
            An empty input yields an empty list.
    """
    # Indices of x ordered by value; the sort is stable, so equal values
    # keep their original relative order.
    order = sorted(range(len(x)), key=lambda idx: x[idx])
    total = len(order)
    ranks = [0.0] * total
    start = 0
    while start < total:
        # Extend [start, stop) over the run of equal values.
        stop = start + 1
        while stop < total and x[order[stop]] == x[order[start]]:
            stop += 1
        # The run occupies 1-based ranks start+1 .. stop; assign their mean.
        shared_rank = (start + 1 + stop) / 2.0
        for pos in range(start, stop):
            ranks[order[pos]] = shared_rank
        start = stop
    return ranks
+
+
def auc(actual, posterior):
    """
    Computes the area under the receiver operating characteristic (AUC)
    This function computes the AUC metric for binary classification from
    the rank sum of the positive examples.
    Parameters
    ----------
    actual : list of binary numbers, numpy array
             The ground truth value (1 marks a positive example)
    posterior : same type as actual
                Score for each example; a higher score means more likely
                to be positive. posterior[i] must correspond to actual[i].
                If the two sequences differ in length, only the leading
                elements present in both are evaluated.
    Returns
    -------
    score : double
            The AUC of posterior with respect to actual
    Raises
    ------
    ZeroDivisionError
            If the evaluated examples contain no positives or no negatives,
            in which case the AUC is undefined.
    """
    # Evaluate only examples that have both a label and a score; counting
    # positives among unscored labels would skew the normalisation below.
    n_scored = min(len(actual), len(posterior))
    scored_labels = actual[:n_scored]
    ranks = tied_rank(posterior[:n_scored])
    num_positive = sum(1 for label in scored_labels if label == 1)
    num_negative = n_scored - num_positive
    sum_positive = sum(rank for rank, label in zip(ranks, scored_labels)
                       if label == 1)
    # Rank-sum formulation: subtract the smallest possible rank sum of the
    # positives, then normalise by the number of positive/negative pairs.
    return ((sum_positive - num_positive * (num_positive + 1) / 2.0) /
            (num_negative * num_positive))
+
+
 def data_reader(data_file, samples, labels):
    if not os.path.exists(data_file):
        print("Path %s not exist" % data_file)
@@ -66,7 +119,7 @@ def data_reader(data_file, samples, labels):
                    sample[x] = [0]
            samples.append(sample)

-
+            
 if __name__ == "__main__":
    """ main
    """
@@ -89,8 +142,10 @@ if __name__ == "__main__":
        sys.exit(-1)

    ret = data_reader(sys.argv[4], samples, labels)
-
+    print(len(samples))
    correct = 0
+    wrong_label_1_count = 0
+    result_list = []
    for i in range(0, len(samples) - BATCH_SIZE, BATCH_SIZE):
        api.clear()
        batch = samples[i:i + BATCH_SIZE]
@@ -110,6 +165,7 @@ if __name__ == "__main__":

        idx = 0
        for x in predictions:
+            result_list.append(x["prob1"])
            if x["prob0"] >= x["prob1"]:
                pred = 0
            else:
@@ -118,9 +174,14 @@ if __name__ == "__main__":
            if labels[i + idx] == pred:
                correct += 1
            else:
-                print("id=%d predict incorrect: pred=%d label=%d (%f %f)" %
-                      (i + idx, pred, labels[i + idx], x["prob0"], x["prob1"]))
-
+                #if labels[i + idx] == 1:
+                #    wrong_label_1_count += 1
+                #    print("error label=1 count", wrong_label_1_count)
+                #print("id=%d predict incorrect: pred=%d label=%d (%f %f)" %
+                #      (i + idx, pred, labels[i + idx], x["prob0"], x["prob1"]))
+                pass
            idx = idx + 1
+    

-    print("Acc=%f" % (float(correct) / len(samples)))
+    #print("Acc=%f" % (float(correct) / len(samples)))
+    print("auc = ", auc(labels, result_list) )