diff --git a/.gitignore b/.gitignore index 2b46c0cf61cfeea586c4e3d8ec5dcfe9091b230f..cfa88408d0b4ac0e81c34937226768c96bc0117b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ *.o output +.idea/ *~ diff --git a/BCLOUD b/BCLOUD deleted file mode 100755 index c813028dd740c6ddcc3530a8a6d4ddf543399691..0000000000000000000000000000000000000000 --- a/BCLOUD +++ /dev/null @@ -1,8 +0,0 @@ -WORKROOT('../../../') -COMPILER('gcc82') -CPPFLAGS('-D_GNU_SOURCE -DNDEBUG') -GLOBAL_CFLAGS_STR = '-g -O3 -pipe -fopenmp ' -CFLAGS(GLOBAL_CFLAGS_STR) -GLOBAL_CXXFLAGS_STR = GLOBAL_CFLAGS_STR + ' -std=c++11 ' -CXXFLAGS(GLOBAL_CXXFLAGS_STR) -CONFIGS('baidu/paddlepaddle/paddle@feed_paddle_gcc8@git_branch') diff --git a/build.sh b/build.sh deleted file mode 100755 index d4a61f19615e53ad5ff718a66d8ebab49c9c713b..0000000000000000000000000000000000000000 --- a/build.sh +++ /dev/null @@ -1,76 +0,0 @@ -#!bash -RUN_DIR="$(cd "$(dirname "$0")"&&pwd)" -cd ${RUN_DIR} -build_mode=$1 -function print_usage() { - echo "++++++++++++++++++++++++++++++++++++++++++++++++++++" - echo "sh build.sh all|make|clean" - echo "- all: will update all env && make it" - echo "- make: just do make, never update env" - echo "- clean: make clean" - echo "++++++++++++++++++++++++++++++++++++++++++++++++++++" - exit 0 -} -if [ $# -lt 1 ];then - print_usage -fi - -cd ~ -user_dir=`pwd` -cd - - -python_binary=${user_dir}/.jumbo/bin/python2.7 -python_library=${user_dir}/.jumbo/lib/python2.7.so -python_include_dir=${user_dir}/.jumbo/include/python2.7 -if [ ! -f ${python_binary} ];then - echo "Miss python ${python_binary}, please install with this cmd: jumbo install python" - exit -1 -fi - - -function copy_paddle_env() { - cd ${RUN_DIR} - rm -rf build_env - mkdir build_env - echo "xxh copy" - cp -r ../../paddlepaddle/paddle/* build_env - cp -r feed ./build_env/paddlepaddle/paddle/paddle/fluid/ - cd build_env -} - -function apply_feed_code() { - #apply feed code - if [ -f "paddle/fluid/feed/apply_feed_code.sh" ];then - sh paddle/fluid/feed/apply_feed_code.sh - fi -} - -function makeit() { - cd build - make -j8 - cd .. -} - -function cmake_all() { - mkdir build - cd build - #make clean - cmake -DCMAKE_INSTALL_PREFIX=./output/ -DCMAKE_BUILD_TYPE=Release -DWITH_PYTHON=ON -DWITH_MKL=OFF -DWITH_GPU=OFF -DWITH_PSLIB=ON -DPYTHON_INCLUDE_DIR=${python_include_dir} -DPYTHON_LIBRARY=${python_library} -DPYTHON_EXECUTABLE=${python_binary} .. - cd .. -} - -if [ ! -d build_env ];then - copy_paddle_env -fi -cd ${RUN_DIR}/build_env - -if [ "${build_mode}" = "all" ];then - cmake_all - makeit -elif [ "${build_mode}" = "make" ];then - makeit -elif "${build_mode}" = "clean" ];then - copy_paddle_env - #cd build - #make clean -fi diff --git a/feed/CMakeLists.txt b/feed/CMakeLists.txt deleted file mode 100755 index 88ccf29d47342105b079dd48f79072cb5e3b3479..0000000000000000000000000000000000000000 --- a/feed/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -add_subdirectory(src) -add_subdirectory(pybind) -add_subdirectory(tool) diff --git a/feed/apply_feed_code.sh b/feed/apply_feed_code.sh deleted file mode 100755 index a5a2e1e6924674ad3275b5014af0a459856feda8..0000000000000000000000000000000000000000 --- a/feed/apply_feed_code.sh +++ /dev/null @@ -1,72 +0,0 @@ -#!bash - -#将FEED定制化代码生效到Paddle代码库(如FEED插件注册) 编译前执行 - - -function fatal_log() { - echo "$1" - exit -1 -} - -#处理pybind 拓展 -function apply_pybind() { - pybind_file='paddle/fluid/pybind/pybind.cc' - if [ ! -f ${pybind_file} ];then - fatal_log "Missing Requied File:${pybind_file}" - fi - - find_inferece_api=`grep 'inference_api.h' ${pybind_file} |wc -l` - if [ ${find_inferece_api} -ne 1 ];then - fatal_log "Missing inference_api.h, Need Code Adjust" - fi - find_inferece_api=`grep 'BindInferenceApi' ${pybind_file} |wc -l` - if [ ${find_inferece_api} -ne 1 ];then - fatal_log "Missing BindInferenceApi, Need Code Adjust" - fi - - makefile='paddle/fluid/pybind/CMakeLists.txt' - if [ ! -f ${makefile} ];then - fatal_log "Missing Requied File:${makefile}" - fi - - sed -i '/expand_api/d' ${pybind_file} - sed -i '/BindExpandApi/d' ${pybind_file} - sed -i '/feed_data_set/d' ${makefile} - sed -i '/feed_paddle_pybind/d' ${makefile} - sed -i '/APPEND PYBIND_DEPS fs/d' ${makefile} - - sed -i '/inference_api.h/a\#include "paddle/fluid/feed/pybind/expand_api.h"' ${pybind_file} - sed -i '/BindInferenceApi/a\ BindExpandApi(&m);' ${pybind_file} - sed -i '/set(PYBIND_SRCS/i\list(APPEND PYBIND_DEPS feed_data_set)' ${makefile} - sed -i '/set(PYBIND_SRCS/i\list(APPEND PYBIND_DEPS feed_paddle_pybind)' ${makefile} - sed -i '/set(PYBIND_SRCS/i\list(APPEND PYBIND_DEPS fs)' ${makefile} -} - -function apply_feed_src() { - makefile='paddle/fluid/CMakeLists.txt' - if [ ! -f ${makefile} ];then - fatal_log "Missing Requied File:${makefile}" - fi - find_py=`grep 'pybind' ${makefile} |wc -l` - if [ ${find_py} -ne 1 ];then - fatal_log "Missing pybind, Need Code Adjust" - fi - sed -i '/feed/d' ${makefile} - sed -i '/pybind/i\add_subdirectory(feed)' ${makefile} - - dataset_file='paddle/fluid/framework/dataset_factory.cc' - if [ ! -f ${dataset_file} ];then - fatal_log "Missing Requied File:${dataset_file}" - fi - sed -i '/FeedMultiSlotDataset/d' ${dataset_file} - sed -i '/data_reader/d' ${dataset_file} - sed -i '/REGISTER_DATASET_CLASS(MultiSlotDataset)/a\REGISTER_DATASET_CLASS(FeedMultiSlotDataset);' ${dataset_file} - sed -i '/data_set.h/a\#include "paddle/fluid/feed/src/data_reader/data_set.h"' ${dataset_file} - sed -i '/feed_data_set/d' paddle/fluid/framework/CMakeLists.txt - #sed -i '/target_link_libraries(executor/a\target_link_libraries(feed_data_set)' paddle/fluid/framework/CMakeLists.txt - #sed -i '/target_link_libraries(executor/a\add_dependencies(feed_data_set)' paddle/fluid/framework/CMakeLists.txt -} - -apply_pybind -apply_feed_src - diff --git a/feed/feed_deploy/news_jingpai/job.sh b/feed/feed_deploy/news_jingpai/job.sh deleted file mode 100755 index 034682ae47865199a51562572308928af9d88770..0000000000000000000000000000000000000000 --- a/feed/feed_deploy/news_jingpai/job.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash -WORKDIR=`pwd` - -echo "current:"$WORKDIR - -mpirun -npernode 1 mv package/* ./ - -export LIBRARY_PATH=$WORKDIR/python/lib:$LIBRARY_PATH -export HADOOP_HOME="${WORKDIR}/hadoop-client/hadoop" - -ulimit -c unlimited - -mpirun -npernode 1 sh clear_ssd.sh $WORKDIR -mpirun -npernode 2 -timestamp-output -tag-output python/bin/python -u trainer_online.py - -if [[ $? -ne 0 ]]; then - echo "Failed to run mpi!" 1>&2 - exit 1 -fi diff --git a/feed/feed_deploy/news_jingpai/package/my_nets/all_slot.dict b/feed/feed_deploy/news_jingpai/package/my_nets/all_slot.dict deleted file mode 100644 index 8ad76f38e0ab440344be9c05a902a89c730398bd..0000000000000000000000000000000000000000 --- a/feed/feed_deploy/news_jingpai/package/my_nets/all_slot.dict +++ /dev/null @@ -1,409 +0,0 @@ -6048 -6002 -6145 -6202 -6201 -6121 -6738 -6119 -6146 -6120 -6147 -6122 -6123 -6118 -6142 -6143 -6008 -6148 -6151 -6127 -6144 -6094 -6083 -6952 -6739 -6150 -6109 -6003 -6099 -6149 -6129 -6203 -6153 -6152 -6128 -6106 -6251 -7082 -7515 -6951 -6949 -7080 -6066 -7507 -6186 -6007 -7514 -6125 -7506 -10001 -6006 -7023 -6085 -10000 -6098 -6250 -6110 -6124 -6090 -6082 -6067 -6101 -6004 -6191 -7075 -6948 -6157 -6126 -6188 -7077 -6070 -6111 -6087 -6103 -6107 -6194 -6156 -6005 -6247 -6814 -6158 -7122 -6058 -6189 -7058 -6059 -6115 -7079 -7081 -6833 -7024 -6108 -13342 -13345 -13412 -13343 -13350 -13346 -13409 -6009 -6011 -6012 -6013 -6014 -6015 -6019 -6023 -6024 -6027 -6029 -6031 -6050 -6060 -6068 -6069 -6089 -6095 -6105 -6112 -6130 -6131 -6132 -6134 -6161 -6162 -6163 -6166 -6182 -6183 -6185 -6190 -6212 -6213 -6231 -6233 -6234 -6236 -6238 -6239 -6240 -6241 -6242 -6243 -6244 -6245 -6354 -7002 -7005 -7008 -7010 -7013 -7015 -7019 -7020 -7045 -7046 -7048 -7049 -7052 -7054 -7056 -7064 -7066 -7076 -7078 -7083 -7084 -7085 -7086 -7087 -7088 -7089 -7090 -7099 -7100 -7101 -7102 -7103 -7104 -7105 -7109 -7124 -7126 -7136 -7142 -7143 -7144 -7145 -7146 -7147 -7148 -7150 -7151 -7152 -7153 -7154 -7155 -7156 -7157 -7047 -7050 -6257 -6259 -6260 -6261 -7170 -7185 -7186 -6751 -6755 -6757 -6759 -6760 -6763 -6764 -6765 -6766 -6767 -6768 -6769 -6770 -7502 -7503 -7504 -7505 -7510 -7511 -7512 -7513 -6806 -6807 -6808 -6809 -6810 -6811 -6812 -6813 -6815 -6816 -6817 -6819 -6823 -6828 -6831 -6840 -6845 -6875 -6879 -6881 -6888 -6889 -6947 -6950 -6956 -6957 -6959 -10006 -10008 -10009 -10010 -10011 -10016 -10017 -10018 -10019 -10020 -10021 -10022 -10023 -10024 -10029 -10030 -10031 -10032 -10033 -10034 -10035 -10036 -10037 -10038 -10039 -10040 -10041 -10042 -10044 -10045 -10046 -10051 -10052 -10053 -10054 -10055 -10056 -10057 -10060 -10066 -10069 -6820 -6821 -6822 -13333 -13334 -13335 -13336 -13337 -13338 -13339 -13340 -13341 -13351 -13352 -13353 -13359 -13361 -13362 -13363 -13366 -13367 -13368 -13369 -13370 -13371 -13375 -13376 -5700 -5702 -13400 -13401 -13402 -13403 -13404 -13406 -13407 -13408 -13410 -13417 -13418 -13419 -13420 -13422 -13425 -13427 -13428 -13429 -13430 -13431 -13433 -13434 -13436 -13437 -13326 -13330 -13331 -5717 -13442 -13451 -13452 -13455 -13456 -13457 -13458 -13459 -13460 -13461 -13462 -13463 -13464 -13465 -13466 -13467 -13468 -1104 -1106 -1107 -1108 -1109 -1110 -1111 -1112 -1113 -1114 -1115 -1116 -1117 -1119 -1120 -1121 -1122 -1123 -1124 -1125 -1126 -1127 -1128 -1129 -13812 -13813 -6740 -1490 -32915 -32950 -32952 -32953 -32954 -33077 -33085 -33086 -12345 -23456 diff --git a/feed/feed_deploy/news_jingpai/package/my_nets/clear_ssd.sh b/feed/feed_deploy/news_jingpai/package/my_nets/clear_ssd.sh deleted file mode 100644 index a26c21a0f577623e9c9b90d353b0b090ad212d04..0000000000000000000000000000000000000000 --- a/feed/feed_deploy/news_jingpai/package/my_nets/clear_ssd.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!bash - -function check_appid_valid() { - appid="$1" - num=`echo "${appid}" |awk -F '-' '{print NF}'` - if [ $num -ne 4 ];then - return 1 - fi - return 0 -} - -function appid_running_num() { - appid="$1" - proc_num=`ps -ef |grep "${appid}"|grep -v grep|wc -l` - if [ $? -ne 0 ];then - #if failed, return 1, avoid - return 1 - fi - return ${proc_num} -} - -work_dir="$1" -base_dir=`echo "${work_dir}" |awk -F 'app-user-' '{print $1}'` -database_list=`find ${base_dir} -type d -name 'database'` -for element in ${database_list[@]} -do - app_id=`echo "$element"|awk -F 'app-user-' '{print $2}' |awk -F '/' '{print "app-user-"$1}'` - check_appid_valid "${app_id}" - if [ $? -ne 0 ];then - continue - fi - appid_running_num "${app_id}" - if [ $? -eq 0 ];then - echo "remove ${element}" - rm -rf ${element} - fi -done - diff --git a/feed/feed_deploy/news_jingpai/package/my_nets/config.py b/feed/feed_deploy/news_jingpai/package/my_nets/config.py deleted file mode 100644 index 185c68423e84a9b93ef62e00196023b259e48681..0000000000000000000000000000000000000000 --- a/feed/feed_deploy/news_jingpai/package/my_nets/config.py +++ /dev/null @@ -1,40 +0,0 @@ -dataset_type="InMemoryDataset" -batch_size=32 -thread_num=12 -shuffle_thread=12 -preload_thread=12 -join_common_thread=16 -update_thread=12 -fs_name="afs://xingtian.afs.baidu.com:9902" -fs_ugi="mlarch_pro,proisvip" -train_data_path=["afs:/user/feed/mlarch/samplejoin/mondr_shoubai_dnn_master/feasign"] -init_model_path="" -days="{20191201..20191231} {20200101..20200131} {20200201..20200228} {20200301..20200331}" -hours="{0..23}" -split_interval=5 -split_per_pass=2 -is_data_hourly_placed=False -save_first_base=False -output_path="afs:/user/feed/mlarch/model/feed_muye_news_paddle" -pipe_command="./read_feasign | python/bin/python ins_weight.py | awk -f format_newcate_hotnews.awk | ./parse_feasign all_slot.dict" -save_xbox_before_update=True -check_exist_seconds=30 -checkpoint_per_pass=36 -save_delta_frequency=6 -prefetch=True -write_stdout_frequency=10 - -need_reqi_changeslot=True -hdfs_dnn_plugin_path="afs:/user/feed/mlarch/sequence_generator/wuzhihua02/xujiaqi/test_combinejoincommon_0918_amd/20191006/base/dnn_plugin" -reqi_dnn_plugin_day=20191006 -reqi_dnn_plugin_pass=0 - -task_name="feed_production_shoubai_video_ctr_fsort_session_cut" -nodes=119 -node_memory=100000 -mpi_server="yq01-hpc-lvliang01-smart-master.dmop.baidu.com" -mpi_queue="feed5" -mpi_priority="very_high" -smart_client_home="/home/work/xiexionghang/news_paddle_online/smart_client/" -local_hadoop_home="/home/work/xiexionghang/news_paddle_online/hadoop-client/hadoop" -sparse_table_storage="ssd" diff --git a/feed/feed_deploy/news_jingpai/package/my_nets/data_generate_base.py b/feed/feed_deploy/news_jingpai/package/my_nets/data_generate_base.py deleted file mode 100644 index 7abce3bd3bfeea6a442a371b6c40a6c113ce605f..0000000000000000000000000000000000000000 --- a/feed/feed_deploy/news_jingpai/package/my_nets/data_generate_base.py +++ /dev/null @@ -1,358 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys - -__all__ = ['MultiSlotDataGenerator'] - - -class DataGenerator(object): - """ - DataGenerator is a general Base class for user to inherit - A user who wants to define his/her own python processing logic - with paddle.fluid.dataset should inherit this class. - """ - - def __init__(self): - self._proto_info = None - self.batch_size_ = 32 - - def _set_line_limit(self, line_limit): - if not isinstance(line_limit, int): - raise ValueError("line_limit%s must be in int type" % - type(line_limit)) - if line_limit < 1: - raise ValueError("line_limit can not less than 1") - self._line_limit = line_limit - - def set_batch(self, batch_size): - ''' - Set batch size of current DataGenerator - This is necessary only if a user wants to define generator_batch - - Example: - - .. code-block:: python - import paddle.fluid.incubate.data_generator as dg - class MyData(dg.DataGenerator): - - def generate_sample(self, line): - def local_iter(): - int_words = [int(x) for x in line.split()] - yield ("words", int_words) - return local_iter - - def generate_batch(self, samples): - def local_iter(): - for s in samples: - yield ("words", s[1].extend([s[1][0]])) - mydata = MyData() - mydata.set_batch(128) - - ''' - self.batch_size_ = batch_size - - def run_from_memory(self): - ''' - This function generator data from memory, it is usually used for - debug and benchmarking - - Example: - .. code-block:: python - import paddle.fluid.incubate.data_generator as dg - class MyData(dg.DataGenerator): - - def generate_sample(self, line): - def local_iter(): - yield ("words", [1, 2, 3, 4]) - return local_iter - - mydata = MyData() - mydata.run_from_memory() - ''' - batch_samples = [] - line_iter = self.generate_sample(None) - for user_parsed_line in line_iter(): - if user_parsed_line == None: - continue - batch_samples.append(user_parsed_line) - if len(batch_samples) == self.batch_size_: - batch_iter = self.generate_batch(batch_samples) - for sample in batch_iter(): - sys.stdout.write(self._gen_str(sample)) - batch_samples = [] - if len(batch_samples) > 0: - batch_iter = self.generate_batch(batch_samples) - for sample in batch_iter(): - sys.stdout.write(self._gen_str(sample)) - - - def run_from_stdin(self): - ''' - This function reads the data row from stdin, parses it with the - process function, and further parses the return value of the - process function with the _gen_str function. The parsed data will - be wrote to stdout and the corresponding protofile will be - generated. - - Example: - - .. code-block:: python - import paddle.fluid.incubate.data_generator as dg - class MyData(dg.DataGenerator): - - def generate_sample(self, line): - def local_iter(): - int_words = [int(x) for x in line.split()] - yield ("words", [int_words]) - return local_iter - - mydata = MyData() - mydata.run_from_stdin() - - ''' - batch_samples = [] - for line in sys.stdin: - line_iter = self.generate_sample(line) - for user_parsed_line in line_iter(): - if user_parsed_line == None: - continue - batch_samples.append(user_parsed_line) - if len(batch_samples) == self.batch_size_: - batch_iter = self.generate_batch(batch_samples) - for sample in batch_iter(): - sys.stdout.write(self._gen_str(sample)) - batch_samples = [] - if len(batch_samples) > 0: - batch_iter = self.generate_batch(batch_samples) - for sample in batch_iter(): - sys.stdout.write(self._gen_str(sample)) - - def _gen_str(self, line): - ''' - Further processing the output of the process() function rewritten by - user, outputting data that can be directly read by the datafeed,and - updating proto_info infomation. - - Args: - line(str): the output of the process() function rewritten by user. - - Returns: - Return a string data that can be read directly by the datafeed. - ''' - raise NotImplementedError( - "pls use MultiSlotDataGenerator or PairWiseDataGenerator") - - def generate_sample(self, line): - ''' - This function needs to be overridden by the user to process the - original data row into a list or tuple. - - Args: - line(str): the original data row - - Returns: - Returns the data processed by the user. - The data format is list or tuple: - [(name, [feasign, ...]), ...] - or ((name, [feasign, ...]), ...) - - For example: - [("words", [1926, 08, 17]), ("label", [1])] - or (("words", [1926, 08, 17]), ("label", [1])) - - Note: - The type of feasigns must be in int or float. Once the float - element appears in the feasign, the type of that slot will be - processed into a float. - - Example: - - .. code-block:: python - import paddle.fluid.incubate.data_generator as dg - class MyData(dg.DataGenerator): - - def generate_sample(self, line): - def local_iter(): - int_words = [int(x) for x in line.split()] - yield ("words", [int_words]) - return local_iter - - ''' - raise NotImplementedError( - "Please rewrite this function to return a list or tuple: " + - "[(name, [feasign, ...]), ...] or ((name, [feasign, ...]), ...)") - - def generate_batch(self, samples): - ''' - This function needs to be overridden by the user to process the - generated samples from generate_sample(self, str) function - It is usually used as batch processing when a user wants to - do preprocessing on a batch of samples, e.g. padding according to - the max length of a sample in the batch - - Args: - samples(list tuple): generated sample from generate_sample - - Returns: - a python generator, the same format as return value of generate_sample - - Example: - - .. code-block:: python - import paddle.fluid.incubate.data_generator as dg - class MyData(dg.DataGenerator): - - def generate_sample(self, line): - def local_iter(): - int_words = [int(x) for x in line.split()] - yield ("words", int_words) - return local_iter - - def generate_batch(self, samples): - def local_iter(): - for s in samples: - yield ("words", s[1].extend([s[1][0]])) - mydata = MyData() - mydata.set_batch(128) - ''' - - def local_iter(): - for sample in samples: - yield sample - - return local_iter - - -class MultiSlotDataGenerator(DataGenerator): - - def _gen_str(self, line): - ''' - Further processing the output of the process() function rewritten by - user, outputting data that can be directly read by the MultiSlotDataFeed, - and updating proto_info infomation. - - The input line will be in this format: - >>> [(name, [feasign, ...]), ...] - >>> or ((name, [feasign, ...]), ...) - The output will be in this format: - >>> [ids_num id1 id2 ...] ... - The proto_info will be in this format: - >>> [(name, type), ...] - - For example, if the input is like this: - >>> [("words", [1926, 08, 17]), ("label", [1])] - >>> or (("words", [1926, 08, 17]), ("label", [1])) - the output will be: - >>> 3 1234 2345 3456 1 1 - the proto_info will be: - >>> [("words", "uint64"), ("label", "uint64")] - - Args: - line(str): the output of the process() function rewritten by user. - - Returns: - Return a string data that can be read directly by the MultiSlotDataFeed. - ''' - if not isinstance(line, list) and not isinstance(line, tuple): - raise ValueError( - "the output of process() must be in list or tuple type") - output = "" - - for index, item in enumerate(line): - name, elements = item - if output: - output += " " - out_str = [] - out_str.append(str(len(elements))) - out_str.extend(elements) - output += " ".join(out_str) - return output + "\n" - - if self._proto_info is None: - self._proto_info = [] - for index, item in enumerate(line): - name, elements = item - ''' - if not isinstance(name, str): - raise ValueError("name%s must be in str type" % type(name)) - if not isinstance(elements, list): - raise ValueError("elements%s must be in list type" % - type(elements)) - if not elements: - raise ValueError( - "the elements of each field can not be empty, you need padding it in process()." - ) - self._proto_info.append((name, "uint64")) - if output: - output += " " - output += str(len(elements)) - for elem in elements: - if isinstance(elem, float): - self._proto_info[-1] = (name, "float") - elif not isinstance(elem, int) and not isinstance(elem, - long): - raise ValueError( - "the type of element%s must be in int or float" % - type(elem)) - output += " " + str(elem) - ''' - if output: - output += " " - out_str = [] - out_str.append(str(len(elements))) - out_str.extend(elements) - output += " ".join(out_str) - else: - if len(line) != len(self._proto_info): - raise ValueError( - "the complete field set of two given line are inconsistent.") - for index, item in enumerate(line): - name, elements = item - ''' - if not isinstance(name, str): - raise ValueError("name%s must be in str type" % type(name)) - if not isinstance(elements, list): - raise ValueError("elements%s must be in list type" % - type(elements)) - if not elements: - raise ValueError( - "the elements of each field can not be empty, you need padding it in process()." - ) - if name != self._proto_info[index][0]: - raise ValueError( - "the field name of two given line are not match: require<%s>, get<%s>." - % (self._proto_info[index][0], name)) - ''' - if output: - output += " " - out_str = [] - out_str.append(str(len(elements))) - #out_str.extend([str(x) for x in elements]) - out_str.extend(elements) - output += " ".join(out_str) - ''' - for elem in elements: - if self._proto_info[index][1] != "float": - if isinstance(elem, float): - self._proto_info[index] = (name, "float") - elif not isinstance(elem, int) and not isinstance(elem, - long): - raise ValueError( - "the type of element%s must be in int or float" - % type(elem)) - output += " " + str(elem) - ''' - return output + "\n" diff --git a/feed/feed_deploy/news_jingpai/package/my_nets/fleet_desc_combinejoincommon.prototxt b/feed/feed_deploy/news_jingpai/package/my_nets/fleet_desc_combinejoincommon.prototxt deleted file mode 100644 index e29be5c4794e9e288a9578f52ee739f02d4f78df..0000000000000000000000000000000000000000 --- a/feed/feed_deploy/news_jingpai/package/my_nets/fleet_desc_combinejoincommon.prototxt +++ /dev/null @@ -1,1466 +0,0 @@ -server_param { - downpour_server_param { - downpour_table_param { - table_id: 0 - table_class: "DownpourSparseTable" - shard_num: 1950 - accessor { - accessor_class: "DownpourCtrAccessor" - sparse_sgd_param { - learning_rate: 0.05 - initial_g2sum: 3.0 - initial_range: 0.0001 - weight_bounds: -10.0 - weight_bounds: 10.0 - } - fea_dim: 11 - embedx_dim: 8 - embedx_threshold: 10 - downpour_accessor_param { - nonclk_coeff: 0.1 - click_coeff: 1 - base_threshold: 1.5 - delta_threshold: 0.25 - delta_keep_days: 16 - delete_after_unseen_days: 30 - show_click_decay_rate: 0.98 - delete_threshold: 0.8 - } - table_accessor_save_param { - param: 1 - converter: "(scripts/xbox_compressor_mf.py | bin/xbox_pb_converter)" - deconverter: "(bin/xbox_pb_deconverter | scripts/xbox_decompressor_mf.awk)" - } - table_accessor_save_param { - param: 2 - converter: "(scripts/xbox_compressor_mf.py | bin/xbox_pb_converter)" - deconverter: "(bin/xbox_pb_deconverter | scripts/xbox_decompressor_mf.awk)" - } - } - type: PS_SPARSE_TABLE - compress_in_save: true - } - downpour_table_param { - table_id: 1 - table_class: "DownpourDenseTable" - accessor { - accessor_class: "DownpourDenseValueAccessor" - dense_sgd_param { - name: "adam" - adam { - learning_rate: 5e-06 - avg_decay_rate: 0.999993 - ada_decay_rate: 0.9999 - ada_epsilon: 1e-08 - mom_decay_rate: 0.99 - } - naive { - learning_rate: 0.0002 - } - } - fea_dim: 3405365 - } - type: PS_DENSE_TABLE - compress_in_save: true - } - downpour_table_param { - table_id: 2 - table_class: "DownpourDenseDoubleTable" - accessor { - accessor_class: "DownpourDenseValueDoubleAccessor" - dense_sgd_param { - name: "summarydouble" - summary { - summary_decay_rate: 0.999999 - } - } - fea_dim: 16731 - } - type: PS_DENSE_TABLE - compress_in_save: true - } - downpour_table_param { - table_id: 3 - table_class: "DownpourDenseTable" - accessor { - accessor_class: "DownpourDenseValueAccessor" - dense_sgd_param { - name: "adam" - adam { - learning_rate: 5e-06 - avg_decay_rate: 0.999993 - ada_decay_rate: 0.9999 - ada_epsilon: 1e-08 - mom_decay_rate: 0.99 - } - naive { - learning_rate: 0.0002 - } - } - fea_dim: 2072615 - } - type: PS_DENSE_TABLE - compress_in_save: true - } - service_param { - server_class: "DownpourBrpcPsServer" - client_class: "DownpourBrpcPsClient" - service_class: "DownpourPsService" - start_server_port: 0 - server_thread_num: 12 - } - } -} -trainer_param { - dense_table { - table_id: 1 - - dense_variable_name: "join_0.w_0" - dense_variable_name: "join_0.b_0" - dense_variable_name: "join_1.w_0" - dense_variable_name: "join_1.b_0" - dense_variable_name: "join_2.w_0" - dense_variable_name: "join_2.b_0" - dense_variable_name: "join_3.w_0" - dense_variable_name: "join_3.b_0" - dense_variable_name: "join_4.w_0" - dense_variable_name: "join_4.b_0" - dense_variable_name: "join_5.w_0" - dense_variable_name: "join_5.b_0" - dense_variable_name: "join_6.w_0" - dense_variable_name: "join_6.b_0" - dense_variable_name: "join_7.w_0" - dense_variable_name: "join_7.b_0" - - dense_variable_name: "common_0.w_0" - dense_variable_name: "common_0.b_0" - dense_variable_name: "common_1.w_0" - dense_variable_name: "common_1.b_0" - dense_variable_name: "common_2.w_0" - dense_variable_name: "common_2.b_0" - dense_variable_name: "common_3.w_0" - dense_variable_name: "common_3.b_0" - dense_variable_name: "common_4.w_0" - dense_variable_name: "common_4.b_0" - dense_variable_name: "common_5.w_0" - dense_variable_name: "common_5.b_0" - dense_variable_name: "common_6.w_0" - dense_variable_name: "common_6.b_0" - dense_variable_name: "common_7.w_0" - dense_variable_name: "common_7.b_0" - - dense_gradient_variable_name: "join_0.w_0@GRAD" - dense_gradient_variable_name: "join_0.b_0@GRAD" - dense_gradient_variable_name: "join_1.w_0@GRAD" - dense_gradient_variable_name: "join_1.b_0@GRAD" - dense_gradient_variable_name: "join_2.w_0@GRAD" - dense_gradient_variable_name: "join_2.b_0@GRAD" - dense_gradient_variable_name: "join_3.w_0@GRAD" - dense_gradient_variable_name: "join_3.b_0@GRAD" - dense_gradient_variable_name: "join_4.w_0@GRAD" - dense_gradient_variable_name: "join_4.b_0@GRAD" - dense_gradient_variable_name: "join_5.w_0@GRAD" - dense_gradient_variable_name: "join_5.b_0@GRAD" - dense_gradient_variable_name: "join_6.w_0@GRAD" - dense_gradient_variable_name: "join_6.b_0@GRAD" - dense_gradient_variable_name: "join_7.w_0@GRAD" - dense_gradient_variable_name: "join_7.b_0@GRAD" - - dense_gradient_variable_name: "common_0.w_0@GRAD" - dense_gradient_variable_name: "common_0.b_0@GRAD" - dense_gradient_variable_name: "common_1.w_0@GRAD" - dense_gradient_variable_name: "common_1.b_0@GRAD" - dense_gradient_variable_name: "common_2.w_0@GRAD" - dense_gradient_variable_name: "common_2.b_0@GRAD" - dense_gradient_variable_name: "common_3.w_0@GRAD" - dense_gradient_variable_name: "common_3.b_0@GRAD" - dense_gradient_variable_name: "common_4.w_0@GRAD" - dense_gradient_variable_name: "common_4.b_0@GRAD" - dense_gradient_variable_name: "common_5.w_0@GRAD" - dense_gradient_variable_name: "common_5.b_0@GRAD" - dense_gradient_variable_name: "common_6.w_0@GRAD" - dense_gradient_variable_name: "common_6.b_0@GRAD" - dense_gradient_variable_name: "common_7.w_0@GRAD" - dense_gradient_variable_name: "common_7.b_0@GRAD" - } - dense_table { - table_id: 2 - dense_variable_name: "join.batch_size" - dense_variable_name: "join.batch_sum" - dense_variable_name: "join.batch_square_sum" - - dense_variable_name: "common.batch_size" - dense_variable_name: "common.batch_sum" - dense_variable_name: "common.batch_square_sum" - - dense_gradient_variable_name: "join.batch_size@GRAD" - dense_gradient_variable_name: "join.batch_sum@GRAD" - dense_gradient_variable_name: "join.batch_square_sum@GRAD" - - dense_gradient_variable_name: "common.batch_size@GRAD" - dense_gradient_variable_name: "common.batch_sum@GRAD" - dense_gradient_variable_name: "common.batch_square_sum@GRAD" - } - dense_table { - table_id: 3 - dense_variable_name: "fc_0.w_0" - dense_variable_name: "fc_0.b_0" - dense_variable_name: "fc_1.w_0" - dense_variable_name: "fc_1.b_0" - dense_variable_name: "fc_2.w_0" - dense_variable_name: "fc_2.b_0" - dense_variable_name: "fc_3.w_0" - dense_variable_name: "fc_3.b_0" - dense_variable_name: "fc_4.w_0" - dense_variable_name: "fc_4.b_0" - dense_variable_name: "fc_5.w_0" - dense_variable_name: "fc_5.b_0" - dense_gradient_variable_name: "fc_0.w_0@GRAD" - dense_gradient_variable_name: "fc_0.b_0@GRAD" - dense_gradient_variable_name: "fc_1.w_0@GRAD" - dense_gradient_variable_name: "fc_1.b_0@GRAD" - dense_gradient_variable_name: "fc_2.w_0@GRAD" - dense_gradient_variable_name: "fc_2.b_0@GRAD" - dense_gradient_variable_name: "fc_3.w_0@GRAD" - dense_gradient_variable_name: "fc_3.b_0@GRAD" - dense_gradient_variable_name: "fc_4.w_0@GRAD" - dense_gradient_variable_name: "fc_4.b_0@GRAD" - dense_gradient_variable_name: "fc_5.w_0@GRAD" - dense_gradient_variable_name: "fc_5.b_0@GRAD" - } - sparse_table { - table_id: 0 - slot_key: "6048" - slot_key: "6002" - slot_key: "6145" - slot_key: "6202" - slot_key: "6201" - slot_key: "6121" - slot_key: "6738" - slot_key: "6119" - slot_key: "6146" - slot_key: "6120" - slot_key: "6147" - slot_key: "6122" - slot_key: "6123" - slot_key: "6118" - slot_key: "6142" - slot_key: "6143" - slot_key: "6008" - slot_key: "6148" - slot_key: "6151" - slot_key: "6127" - slot_key: "6144" - slot_key: "6094" - slot_key: "6083" - slot_key: "6952" - slot_key: "6739" - slot_key: "6150" - slot_key: "6109" - slot_key: "6003" - slot_key: "6099" - slot_key: "6149" - slot_key: "6129" - slot_key: "6203" - slot_key: "6153" - slot_key: "6152" - slot_key: "6128" - slot_key: "6106" - slot_key: "6251" - slot_key: "7082" - slot_key: "7515" - slot_key: "6951" - slot_key: "6949" - slot_key: "7080" - slot_key: "6066" - slot_key: "7507" - slot_key: "6186" - slot_key: "6007" - slot_key: "7514" - slot_key: "6125" - slot_key: "7506" - slot_key: "10001" - slot_key: "6006" - slot_key: "7023" - slot_key: "6085" - slot_key: "10000" - slot_key: "6098" - slot_key: "6250" - slot_key: "6110" - slot_key: "6124" - slot_key: "6090" - slot_key: "6082" - slot_key: "6067" - slot_key: "6101" - slot_key: "6004" - slot_key: "6191" - slot_key: "7075" - slot_key: "6948" - slot_key: "6157" - slot_key: "6126" - slot_key: "6188" - slot_key: "7077" - slot_key: "6070" - slot_key: "6111" - slot_key: "6087" - slot_key: "6103" - slot_key: "6107" - slot_key: "6194" - slot_key: "6156" - slot_key: "6005" - slot_key: "6247" - slot_key: "6814" - slot_key: "6158" - slot_key: "7122" - slot_key: "6058" - slot_key: "6189" - slot_key: "7058" - slot_key: "6059" - slot_key: "6115" - slot_key: "7079" - slot_key: "7081" - slot_key: "6833" - slot_key: "7024" - slot_key: "6108" - slot_key: "13342" - slot_key: "13345" - slot_key: "13412" - slot_key: "13343" - slot_key: "13350" - slot_key: "13346" - slot_key: "13409" - slot_key: "6009" - slot_key: "6011" - slot_key: "6012" - slot_key: "6013" - slot_key: "6014" - slot_key: "6015" - slot_key: "6019" - slot_key: "6023" - slot_key: "6024" - slot_key: "6027" - slot_key: "6029" - slot_key: "6031" - slot_key: "6050" - slot_key: "6060" - slot_key: "6068" - slot_key: "6069" - slot_key: "6089" - slot_key: "6095" - slot_key: "6105" - slot_key: "6112" - slot_key: "6130" - slot_key: "6131" - slot_key: "6132" - slot_key: "6134" - slot_key: "6161" - slot_key: "6162" - slot_key: "6163" - slot_key: "6166" - slot_key: "6182" - slot_key: "6183" - slot_key: "6185" - slot_key: "6190" - slot_key: "6212" - slot_key: "6213" - slot_key: "6231" - slot_key: "6233" - slot_key: "6234" - slot_key: "6236" - slot_key: "6238" - slot_key: "6239" - slot_key: "6240" - slot_key: "6241" - slot_key: "6242" - slot_key: "6243" - slot_key: "6244" - slot_key: "6245" - slot_key: "6354" - slot_key: "7002" - slot_key: "7005" - slot_key: "7008" - slot_key: "7010" - slot_key: "7012" - slot_key: "7013" - slot_key: "7015" - slot_key: "7016" - slot_key: "7017" - slot_key: "7018" - slot_key: "7019" - slot_key: "7020" - slot_key: "7045" - slot_key: "7046" - slot_key: "7048" - slot_key: "7049" - slot_key: "7052" - slot_key: "7054" - slot_key: "7056" - slot_key: "7064" - slot_key: "7066" - slot_key: "7076" - slot_key: "7078" - slot_key: "7083" - slot_key: "7084" - slot_key: "7085" - slot_key: "7086" - slot_key: "7087" - slot_key: "7088" - slot_key: "7089" - slot_key: "7090" - slot_key: "7099" - slot_key: "7100" - slot_key: "7101" - slot_key: "7102" - slot_key: "7103" - slot_key: "7104" - slot_key: "7105" - slot_key: "7109" - slot_key: "7124" - slot_key: "7126" - slot_key: "7136" - slot_key: "7142" - slot_key: "7143" - slot_key: "7144" - slot_key: "7145" - slot_key: "7146" - slot_key: "7147" - slot_key: "7148" - slot_key: "7150" - slot_key: "7151" - slot_key: "7152" - slot_key: "7153" - slot_key: "7154" - slot_key: "7155" - slot_key: "7156" - slot_key: "7157" - slot_key: "7047" - slot_key: "7050" - slot_key: "6253" - slot_key: "6254" - slot_key: "6255" - slot_key: "6256" - slot_key: "6257" - slot_key: "6259" - slot_key: "6260" - slot_key: "6261" - slot_key: "7170" - slot_key: "7185" - slot_key: "7186" - slot_key: "6751" - slot_key: "6755" - slot_key: "6757" - slot_key: "6759" - slot_key: "6760" - slot_key: "6763" - slot_key: "6764" - slot_key: "6765" - slot_key: "6766" - slot_key: "6767" - slot_key: "6768" - slot_key: "6769" - slot_key: "6770" - slot_key: "7502" - slot_key: "7503" - slot_key: "7504" - slot_key: "7505" - slot_key: "7510" - slot_key: "7511" - slot_key: "7512" - slot_key: "7513" - slot_key: "6806" - slot_key: "6807" - slot_key: "6808" - slot_key: "6809" - slot_key: "6810" - slot_key: "6811" - slot_key: "6812" - slot_key: "6813" - slot_key: "6815" - slot_key: "6816" - slot_key: "6817" - slot_key: "6819" - slot_key: "6823" - slot_key: "6828" - slot_key: "6831" - slot_key: "6840" - slot_key: "6845" - slot_key: "6875" - slot_key: "6879" - slot_key: "6881" - slot_key: "6888" - slot_key: "6889" - slot_key: "6947" - slot_key: "6950" - slot_key: "6956" - slot_key: "6957" - slot_key: "6959" - slot_key: "10006" - slot_key: "10008" - slot_key: "10009" - slot_key: "10010" - slot_key: "10011" - slot_key: "10016" - slot_key: "10017" - slot_key: "10018" - slot_key: "10019" - slot_key: "10020" - slot_key: "10021" - slot_key: "10022" - slot_key: "10023" - slot_key: "10024" - slot_key: "10029" - slot_key: "10030" - slot_key: "10031" - slot_key: "10032" - slot_key: "10033" - slot_key: "10034" - slot_key: "10035" - slot_key: "10036" - slot_key: "10037" - slot_key: "10038" - slot_key: "10039" - slot_key: "10040" - slot_key: "10041" - slot_key: "10042" - slot_key: "10044" - slot_key: "10045" - slot_key: "10046" - slot_key: "10051" - slot_key: "10052" - slot_key: "10053" - slot_key: "10054" - slot_key: "10055" - slot_key: "10056" - slot_key: "10057" - slot_key: "10060" - slot_key: "10066" - slot_key: "10069" - slot_key: "6820" - slot_key: "6821" - slot_key: "6822" - slot_key: "13333" - slot_key: "13334" - slot_key: "13335" - slot_key: "13336" - slot_key: "13337" - slot_key: "13338" - slot_key: "13339" - slot_key: "13340" - slot_key: "13341" - slot_key: "13351" - slot_key: "13352" - slot_key: "13353" - slot_key: "13359" - slot_key: "13361" - slot_key: "13362" - slot_key: "13363" - slot_key: "13366" - slot_key: "13367" - slot_key: "13368" - slot_key: "13369" - slot_key: "13370" - slot_key: "13371" - slot_key: "13375" - slot_key: "13376" - slot_key: "5700" - slot_key: "5702" - slot_key: "13400" - slot_key: "13401" - slot_key: "13402" - slot_key: "13403" - slot_key: "13404" - slot_key: "13406" - slot_key: "13407" - slot_key: "13408" - slot_key: "13410" - slot_key: "13417" - slot_key: "13418" - slot_key: "13419" - slot_key: "13420" - slot_key: "13422" - slot_key: "13425" - slot_key: "13427" - slot_key: "13428" - slot_key: "13429" - slot_key: "13430" - slot_key: "13431" - slot_key: "13433" - slot_key: "13434" - slot_key: "13436" - slot_key: "13437" - slot_key: "13326" - slot_key: "13330" - slot_key: "13331" - slot_key: "5717" - slot_key: "13442" - slot_key: "13451" - slot_key: "13452" - slot_key: "13455" - slot_key: "13456" - slot_key: "13457" - slot_key: "13458" - slot_key: "13459" - slot_key: "13460" - slot_key: "13461" - slot_key: "13462" - slot_key: "13463" - slot_key: "13464" - slot_key: "13465" - slot_key: "13466" - slot_key: "13467" - slot_key: "13468" - slot_key: "1104" - slot_key: "1106" - slot_key: "1107" - slot_key: "1108" - slot_key: "1109" - slot_key: "1110" - slot_key: "1111" - slot_key: "1112" - slot_key: "1113" - slot_key: "1114" - slot_key: "1115" - slot_key: "1116" - slot_key: "1117" - slot_key: "1119" - slot_key: "1120" - slot_key: "1121" - slot_key: "1122" - slot_key: "1123" - slot_key: "1124" - slot_key: "1125" - slot_key: "1126" - slot_key: "1127" - slot_key: "1128" - slot_key: "1129" - slot_key: "13812" - slot_key: "13813" - slot_key: "6740" - slot_key: "1490" - slot_key: "1491" - slot_value: "embedding_0.tmp_0" - slot_value: "embedding_1.tmp_0" - slot_value: "embedding_2.tmp_0" - slot_value: "embedding_3.tmp_0" - slot_value: "embedding_4.tmp_0" - slot_value: "embedding_5.tmp_0" - slot_value: "embedding_6.tmp_0" - slot_value: "embedding_7.tmp_0" - slot_value: "embedding_8.tmp_0" - slot_value: "embedding_9.tmp_0" - slot_value: "embedding_10.tmp_0" - slot_value: "embedding_11.tmp_0" - slot_value: "embedding_12.tmp_0" - slot_value: "embedding_13.tmp_0" - slot_value: "embedding_14.tmp_0" - slot_value: "embedding_15.tmp_0" - slot_value: "embedding_16.tmp_0" - slot_value: "embedding_17.tmp_0" - slot_value: "embedding_18.tmp_0" - slot_value: "embedding_19.tmp_0" - slot_value: "embedding_20.tmp_0" - slot_value: "embedding_21.tmp_0" - slot_value: "embedding_22.tmp_0" - slot_value: "embedding_23.tmp_0" - slot_value: "embedding_24.tmp_0" - slot_value: "embedding_25.tmp_0" - slot_value: "embedding_26.tmp_0" - slot_value: "embedding_27.tmp_0" - slot_value: "embedding_28.tmp_0" - slot_value: "embedding_29.tmp_0" - slot_value: "embedding_30.tmp_0" - slot_value: "embedding_31.tmp_0" - slot_value: "embedding_32.tmp_0" - slot_value: "embedding_33.tmp_0" - slot_value: "embedding_34.tmp_0" - slot_value: "embedding_35.tmp_0" - slot_value: "embedding_36.tmp_0" - slot_value: "embedding_37.tmp_0" - slot_value: "embedding_38.tmp_0" - slot_value: "embedding_39.tmp_0" - slot_value: "embedding_40.tmp_0" - slot_value: "embedding_41.tmp_0" - slot_value: "embedding_42.tmp_0" - slot_value: "embedding_43.tmp_0" - slot_value: "embedding_44.tmp_0" - slot_value: "embedding_45.tmp_0" - slot_value: "embedding_46.tmp_0" - slot_value: "embedding_47.tmp_0" - slot_value: "embedding_48.tmp_0" - slot_value: "embedding_49.tmp_0" - slot_value: "embedding_50.tmp_0" - slot_value: "embedding_51.tmp_0" - slot_value: "embedding_52.tmp_0" - slot_value: "embedding_53.tmp_0" - slot_value: "embedding_54.tmp_0" - slot_value: "embedding_55.tmp_0" - slot_value: "embedding_56.tmp_0" - slot_value: "embedding_57.tmp_0" - slot_value: "embedding_58.tmp_0" - slot_value: "embedding_59.tmp_0" - slot_value: "embedding_60.tmp_0" - slot_value: "embedding_61.tmp_0" - slot_value: "embedding_62.tmp_0" - slot_value: "embedding_63.tmp_0" - slot_value: "embedding_64.tmp_0" - slot_value: "embedding_65.tmp_0" - slot_value: "embedding_66.tmp_0" - slot_value: "embedding_67.tmp_0" - slot_value: "embedding_68.tmp_0" - slot_value: "embedding_69.tmp_0" - slot_value: "embedding_70.tmp_0" - slot_value: "embedding_71.tmp_0" - slot_value: "embedding_72.tmp_0" - slot_value: "embedding_73.tmp_0" - slot_value: "embedding_74.tmp_0" - slot_value: "embedding_75.tmp_0" - slot_value: "embedding_76.tmp_0" - slot_value: "embedding_77.tmp_0" - slot_value: "embedding_78.tmp_0" - slot_value: "embedding_79.tmp_0" - slot_value: "embedding_80.tmp_0" - slot_value: "embedding_81.tmp_0" - slot_value: "embedding_82.tmp_0" - slot_value: "embedding_83.tmp_0" - slot_value: "embedding_84.tmp_0" - slot_value: "embedding_85.tmp_0" - slot_value: "embedding_86.tmp_0" - slot_value: "embedding_87.tmp_0" - slot_value: "embedding_88.tmp_0" - slot_value: "embedding_89.tmp_0" - slot_value: "embedding_90.tmp_0" - slot_value: "embedding_91.tmp_0" - slot_value: "embedding_92.tmp_0" - slot_value: "embedding_93.tmp_0" - slot_value: "embedding_94.tmp_0" - slot_value: "embedding_95.tmp_0" - slot_value: "embedding_96.tmp_0" - slot_value: "embedding_97.tmp_0" - slot_value: "embedding_98.tmp_0" - slot_value: "embedding_99.tmp_0" - slot_value: "embedding_100.tmp_0" - slot_value: "embedding_101.tmp_0" - slot_value: "embedding_102.tmp_0" - slot_value: "embedding_103.tmp_0" - slot_value: "embedding_104.tmp_0" - slot_value: "embedding_105.tmp_0" - slot_value: "embedding_106.tmp_0" - slot_value: "embedding_107.tmp_0" - slot_value: "embedding_108.tmp_0" - slot_value: "embedding_109.tmp_0" - slot_value: "embedding_110.tmp_0" - slot_value: "embedding_111.tmp_0" - slot_value: "embedding_112.tmp_0" - slot_value: "embedding_113.tmp_0" - slot_value: "embedding_114.tmp_0" - slot_value: "embedding_115.tmp_0" - slot_value: "embedding_116.tmp_0" - slot_value: "embedding_117.tmp_0" - slot_value: "embedding_118.tmp_0" - slot_value: "embedding_119.tmp_0" - slot_value: "embedding_120.tmp_0" - slot_value: "embedding_121.tmp_0" - slot_value: "embedding_122.tmp_0" - slot_value: "embedding_123.tmp_0" - slot_value: "embedding_124.tmp_0" - slot_value: "embedding_125.tmp_0" - slot_value: "embedding_126.tmp_0" - slot_value: "embedding_127.tmp_0" - slot_value: "embedding_128.tmp_0" - slot_value: "embedding_129.tmp_0" - slot_value: "embedding_130.tmp_0" - slot_value: "embedding_131.tmp_0" - slot_value: "embedding_132.tmp_0" - slot_value: "embedding_133.tmp_0" - slot_value: "embedding_134.tmp_0" - slot_value: "embedding_135.tmp_0" - slot_value: "embedding_136.tmp_0" - slot_value: "embedding_137.tmp_0" - slot_value: "embedding_138.tmp_0" - slot_value: "embedding_139.tmp_0" - slot_value: "embedding_140.tmp_0" - slot_value: "embedding_141.tmp_0" - slot_value: "embedding_142.tmp_0" - slot_value: "embedding_143.tmp_0" - slot_value: "embedding_144.tmp_0" - slot_value: "embedding_145.tmp_0" - slot_value: "embedding_146.tmp_0" - slot_value: "embedding_147.tmp_0" - slot_value: "embedding_148.tmp_0" - slot_value: "embedding_149.tmp_0" - slot_value: "embedding_150.tmp_0" - slot_value: "embedding_151.tmp_0" - slot_value: "embedding_152.tmp_0" - slot_value: "embedding_153.tmp_0" - slot_value: "embedding_154.tmp_0" - slot_value: "embedding_155.tmp_0" - slot_value: "embedding_156.tmp_0" - slot_value: "embedding_157.tmp_0" - slot_value: "embedding_158.tmp_0" - slot_value: "embedding_159.tmp_0" - slot_value: "embedding_160.tmp_0" - slot_value: "embedding_161.tmp_0" - slot_value: "embedding_162.tmp_0" - slot_value: "embedding_163.tmp_0" - slot_value: "embedding_164.tmp_0" - slot_value: "embedding_165.tmp_0" - slot_value: "embedding_166.tmp_0" - slot_value: "embedding_167.tmp_0" - slot_value: "embedding_168.tmp_0" - slot_value: "embedding_169.tmp_0" - slot_value: "embedding_170.tmp_0" - slot_value: "embedding_171.tmp_0" - slot_value: "embedding_172.tmp_0" - slot_value: "embedding_173.tmp_0" - slot_value: "embedding_174.tmp_0" - slot_value: "embedding_175.tmp_0" - slot_value: "embedding_176.tmp_0" - slot_value: "embedding_177.tmp_0" - slot_value: "embedding_178.tmp_0" - slot_value: "embedding_179.tmp_0" - slot_value: "embedding_180.tmp_0" - slot_value: "embedding_181.tmp_0" - slot_value: "embedding_182.tmp_0" - slot_value: "embedding_183.tmp_0" - slot_value: "embedding_184.tmp_0" - slot_value: "embedding_185.tmp_0" - slot_value: "embedding_186.tmp_0" - slot_value: "embedding_187.tmp_0" - slot_value: "embedding_188.tmp_0" - slot_value: "embedding_189.tmp_0" - slot_value: "embedding_190.tmp_0" - slot_value: "embedding_191.tmp_0" - slot_value: "embedding_192.tmp_0" - slot_value: "embedding_193.tmp_0" - slot_value: "embedding_194.tmp_0" - slot_value: "embedding_195.tmp_0" - slot_value: "embedding_196.tmp_0" - slot_value: "embedding_197.tmp_0" - slot_value: "embedding_198.tmp_0" - slot_value: "embedding_199.tmp_0" - slot_value: "embedding_200.tmp_0" - slot_value: "embedding_201.tmp_0" - slot_value: "embedding_202.tmp_0" - slot_value: "embedding_203.tmp_0" - slot_value: "embedding_204.tmp_0" - slot_value: "embedding_205.tmp_0" - slot_value: "embedding_206.tmp_0" - slot_value: "embedding_207.tmp_0" - slot_value: "embedding_208.tmp_0" - slot_value: "embedding_209.tmp_0" - slot_value: "embedding_210.tmp_0" - slot_value: "embedding_211.tmp_0" - slot_value: "embedding_212.tmp_0" - slot_value: "embedding_213.tmp_0" - slot_value: "embedding_214.tmp_0" - slot_value: "embedding_215.tmp_0" - slot_value: "embedding_216.tmp_0" - slot_value: "embedding_217.tmp_0" - slot_value: "embedding_218.tmp_0" - slot_value: "embedding_219.tmp_0" - slot_value: "embedding_220.tmp_0" - slot_value: "embedding_221.tmp_0" - slot_value: "embedding_222.tmp_0" - slot_value: "embedding_223.tmp_0" - slot_value: "embedding_224.tmp_0" - slot_value: "embedding_225.tmp_0" - slot_value: "embedding_226.tmp_0" - slot_value: "embedding_227.tmp_0" - slot_value: "embedding_228.tmp_0" - slot_value: "embedding_229.tmp_0" - slot_value: "embedding_230.tmp_0" - slot_value: "embedding_231.tmp_0" - slot_value: "embedding_232.tmp_0" - slot_value: "embedding_233.tmp_0" - slot_value: "embedding_234.tmp_0" - slot_value: "embedding_235.tmp_0" - slot_value: "embedding_236.tmp_0" - slot_value: "embedding_237.tmp_0" - slot_value: "embedding_238.tmp_0" - slot_value: "embedding_239.tmp_0" - slot_value: "embedding_240.tmp_0" - slot_value: "embedding_241.tmp_0" - slot_value: "embedding_242.tmp_0" - slot_value: "embedding_243.tmp_0" - slot_value: "embedding_244.tmp_0" - slot_value: "embedding_245.tmp_0" - slot_value: "embedding_246.tmp_0" - slot_value: "embedding_247.tmp_0" - slot_value: "embedding_248.tmp_0" - slot_value: "embedding_249.tmp_0" - slot_value: "embedding_250.tmp_0" - slot_value: "embedding_251.tmp_0" - slot_value: "embedding_252.tmp_0" - slot_value: "embedding_253.tmp_0" - slot_value: "embedding_254.tmp_0" - slot_value: "embedding_255.tmp_0" - slot_value: "embedding_256.tmp_0" - slot_value: "embedding_257.tmp_0" - slot_value: "embedding_258.tmp_0" - slot_value: "embedding_259.tmp_0" - slot_value: "embedding_260.tmp_0" - slot_value: "embedding_261.tmp_0" - slot_value: "embedding_262.tmp_0" - slot_value: "embedding_263.tmp_0" - slot_value: "embedding_264.tmp_0" - slot_value: "embedding_265.tmp_0" - slot_value: "embedding_266.tmp_0" - slot_value: "embedding_267.tmp_0" - slot_value: "embedding_268.tmp_0" - slot_value: "embedding_269.tmp_0" - slot_value: "embedding_270.tmp_0" - slot_value: "embedding_271.tmp_0" - slot_value: "embedding_272.tmp_0" - slot_value: "embedding_273.tmp_0" - slot_value: "embedding_274.tmp_0" - slot_value: "embedding_275.tmp_0" - slot_value: "embedding_276.tmp_0" - slot_value: "embedding_277.tmp_0" - slot_value: "embedding_278.tmp_0" - slot_value: "embedding_279.tmp_0" - slot_value: "embedding_280.tmp_0" - slot_value: "embedding_281.tmp_0" - slot_value: "embedding_282.tmp_0" - slot_value: "embedding_283.tmp_0" - slot_value: "embedding_284.tmp_0" - slot_value: "embedding_285.tmp_0" - slot_value: "embedding_286.tmp_0" - slot_value: "embedding_287.tmp_0" - slot_value: "embedding_288.tmp_0" - slot_value: "embedding_289.tmp_0" - slot_value: "embedding_290.tmp_0" - slot_value: "embedding_291.tmp_0" - slot_value: "embedding_292.tmp_0" - slot_value: "embedding_293.tmp_0" - slot_value: "embedding_294.tmp_0" - slot_value: "embedding_295.tmp_0" - slot_value: "embedding_296.tmp_0" - slot_value: "embedding_297.tmp_0" - slot_value: "embedding_298.tmp_0" - slot_value: "embedding_299.tmp_0" - slot_value: "embedding_300.tmp_0" - slot_value: "embedding_301.tmp_0" - slot_value: "embedding_302.tmp_0" - slot_value: "embedding_303.tmp_0" - slot_value: "embedding_304.tmp_0" - slot_value: "embedding_305.tmp_0" - slot_value: "embedding_306.tmp_0" - slot_value: "embedding_307.tmp_0" - slot_value: "embedding_308.tmp_0" - slot_value: "embedding_309.tmp_0" - slot_value: "embedding_310.tmp_0" - slot_value: "embedding_311.tmp_0" - slot_value: "embedding_312.tmp_0" - slot_value: "embedding_313.tmp_0" - slot_value: "embedding_314.tmp_0" - slot_value: "embedding_315.tmp_0" - slot_value: "embedding_316.tmp_0" - slot_value: "embedding_317.tmp_0" - slot_value: "embedding_318.tmp_0" - slot_value: "embedding_319.tmp_0" - slot_value: "embedding_320.tmp_0" - slot_value: "embedding_321.tmp_0" - slot_value: "embedding_322.tmp_0" - slot_value: "embedding_323.tmp_0" - slot_value: "embedding_324.tmp_0" - slot_value: "embedding_325.tmp_0" - slot_value: "embedding_326.tmp_0" - slot_value: "embedding_327.tmp_0" - slot_value: "embedding_328.tmp_0" - slot_value: "embedding_329.tmp_0" - slot_value: "embedding_330.tmp_0" - slot_value: "embedding_331.tmp_0" - slot_value: "embedding_332.tmp_0" - slot_value: "embedding_333.tmp_0" - slot_value: "embedding_334.tmp_0" - slot_value: "embedding_335.tmp_0" - slot_value: "embedding_336.tmp_0" - slot_value: "embedding_337.tmp_0" - slot_value: "embedding_338.tmp_0" - slot_value: "embedding_339.tmp_0" - slot_value: "embedding_340.tmp_0" - slot_value: "embedding_341.tmp_0" - slot_value: "embedding_342.tmp_0" - slot_value: "embedding_343.tmp_0" - slot_value: "embedding_344.tmp_0" - slot_value: "embedding_345.tmp_0" - slot_value: "embedding_346.tmp_0" - slot_value: "embedding_347.tmp_0" - slot_value: "embedding_348.tmp_0" - slot_value: "embedding_349.tmp_0" - slot_value: "embedding_350.tmp_0" - slot_value: "embedding_351.tmp_0" - slot_value: "embedding_352.tmp_0" - slot_value: "embedding_353.tmp_0" - slot_value: "embedding_354.tmp_0" - slot_value: "embedding_355.tmp_0" - slot_value: "embedding_356.tmp_0" - slot_value: "embedding_357.tmp_0" - slot_value: "embedding_358.tmp_0" - slot_value: "embedding_359.tmp_0" - slot_value: "embedding_360.tmp_0" - slot_value: "embedding_361.tmp_0" - slot_value: "embedding_362.tmp_0" - slot_value: "embedding_363.tmp_0" - slot_value: "embedding_364.tmp_0" - slot_value: "embedding_365.tmp_0" - slot_value: "embedding_366.tmp_0" - slot_value: "embedding_367.tmp_0" - slot_value: "embedding_368.tmp_0" - slot_value: "embedding_369.tmp_0" - slot_value: "embedding_370.tmp_0" - slot_value: "embedding_371.tmp_0" - slot_value: "embedding_372.tmp_0" - slot_value: "embedding_373.tmp_0" - slot_value: "embedding_374.tmp_0" - slot_value: "embedding_375.tmp_0" - slot_value: "embedding_376.tmp_0" - slot_value: "embedding_377.tmp_0" - slot_value: "embedding_378.tmp_0" - slot_value: "embedding_379.tmp_0" - slot_value: "embedding_380.tmp_0" - slot_value: "embedding_381.tmp_0" - slot_value: "embedding_382.tmp_0" - slot_value: "embedding_383.tmp_0" - slot_value: "embedding_384.tmp_0" - slot_value: "embedding_385.tmp_0" - slot_value: "embedding_386.tmp_0" - slot_value: "embedding_387.tmp_0" - slot_value: "embedding_388.tmp_0" - slot_value: "embedding_389.tmp_0" - slot_value: "embedding_390.tmp_0" - slot_value: "embedding_391.tmp_0" - slot_value: "embedding_392.tmp_0" - slot_value: "embedding_393.tmp_0" - slot_value: "embedding_394.tmp_0" - slot_value: "embedding_395.tmp_0" - slot_value: "embedding_396.tmp_0" - slot_value: "embedding_397.tmp_0" - slot_value: "embedding_398.tmp_0" - slot_value: "embedding_399.tmp_0" - slot_value: "embedding_400.tmp_0" - slot_value: "embedding_401.tmp_0" - slot_value: "embedding_402.tmp_0" - slot_value: "embedding_403.tmp_0" - slot_value: "embedding_404.tmp_0" - slot_value: "embedding_405.tmp_0" - slot_value: "embedding_406.tmp_0" - slot_value: "embedding_407.tmp_0" - slot_gradient: "embedding_0.tmp_0@GRAD" - slot_gradient: "embedding_1.tmp_0@GRAD" - slot_gradient: "embedding_2.tmp_0@GRAD" - slot_gradient: "embedding_3.tmp_0@GRAD" - slot_gradient: "embedding_4.tmp_0@GRAD" - slot_gradient: "embedding_5.tmp_0@GRAD" - slot_gradient: "embedding_6.tmp_0@GRAD" - slot_gradient: "embedding_7.tmp_0@GRAD" - slot_gradient: "embedding_8.tmp_0@GRAD" - slot_gradient: "embedding_9.tmp_0@GRAD" - slot_gradient: "embedding_10.tmp_0@GRAD" - slot_gradient: "embedding_11.tmp_0@GRAD" - slot_gradient: "embedding_12.tmp_0@GRAD" - slot_gradient: "embedding_13.tmp_0@GRAD" - slot_gradient: "embedding_14.tmp_0@GRAD" - slot_gradient: "embedding_15.tmp_0@GRAD" - slot_gradient: "embedding_16.tmp_0@GRAD" - slot_gradient: "embedding_17.tmp_0@GRAD" - slot_gradient: "embedding_18.tmp_0@GRAD" - slot_gradient: "embedding_19.tmp_0@GRAD" - slot_gradient: "embedding_20.tmp_0@GRAD" - slot_gradient: "embedding_21.tmp_0@GRAD" - slot_gradient: "embedding_22.tmp_0@GRAD" - slot_gradient: "embedding_23.tmp_0@GRAD" - slot_gradient: "embedding_24.tmp_0@GRAD" - slot_gradient: "embedding_25.tmp_0@GRAD" - slot_gradient: "embedding_26.tmp_0@GRAD" - slot_gradient: "embedding_27.tmp_0@GRAD" - slot_gradient: "embedding_28.tmp_0@GRAD" - slot_gradient: "embedding_29.tmp_0@GRAD" - slot_gradient: "embedding_30.tmp_0@GRAD" - slot_gradient: "embedding_31.tmp_0@GRAD" - slot_gradient: "embedding_32.tmp_0@GRAD" - slot_gradient: "embedding_33.tmp_0@GRAD" - slot_gradient: "embedding_34.tmp_0@GRAD" - slot_gradient: "embedding_35.tmp_0@GRAD" - slot_gradient: "embedding_36.tmp_0@GRAD" - slot_gradient: "embedding_37.tmp_0@GRAD" - slot_gradient: "embedding_38.tmp_0@GRAD" - slot_gradient: "embedding_39.tmp_0@GRAD" - slot_gradient: "embedding_40.tmp_0@GRAD" - slot_gradient: "embedding_41.tmp_0@GRAD" - slot_gradient: "embedding_42.tmp_0@GRAD" - slot_gradient: "embedding_43.tmp_0@GRAD" - slot_gradient: "embedding_44.tmp_0@GRAD" - slot_gradient: "embedding_45.tmp_0@GRAD" - slot_gradient: "embedding_46.tmp_0@GRAD" - slot_gradient: "embedding_47.tmp_0@GRAD" - slot_gradient: "embedding_48.tmp_0@GRAD" - slot_gradient: "embedding_49.tmp_0@GRAD" - slot_gradient: "embedding_50.tmp_0@GRAD" - slot_gradient: "embedding_51.tmp_0@GRAD" - slot_gradient: "embedding_52.tmp_0@GRAD" - slot_gradient: "embedding_53.tmp_0@GRAD" - slot_gradient: "embedding_54.tmp_0@GRAD" - slot_gradient: "embedding_55.tmp_0@GRAD" - slot_gradient: "embedding_56.tmp_0@GRAD" - slot_gradient: "embedding_57.tmp_0@GRAD" - slot_gradient: "embedding_58.tmp_0@GRAD" - slot_gradient: "embedding_59.tmp_0@GRAD" - slot_gradient: "embedding_60.tmp_0@GRAD" - slot_gradient: "embedding_61.tmp_0@GRAD" - slot_gradient: "embedding_62.tmp_0@GRAD" - slot_gradient: "embedding_63.tmp_0@GRAD" - slot_gradient: "embedding_64.tmp_0@GRAD" - slot_gradient: "embedding_65.tmp_0@GRAD" - slot_gradient: "embedding_66.tmp_0@GRAD" - slot_gradient: "embedding_67.tmp_0@GRAD" - slot_gradient: "embedding_68.tmp_0@GRAD" - slot_gradient: "embedding_69.tmp_0@GRAD" - slot_gradient: "embedding_70.tmp_0@GRAD" - slot_gradient: "embedding_71.tmp_0@GRAD" - slot_gradient: "embedding_72.tmp_0@GRAD" - slot_gradient: "embedding_73.tmp_0@GRAD" - slot_gradient: "embedding_74.tmp_0@GRAD" - slot_gradient: "embedding_75.tmp_0@GRAD" - slot_gradient: "embedding_76.tmp_0@GRAD" - slot_gradient: "embedding_77.tmp_0@GRAD" - slot_gradient: "embedding_78.tmp_0@GRAD" - slot_gradient: "embedding_79.tmp_0@GRAD" - slot_gradient: "embedding_80.tmp_0@GRAD" - slot_gradient: "embedding_81.tmp_0@GRAD" - slot_gradient: "embedding_82.tmp_0@GRAD" - slot_gradient: "embedding_83.tmp_0@GRAD" - slot_gradient: "embedding_84.tmp_0@GRAD" - slot_gradient: "embedding_85.tmp_0@GRAD" - slot_gradient: "embedding_86.tmp_0@GRAD" - slot_gradient: "embedding_87.tmp_0@GRAD" - slot_gradient: "embedding_88.tmp_0@GRAD" - slot_gradient: "embedding_89.tmp_0@GRAD" - slot_gradient: "embedding_90.tmp_0@GRAD" - slot_gradient: "embedding_91.tmp_0@GRAD" - slot_gradient: "embedding_92.tmp_0@GRAD" - slot_gradient: "embedding_93.tmp_0@GRAD" - slot_gradient: "embedding_94.tmp_0@GRAD" - slot_gradient: "embedding_95.tmp_0@GRAD" - slot_gradient: "embedding_96.tmp_0@GRAD" - slot_gradient: "embedding_97.tmp_0@GRAD" - slot_gradient: "embedding_98.tmp_0@GRAD" - slot_gradient: "embedding_99.tmp_0@GRAD" - slot_gradient: "embedding_100.tmp_0@GRAD" - slot_gradient: "embedding_101.tmp_0@GRAD" - slot_gradient: "embedding_102.tmp_0@GRAD" - slot_gradient: "embedding_103.tmp_0@GRAD" - slot_gradient: "embedding_104.tmp_0@GRAD" - slot_gradient: "embedding_105.tmp_0@GRAD" - slot_gradient: "embedding_106.tmp_0@GRAD" - slot_gradient: "embedding_107.tmp_0@GRAD" - slot_gradient: "embedding_108.tmp_0@GRAD" - slot_gradient: "embedding_109.tmp_0@GRAD" - slot_gradient: "embedding_110.tmp_0@GRAD" - slot_gradient: "embedding_111.tmp_0@GRAD" - slot_gradient: "embedding_112.tmp_0@GRAD" - slot_gradient: "embedding_113.tmp_0@GRAD" - slot_gradient: "embedding_114.tmp_0@GRAD" - slot_gradient: "embedding_115.tmp_0@GRAD" - slot_gradient: "embedding_116.tmp_0@GRAD" - slot_gradient: "embedding_117.tmp_0@GRAD" - slot_gradient: "embedding_118.tmp_0@GRAD" - slot_gradient: "embedding_119.tmp_0@GRAD" - slot_gradient: "embedding_120.tmp_0@GRAD" - slot_gradient: "embedding_121.tmp_0@GRAD" - slot_gradient: "embedding_122.tmp_0@GRAD" - slot_gradient: "embedding_123.tmp_0@GRAD" - slot_gradient: "embedding_124.tmp_0@GRAD" - slot_gradient: "embedding_125.tmp_0@GRAD" - slot_gradient: "embedding_126.tmp_0@GRAD" - slot_gradient: "embedding_127.tmp_0@GRAD" - slot_gradient: "embedding_128.tmp_0@GRAD" - slot_gradient: "embedding_129.tmp_0@GRAD" - slot_gradient: "embedding_130.tmp_0@GRAD" - slot_gradient: "embedding_131.tmp_0@GRAD" - slot_gradient: "embedding_132.tmp_0@GRAD" - slot_gradient: "embedding_133.tmp_0@GRAD" - slot_gradient: "embedding_134.tmp_0@GRAD" - slot_gradient: "embedding_135.tmp_0@GRAD" - slot_gradient: "embedding_136.tmp_0@GRAD" - slot_gradient: "embedding_137.tmp_0@GRAD" - slot_gradient: "embedding_138.tmp_0@GRAD" - slot_gradient: "embedding_139.tmp_0@GRAD" - slot_gradient: "embedding_140.tmp_0@GRAD" - slot_gradient: "embedding_141.tmp_0@GRAD" - slot_gradient: "embedding_142.tmp_0@GRAD" - slot_gradient: "embedding_143.tmp_0@GRAD" - slot_gradient: "embedding_144.tmp_0@GRAD" - slot_gradient: "embedding_145.tmp_0@GRAD" - slot_gradient: "embedding_146.tmp_0@GRAD" - slot_gradient: "embedding_147.tmp_0@GRAD" - slot_gradient: "embedding_148.tmp_0@GRAD" - slot_gradient: "embedding_149.tmp_0@GRAD" - slot_gradient: "embedding_150.tmp_0@GRAD" - slot_gradient: "embedding_151.tmp_0@GRAD" - slot_gradient: "embedding_152.tmp_0@GRAD" - slot_gradient: "embedding_153.tmp_0@GRAD" - slot_gradient: "embedding_154.tmp_0@GRAD" - slot_gradient: "embedding_155.tmp_0@GRAD" - slot_gradient: "embedding_156.tmp_0@GRAD" - slot_gradient: "embedding_157.tmp_0@GRAD" - slot_gradient: "embedding_158.tmp_0@GRAD" - slot_gradient: "embedding_159.tmp_0@GRAD" - slot_gradient: "embedding_160.tmp_0@GRAD" - slot_gradient: "embedding_161.tmp_0@GRAD" - slot_gradient: "embedding_162.tmp_0@GRAD" - slot_gradient: "embedding_163.tmp_0@GRAD" - slot_gradient: "embedding_164.tmp_0@GRAD" - slot_gradient: "embedding_165.tmp_0@GRAD" - slot_gradient: "embedding_166.tmp_0@GRAD" - slot_gradient: "embedding_167.tmp_0@GRAD" - slot_gradient: "embedding_168.tmp_0@GRAD" - slot_gradient: "embedding_169.tmp_0@GRAD" - slot_gradient: "embedding_170.tmp_0@GRAD" - slot_gradient: "embedding_171.tmp_0@GRAD" - slot_gradient: "embedding_172.tmp_0@GRAD" - slot_gradient: "embedding_173.tmp_0@GRAD" - slot_gradient: "embedding_174.tmp_0@GRAD" - slot_gradient: "embedding_175.tmp_0@GRAD" - slot_gradient: "embedding_176.tmp_0@GRAD" - slot_gradient: "embedding_177.tmp_0@GRAD" - slot_gradient: "embedding_178.tmp_0@GRAD" - slot_gradient: "embedding_179.tmp_0@GRAD" - slot_gradient: "embedding_180.tmp_0@GRAD" - slot_gradient: "embedding_181.tmp_0@GRAD" - slot_gradient: "embedding_182.tmp_0@GRAD" - slot_gradient: "embedding_183.tmp_0@GRAD" - slot_gradient: "embedding_184.tmp_0@GRAD" - slot_gradient: "embedding_185.tmp_0@GRAD" - slot_gradient: "embedding_186.tmp_0@GRAD" - slot_gradient: "embedding_187.tmp_0@GRAD" - slot_gradient: "embedding_188.tmp_0@GRAD" - slot_gradient: "embedding_189.tmp_0@GRAD" - slot_gradient: "embedding_190.tmp_0@GRAD" - slot_gradient: "embedding_191.tmp_0@GRAD" - slot_gradient: "embedding_192.tmp_0@GRAD" - slot_gradient: "embedding_193.tmp_0@GRAD" - slot_gradient: "embedding_194.tmp_0@GRAD" - slot_gradient: "embedding_195.tmp_0@GRAD" - slot_gradient: "embedding_196.tmp_0@GRAD" - slot_gradient: "embedding_197.tmp_0@GRAD" - slot_gradient: "embedding_198.tmp_0@GRAD" - slot_gradient: "embedding_199.tmp_0@GRAD" - slot_gradient: "embedding_200.tmp_0@GRAD" - slot_gradient: "embedding_201.tmp_0@GRAD" - slot_gradient: "embedding_202.tmp_0@GRAD" - slot_gradient: "embedding_203.tmp_0@GRAD" - slot_gradient: "embedding_204.tmp_0@GRAD" - slot_gradient: "embedding_205.tmp_0@GRAD" - slot_gradient: "embedding_206.tmp_0@GRAD" - slot_gradient: "embedding_207.tmp_0@GRAD" - slot_gradient: "embedding_208.tmp_0@GRAD" - slot_gradient: "embedding_209.tmp_0@GRAD" - slot_gradient: "embedding_210.tmp_0@GRAD" - slot_gradient: "embedding_211.tmp_0@GRAD" - slot_gradient: "embedding_212.tmp_0@GRAD" - slot_gradient: "embedding_213.tmp_0@GRAD" - slot_gradient: "embedding_214.tmp_0@GRAD" - slot_gradient: "embedding_215.tmp_0@GRAD" - slot_gradient: "embedding_216.tmp_0@GRAD" - slot_gradient: "embedding_217.tmp_0@GRAD" - slot_gradient: "embedding_218.tmp_0@GRAD" - slot_gradient: "embedding_219.tmp_0@GRAD" - slot_gradient: "embedding_220.tmp_0@GRAD" - slot_gradient: "embedding_221.tmp_0@GRAD" - slot_gradient: "embedding_222.tmp_0@GRAD" - slot_gradient: "embedding_223.tmp_0@GRAD" - slot_gradient: "embedding_224.tmp_0@GRAD" - slot_gradient: "embedding_225.tmp_0@GRAD" - slot_gradient: "embedding_226.tmp_0@GRAD" - slot_gradient: "embedding_227.tmp_0@GRAD" - slot_gradient: "embedding_228.tmp_0@GRAD" - slot_gradient: "embedding_229.tmp_0@GRAD" - slot_gradient: "embedding_230.tmp_0@GRAD" - slot_gradient: "embedding_231.tmp_0@GRAD" - slot_gradient: "embedding_232.tmp_0@GRAD" - slot_gradient: "embedding_233.tmp_0@GRAD" - slot_gradient: "embedding_234.tmp_0@GRAD" - slot_gradient: "embedding_235.tmp_0@GRAD" - slot_gradient: "embedding_236.tmp_0@GRAD" - slot_gradient: "embedding_237.tmp_0@GRAD" - slot_gradient: "embedding_238.tmp_0@GRAD" - slot_gradient: "embedding_239.tmp_0@GRAD" - slot_gradient: "embedding_240.tmp_0@GRAD" - slot_gradient: "embedding_241.tmp_0@GRAD" - slot_gradient: "embedding_242.tmp_0@GRAD" - slot_gradient: "embedding_243.tmp_0@GRAD" - slot_gradient: "embedding_244.tmp_0@GRAD" - slot_gradient: "embedding_245.tmp_0@GRAD" - slot_gradient: "embedding_246.tmp_0@GRAD" - slot_gradient: "embedding_247.tmp_0@GRAD" - slot_gradient: "embedding_248.tmp_0@GRAD" - slot_gradient: "embedding_249.tmp_0@GRAD" - slot_gradient: "embedding_250.tmp_0@GRAD" - slot_gradient: "embedding_251.tmp_0@GRAD" - slot_gradient: "embedding_252.tmp_0@GRAD" - slot_gradient: "embedding_253.tmp_0@GRAD" - slot_gradient: "embedding_254.tmp_0@GRAD" - slot_gradient: "embedding_255.tmp_0@GRAD" - slot_gradient: "embedding_256.tmp_0@GRAD" - slot_gradient: "embedding_257.tmp_0@GRAD" - slot_gradient: "embedding_258.tmp_0@GRAD" - slot_gradient: "embedding_259.tmp_0@GRAD" - slot_gradient: "embedding_260.tmp_0@GRAD" - slot_gradient: "embedding_261.tmp_0@GRAD" - slot_gradient: "embedding_262.tmp_0@GRAD" - slot_gradient: "embedding_263.tmp_0@GRAD" - slot_gradient: "embedding_264.tmp_0@GRAD" - slot_gradient: "embedding_265.tmp_0@GRAD" - slot_gradient: "embedding_266.tmp_0@GRAD" - slot_gradient: "embedding_267.tmp_0@GRAD" - slot_gradient: "embedding_268.tmp_0@GRAD" - slot_gradient: "embedding_269.tmp_0@GRAD" - slot_gradient: "embedding_270.tmp_0@GRAD" - slot_gradient: "embedding_271.tmp_0@GRAD" - slot_gradient: "embedding_272.tmp_0@GRAD" - slot_gradient: "embedding_273.tmp_0@GRAD" - slot_gradient: "embedding_274.tmp_0@GRAD" - slot_gradient: "embedding_275.tmp_0@GRAD" - slot_gradient: "embedding_276.tmp_0@GRAD" - slot_gradient: "embedding_277.tmp_0@GRAD" - slot_gradient: "embedding_278.tmp_0@GRAD" - slot_gradient: "embedding_279.tmp_0@GRAD" - slot_gradient: "embedding_280.tmp_0@GRAD" - slot_gradient: "embedding_281.tmp_0@GRAD" - slot_gradient: "embedding_282.tmp_0@GRAD" - slot_gradient: "embedding_283.tmp_0@GRAD" - slot_gradient: "embedding_284.tmp_0@GRAD" - slot_gradient: "embedding_285.tmp_0@GRAD" - slot_gradient: "embedding_286.tmp_0@GRAD" - slot_gradient: "embedding_287.tmp_0@GRAD" - slot_gradient: "embedding_288.tmp_0@GRAD" - slot_gradient: "embedding_289.tmp_0@GRAD" - slot_gradient: "embedding_290.tmp_0@GRAD" - slot_gradient: "embedding_291.tmp_0@GRAD" - slot_gradient: "embedding_292.tmp_0@GRAD" - slot_gradient: "embedding_293.tmp_0@GRAD" - slot_gradient: "embedding_294.tmp_0@GRAD" - slot_gradient: "embedding_295.tmp_0@GRAD" - slot_gradient: "embedding_296.tmp_0@GRAD" - slot_gradient: "embedding_297.tmp_0@GRAD" - slot_gradient: "embedding_298.tmp_0@GRAD" - slot_gradient: "embedding_299.tmp_0@GRAD" - slot_gradient: "embedding_300.tmp_0@GRAD" - slot_gradient: "embedding_301.tmp_0@GRAD" - slot_gradient: "embedding_302.tmp_0@GRAD" - slot_gradient: "embedding_303.tmp_0@GRAD" - slot_gradient: "embedding_304.tmp_0@GRAD" - slot_gradient: "embedding_305.tmp_0@GRAD" - slot_gradient: "embedding_306.tmp_0@GRAD" - slot_gradient: "embedding_307.tmp_0@GRAD" - slot_gradient: "embedding_308.tmp_0@GRAD" - slot_gradient: "embedding_309.tmp_0@GRAD" - slot_gradient: "embedding_310.tmp_0@GRAD" - slot_gradient: "embedding_311.tmp_0@GRAD" - slot_gradient: "embedding_312.tmp_0@GRAD" - slot_gradient: "embedding_313.tmp_0@GRAD" - slot_gradient: "embedding_314.tmp_0@GRAD" - slot_gradient: "embedding_315.tmp_0@GRAD" - slot_gradient: "embedding_316.tmp_0@GRAD" - slot_gradient: "embedding_317.tmp_0@GRAD" - slot_gradient: "embedding_318.tmp_0@GRAD" - slot_gradient: "embedding_319.tmp_0@GRAD" - slot_gradient: "embedding_320.tmp_0@GRAD" - slot_gradient: "embedding_321.tmp_0@GRAD" - slot_gradient: "embedding_322.tmp_0@GRAD" - slot_gradient: "embedding_323.tmp_0@GRAD" - slot_gradient: "embedding_324.tmp_0@GRAD" - slot_gradient: "embedding_325.tmp_0@GRAD" - slot_gradient: "embedding_326.tmp_0@GRAD" - slot_gradient: "embedding_327.tmp_0@GRAD" - slot_gradient: "embedding_328.tmp_0@GRAD" - slot_gradient: "embedding_329.tmp_0@GRAD" - slot_gradient: "embedding_330.tmp_0@GRAD" - slot_gradient: "embedding_331.tmp_0@GRAD" - slot_gradient: "embedding_332.tmp_0@GRAD" - slot_gradient: "embedding_333.tmp_0@GRAD" - slot_gradient: "embedding_334.tmp_0@GRAD" - slot_gradient: "embedding_335.tmp_0@GRAD" - slot_gradient: "embedding_336.tmp_0@GRAD" - slot_gradient: "embedding_337.tmp_0@GRAD" - slot_gradient: "embedding_338.tmp_0@GRAD" - slot_gradient: "embedding_339.tmp_0@GRAD" - slot_gradient: "embedding_340.tmp_0@GRAD" - slot_gradient: "embedding_341.tmp_0@GRAD" - slot_gradient: "embedding_342.tmp_0@GRAD" - slot_gradient: "embedding_343.tmp_0@GRAD" - slot_gradient: "embedding_344.tmp_0@GRAD" - slot_gradient: "embedding_345.tmp_0@GRAD" - slot_gradient: "embedding_346.tmp_0@GRAD" - slot_gradient: "embedding_347.tmp_0@GRAD" - slot_gradient: "embedding_348.tmp_0@GRAD" - slot_gradient: "embedding_349.tmp_0@GRAD" - slot_gradient: "embedding_350.tmp_0@GRAD" - slot_gradient: "embedding_351.tmp_0@GRAD" - slot_gradient: "embedding_352.tmp_0@GRAD" - slot_gradient: "embedding_353.tmp_0@GRAD" - slot_gradient: "embedding_354.tmp_0@GRAD" - slot_gradient: "embedding_355.tmp_0@GRAD" - slot_gradient: "embedding_356.tmp_0@GRAD" - slot_gradient: "embedding_357.tmp_0@GRAD" - slot_gradient: "embedding_358.tmp_0@GRAD" - slot_gradient: "embedding_359.tmp_0@GRAD" - slot_gradient: "embedding_360.tmp_0@GRAD" - slot_gradient: "embedding_361.tmp_0@GRAD" - slot_gradient: "embedding_362.tmp_0@GRAD" - slot_gradient: "embedding_363.tmp_0@GRAD" - slot_gradient: "embedding_364.tmp_0@GRAD" - slot_gradient: "embedding_365.tmp_0@GRAD" - slot_gradient: "embedding_366.tmp_0@GRAD" - slot_gradient: "embedding_367.tmp_0@GRAD" - slot_gradient: "embedding_368.tmp_0@GRAD" - slot_gradient: "embedding_369.tmp_0@GRAD" - slot_gradient: "embedding_370.tmp_0@GRAD" - slot_gradient: "embedding_371.tmp_0@GRAD" - slot_gradient: "embedding_372.tmp_0@GRAD" - slot_gradient: "embedding_373.tmp_0@GRAD" - slot_gradient: "embedding_374.tmp_0@GRAD" - slot_gradient: "embedding_375.tmp_0@GRAD" - slot_gradient: "embedding_376.tmp_0@GRAD" - slot_gradient: "embedding_377.tmp_0@GRAD" - slot_gradient: "embedding_378.tmp_0@GRAD" - slot_gradient: "embedding_379.tmp_0@GRAD" - slot_gradient: "embedding_380.tmp_0@GRAD" - slot_gradient: "embedding_381.tmp_0@GRAD" - slot_gradient: "embedding_382.tmp_0@GRAD" - slot_gradient: "embedding_383.tmp_0@GRAD" - slot_gradient: "embedding_384.tmp_0@GRAD" - slot_gradient: "embedding_385.tmp_0@GRAD" - slot_gradient: "embedding_386.tmp_0@GRAD" - slot_gradient: "embedding_387.tmp_0@GRAD" - slot_gradient: "embedding_388.tmp_0@GRAD" - slot_gradient: "embedding_389.tmp_0@GRAD" - slot_gradient: "embedding_390.tmp_0@GRAD" - slot_gradient: "embedding_391.tmp_0@GRAD" - slot_gradient: "embedding_392.tmp_0@GRAD" - slot_gradient: "embedding_393.tmp_0@GRAD" - slot_gradient: "embedding_394.tmp_0@GRAD" - slot_gradient: "embedding_395.tmp_0@GRAD" - slot_gradient: "embedding_396.tmp_0@GRAD" - slot_gradient: "embedding_397.tmp_0@GRAD" - slot_gradient: "embedding_398.tmp_0@GRAD" - slot_gradient: "embedding_399.tmp_0@GRAD" - slot_gradient: "embedding_400.tmp_0@GRAD" - slot_gradient: "embedding_401.tmp_0@GRAD" - slot_gradient: "embedding_402.tmp_0@GRAD" - slot_gradient: "embedding_403.tmp_0@GRAD" - slot_gradient: "embedding_404.tmp_0@GRAD" - slot_gradient: "embedding_405.tmp_0@GRAD" - slot_gradient: "embedding_406.tmp_0@GRAD" - slot_gradient: "embedding_407.tmp_0@GRAD" - } - skip_op: "lookup_table" - skip_op: "lookup_table_grad" -} -fs_client_param { - uri: "afs://xingtian.afs.baidu.com:9902" - user: "mlarch" - passwd: "Fv1M87" - hadoop_bin: "$HADOOP_HOME/bin/hadoop" -} diff --git a/feed/feed_deploy/news_jingpai/package/my_nets/format_newcate_hotnews.awk b/feed/feed_deploy/news_jingpai/package/my_nets/format_newcate_hotnews.awk deleted file mode 100755 index 7820d4050110a1e1b59d739c126648d24681dd18..0000000000000000000000000000000000000000 --- a/feed/feed_deploy/news_jingpai/package/my_nets/format_newcate_hotnews.awk +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/awk -f -{ - if ($1 !~ /^([0-9a-zA-Z])+$/ || $2 !~ /^([0-9])+$/ || $3 !~ /^([0-9])+$/) { - next; - } - show = $2; - clk = $3; - if (clk > show) { - clk = show; - } - for (i = 0; i < clk; i++) { - $2 = "1"; - $3 = "1"; - print $0; - } - for (i = 0; i < show - clk; i++) { - $2 = "1"; - $3 = "0"; - print $0; - } -} diff --git a/feed/feed_deploy/news_jingpai/package/my_nets/ins_weight.py b/feed/feed_deploy/news_jingpai/package/my_nets/ins_weight.py deleted file mode 100755 index 8b4d87c34300aaea048c07fd9e9c50aa70e3a07c..0000000000000000000000000000000000000000 --- a/feed/feed_deploy/news_jingpai/package/my_nets/ins_weight.py +++ /dev/null @@ -1,122 +0,0 @@ -#!/usr/bin/python -import sys -import re -import math - -del_text_slot = True -g_ratio = 1 -w_ratio = 0.01 -slots_str = "6048 6145 6202 6201 6121 6119 6146 6120 6147 6122 6123 6118 6142 6143 6008 6148 6151 6127 6144 6150 6109 6003 6096 6149 6129 6203 6153 6152 6128 6106 6251 7082 7515 7080 6066 7507 6186 6007 7514 6054 6125 7506 10001 6006 6080 7023 6085 10000 6250 6110 6124 6090 6082 6067 7516 6101 6004 6191 6188 6070 6194 6247 6814 7512 10007 6058 6189 6059 7517 10005 7510 7024 7502 7503 6183 7511 6060 6806 7504 6185 6810 6248 10004 6815 6182 10068 6069 6073 6196 6816 7513 6071 6809 6072 6817 6190 7505 6813 6192 6807 6808 6195 6826 6184 6197 6068 6812 7107 6811 6823 6824 6819 6818 6821 6822 6820 6094 6083 6952 6099 6951 6949 6098 7075 6948 6157 6126 7077 6111 6087 6103 6107 6156 6005 6158 7122 6155 7058 6115 7079 7081 6833 6108 6840 6837 7147 7129 6097 6231 6957 7145 6956 7143 6130 7149 7142 6212 6827 7144 6089 6161 7055 6233 6105 7057 6237 6828 6850 6163 7124 6354 6162 7146 6830 7123 6160 6235 7056 6081 6841 6132 6954 6131 6236 6831 6845 6832 6953 6839 6950 7125 7054 6138 6166 6076 6851 6353 7076 7148 6858 6842 6860 7126 6829 6835 7078 6866 6869 6871 7052 6134 6855 6947 6862 6215 6852 7128 6092 6112 6213 6232 6863 6113 6165 6214 6216 6873 6865 6870 6077 6234 6861 6164 6217 7127 6218 6962 7053 7051 6961 6002 6738 6739 10105 7064 6751 6770 7100 6014 6765 6755 10021 10022 6010 10056 6011 6756 10055 6768 10024 6023 10003 6769 10002 6767 6759 10018 6024 6064 6012 6050 10042 6168 6253 10010 10020 6015 6018 10033 10041 10039 10031 10016 6764 7083 7152 7066 6171 7150 7085 6255 10044 10008 7102 6167 6240 6238 6095 10017 10046 6019 6031 6763 6256 6169 6254 10034 7108 7186 6257 10019 6757 10040 6025 7019 7086 10029 10011 7104 6261 6013 6766 10106 7105 7153 7089 6057 7134 7151 7045 7005 7008 7101 6035 7137 10023 6036 6172 7099 7087 6239 7185 6170 10006 6243 6350 7103 7090 7157 6259 7171 6875 7084 7154 6242 6260 7155 7017 7048 7156 6959 7047 10053 7135 6244 7136 10030 7063 6760 7016 7065 7179 6881 7018 6876 10081 10052 10054 10038 6886 10069 7004 10051 7007 7109 10057 6029 6888 10009 6889 7021 10047 6245 6878 10067 6879 6884 7180 7182 10071 7002 6880 6890 6887 10061 6027 6877 6892 10060 6893 7050 10036 7049 10012 10025 7012 7183 10058 7181 10086 6891 6258 6894 6883 7046 6037 7106 10043 10048 10045 10087 6885 10013 10028 7187 10037 10035 10050 6895 7011 7170 7172 10026 10063 10095 10082 10084 6960 10092 10075 6038 7010 7015 10015 10027 10064 7184 10014 10059 7013 7020 10072 10066 10080 6896 10083 10090 6039 10049 7164 7165 10091 10099 6963 7166 10079 10103 7006 7009 7169 6034 7028 7029 7030 7034 7035 7036 7040 7041 7042 10032 6009 6241 7003 7014 7088 13326 13330 13331 13352 13353 6198" -slot_whitelist = slots_str.split(" ") - -def calc_ins_weight(params, label): - """calc ins weight""" - global g_ratio - global w_ratio - slots = [] - s_clk_num = 0 - s_show_num = 0 - active = 0 - attclk_num = 0 - attshow_num = 0 - attclk_avg = 0 - for items in params: - if len(items) != 2: - continue - slot_name = items[0] - slot_val = items[1] - if slot_name not in slots: - slots.append(slot_name) - if slot_name == "session_click_num": - s_clk_num = int(slot_val) - if slot_name == "session_show_num": - s_show_num = int(slot_val) - if slot_name == "activity": - active = float(slot_val) / 10000.0 - w = 1 - # for inactive user - if active >= 0 and active < 0.4 and s_show_num >=0 and s_show_num < 20: - w = math.log(w_ratio * (420 - (active * 50 + 1) * (s_show_num + 1)) + math.e) - if label == "0": - w = 1 + (w - 1) * g_ratio - return w - -def filter_whitelist_slot(tmp_line): - terms = tmp_line.split() - line = "%s %s %s" % (terms[0], terms[1], terms[2]) - for item in terms[3:]: - feasign = item.split(':') - if len(feasign) == 2 and \ - feasign[1] in slot_whitelist: - line = "%s %s" %(line, item) - return line - -def get_sample_type(line): - # vertical_type = 20 - # if line.find("13038012583501790:6738") > 0: - # return 30 - # vertical_type = 0/5/1/2/9/11/13/16/29/-1 - if (line.find("7408512894065610:6738") > 0) or \ - (line.find("8815887816424655:6738") > 0) or \ - (line.find("7689987878537419:6738") > 0) or \ - (line.find("7971462863009228:6738") > 0) or \ - (line.find("9941787754311891:6738") > 0) or \ - (line.find("10504737723255509:6738") > 0) or \ - (line.find("11067687692199127:6738") > 0) or \ - (line.find("11912112645614554:6738") > 0) or \ - (line.find("15571287443748071:6738") > 0) or \ - (line.find("7127025017546227:6738") > 0): - return 20 - return -1 - -def main(): - """ins adjust""" - global del_text_slot - for l in sys.stdin: - l = l.rstrip("\n") - items = l.split(" ") - if len(items) < 3: - continue - label = items[2] - lines = l.split("\t") - line = lines[0] - # streaming ins include all ins, sample_type only handle NEWS ins - sample_type = -1 - if 'NEWS' in l: - sample_type = get_sample_type(line) - #line = filter_whitelist_slot(tmp_line) - if len(lines) >= 4: - if 'VIDEO' in lines[3]: - continue - params = lines[2] - params = params.split(" ") - m = [tuple(i.split(":")) for i in params] - if m is None or len(m) == 0: - if sample_type > 0: - print "%s $%s *1" % (line, sample_type) - else: - print "%s *1" % line - sys.stdout.flush() - continue - weight = calc_ins_weight(m, label) - if sample_type > 0: - print "%s $%s *%s" % (line, sample_type, weight) - else: - print "%s *%s" % (line, weight) - sys.stdout.flush() - else: - if sample_type > 0: - print "%s $%s *1" % (line, sample_type) - else: - print "%s *1" % line - sys.stdout.flush() - -if __name__ == "__main__": - if len(sys.argv) > 1: - if sys.argv[1] == "0": - del_text_slot = False - if len(sys.argv) > 2: - g_ratio = float(sys.argv[2]) - if len(sys.argv) > 3: - w_ratio = float(sys.argv[3]) - main() diff --git a/feed/feed_deploy/news_jingpai/package/my_nets/jingpai_fleet_desc_new.prototxt b/feed/feed_deploy/news_jingpai/package/my_nets/jingpai_fleet_desc_new.prototxt deleted file mode 100644 index baf86c34e42a544ebfee248fcd1126ae2715d762..0000000000000000000000000000000000000000 --- a/feed/feed_deploy/news_jingpai/package/my_nets/jingpai_fleet_desc_new.prototxt +++ /dev/null @@ -1,1504 +0,0 @@ -server_param { - downpour_server_param { - downpour_table_param { - table_id: 0 - table_class: "DownpourSparseTable" - shard_num: 1950 - accessor { - accessor_class: "DownpourCtrAccessor" - sparse_sgd_param { - learning_rate: 0.05 - initial_g2sum: 3.0 - initial_range: 0.0001 - weight_bounds: -10.0 - weight_bounds: 10.0 - } - fea_dim: 11 - embedx_dim: 8 - embedx_threshold: 10 - downpour_accessor_param { - nonclk_coeff: 0.1 - click_coeff: 1 - base_threshold: 1.5 - delta_threshold: 0.25 - delta_keep_days: 16 - delete_after_unseen_days: 30 - show_click_decay_rate: 0.98 - delete_threshold: 0.8 - } - table_accessor_save_param { - param: 1 - converter: "(scripts/xbox_compressor_mf.py | bin/xbox_pb_converter)" - deconverter: "(bin/xbox_pb_deconverter | scripts/xbox_decompressor_mf.awk)" - } - table_accessor_save_param { - param: 2 - converter: "(scripts/xbox_compressor_mf.py | bin/xbox_pb_converter)" - deconverter: "(bin/xbox_pb_deconverter | scripts/xbox_decompressor_mf.awk)" - } - } - type: PS_SPARSE_TABLE - compress_in_save: true - } - downpour_table_param { - table_id: 1 - table_class: "DownpourDenseTable" - accessor { - accessor_class: "DownpourDenseValueAccessor" - dense_sgd_param { - name: "adam" - adam { - learning_rate: 5e-06 - avg_decay_rate: 0.999993 - ada_decay_rate: 0.9999 - ada_epsilon: 1e-08 - mom_decay_rate: 0.99 - } - naive { - learning_rate: 0.0002 - } - } - fea_dim: 2571127 - } - type: PS_DENSE_TABLE - compress_in_save: true - } - downpour_table_param { - table_id: 2 - table_class: "DownpourDenseDoubleTable" - accessor { - accessor_class: "DownpourDenseValueDoubleAccessor" - dense_sgd_param { - name: "summarydouble" - summary { - summary_decay_rate: 0.999999 - } - } - fea_dim: 13464 - } - type: PS_DENSE_TABLE - compress_in_save: true - } - downpour_table_param { - table_id: 3 - table_class: "DownpourDenseTable" - accessor { - accessor_class: "DownpourDenseValueAccessor" - dense_sgd_param { - name: "adam" - adam { - learning_rate: 5e-06 - avg_decay_rate: 0.999993 - ada_decay_rate: 0.9999 - ada_epsilon: 1e-08 - mom_decay_rate: 0.99 - } - naive { - learning_rate: 0.0002 - } - } - fea_dim: 834238 - } - type: PS_DENSE_TABLE - compress_in_save: true - } - downpour_table_param { - table_id: 4 - table_class: "DownpourDenseDoubleTable" - accessor { - accessor_class: "DownpourDenseValueDoubleAccessor" - dense_sgd_param { - name: "summarydouble" - summary { - summary_decay_rate: 0.999999 - } - } - fea_dim: 3267 - } - type: PS_DENSE_TABLE - compress_in_save: true - } - downpour_table_param { - table_id: 5 - table_class: "DownpourDenseTable" - accessor { - accessor_class: "DownpourDenseValueAccessor" - dense_sgd_param { - name: "adam" - adam { - learning_rate: 5e-06 - avg_decay_rate: 0.999993 - ada_decay_rate: 0.9999 - ada_epsilon: 1e-08 - mom_decay_rate: 0.99 - } - naive { - learning_rate: 0.0002 - } - } - fea_dim: 2072615 - } - type: PS_DENSE_TABLE - compress_in_save: true - } - service_param { - server_class: "DownpourBrpcPsServer" - client_class: "DownpourBrpcPsClient" - service_class: "DownpourPsService" - start_server_port: 0 - server_thread_num: 12 - } - } -} -trainer_param { - dense_table { - table_id: 1 - dense_variable_name: "fc_0.w_0" - dense_variable_name: "fc_0.b_0" - dense_variable_name: "fc_1.w_0" - dense_variable_name: "fc_1.b_0" - dense_variable_name: "fc_2.w_0" - dense_variable_name: "fc_2.b_0" - dense_variable_name: "fc_3.w_0" - dense_variable_name: "fc_3.b_0" - dense_variable_name: "fc_4.w_0" - dense_variable_name: "fc_4.b_0" - dense_variable_name: "fc_5.w_0" - dense_variable_name: "fc_5.b_0" - dense_variable_name: "fc_6.w_0" - dense_variable_name: "fc_6.b_0" - dense_variable_name: "fc_7.w_0" - dense_variable_name: "fc_7.b_0" - dense_gradient_variable_name: "fc_0.w_0@GRAD" - dense_gradient_variable_name: "fc_0.b_0@GRAD" - dense_gradient_variable_name: "fc_1.w_0@GRAD" - dense_gradient_variable_name: "fc_1.b_0@GRAD" - dense_gradient_variable_name: "fc_2.w_0@GRAD" - dense_gradient_variable_name: "fc_2.b_0@GRAD" - dense_gradient_variable_name: "fc_3.w_0@GRAD" - dense_gradient_variable_name: "fc_3.b_0@GRAD" - dense_gradient_variable_name: "fc_4.w_0@GRAD" - dense_gradient_variable_name: "fc_4.b_0@GRAD" - dense_gradient_variable_name: "fc_5.w_0@GRAD" - dense_gradient_variable_name: "fc_5.b_0@GRAD" - dense_gradient_variable_name: "fc_6.w_0@GRAD" - dense_gradient_variable_name: "fc_6.b_0@GRAD" - dense_gradient_variable_name: "fc_7.w_0@GRAD" - dense_gradient_variable_name: "fc_7.b_0@GRAD" - } - dense_table { - table_id: 2 - dense_variable_name: "bn6048.batch_size" - dense_variable_name: "bn6048.batch_sum" - dense_variable_name: "bn6048.batch_square_sum" - dense_gradient_variable_name: "bn6048.batch_size@GRAD" - dense_gradient_variable_name: "bn6048.batch_sum@GRAD" - dense_gradient_variable_name: "bn6048.batch_square_sum@GRAD" - } - dense_table { - table_id: 3 - dense_variable_name: "fc_0.w_0" - dense_variable_name: "fc_0.b_0" - dense_variable_name: "fc_1.w_0" - dense_variable_name: "fc_1.b_0" - dense_variable_name: "fc_2.w_0" - dense_variable_name: "fc_2.b_0" - dense_variable_name: "fc_3.w_0" - dense_variable_name: "fc_3.b_0" - dense_variable_name: "fc_4.w_0" - dense_variable_name: "fc_4.b_0" - dense_variable_name: "fc_5.w_0" - dense_variable_name: "fc_5.b_0" - dense_variable_name: "fc_6.w_0" - dense_variable_name: "fc_6.b_0" - dense_variable_name: "fc_7.w_0" - dense_variable_name: "fc_7.b_0" - dense_gradient_variable_name: "fc_0.w_0@GRAD" - dense_gradient_variable_name: "fc_0.b_0@GRAD" - dense_gradient_variable_name: "fc_1.w_0@GRAD" - dense_gradient_variable_name: "fc_1.b_0@GRAD" - dense_gradient_variable_name: "fc_2.w_0@GRAD" - dense_gradient_variable_name: "fc_2.b_0@GRAD" - dense_gradient_variable_name: "fc_3.w_0@GRAD" - dense_gradient_variable_name: "fc_3.b_0@GRAD" - dense_gradient_variable_name: "fc_4.w_0@GRAD" - dense_gradient_variable_name: "fc_4.b_0@GRAD" - dense_gradient_variable_name: "fc_5.w_0@GRAD" - dense_gradient_variable_name: "fc_5.b_0@GRAD" - dense_gradient_variable_name: "fc_6.w_0@GRAD" - dense_gradient_variable_name: "fc_6.b_0@GRAD" - dense_gradient_variable_name: "fc_7.w_0@GRAD" - dense_gradient_variable_name: "fc_7.b_0@GRAD" - } - dense_table { - table_id: 4 - dense_variable_name: "bn6048.batch_size" - dense_variable_name: "bn6048.batch_sum" - dense_variable_name: "bn6048.batch_square_sum" - dense_gradient_variable_name: "bn6048.batch_size@GRAD" - dense_gradient_variable_name: "bn6048.batch_sum@GRAD" - dense_gradient_variable_name: "bn6048.batch_square_sum@GRAD" - } - dense_table { - table_id: 5 - dense_variable_name: "fc_0.w_0" - dense_variable_name: "fc_0.b_0" - dense_variable_name: "fc_1.w_0" - dense_variable_name: "fc_1.b_0" - dense_variable_name: "fc_2.w_0" - dense_variable_name: "fc_2.b_0" - dense_variable_name: "fc_3.w_0" - dense_variable_name: "fc_3.b_0" - dense_variable_name: "fc_4.w_0" - dense_variable_name: "fc_4.b_0" - dense_variable_name: "fc_5.w_0" - dense_variable_name: "fc_5.b_0" - dense_gradient_variable_name: "fc_0.w_0@GRAD" - dense_gradient_variable_name: "fc_0.b_0@GRAD" - dense_gradient_variable_name: "fc_1.w_0@GRAD" - dense_gradient_variable_name: "fc_1.b_0@GRAD" - dense_gradient_variable_name: "fc_2.w_0@GRAD" - dense_gradient_variable_name: "fc_2.b_0@GRAD" - dense_gradient_variable_name: "fc_3.w_0@GRAD" - dense_gradient_variable_name: "fc_3.b_0@GRAD" - dense_gradient_variable_name: "fc_4.w_0@GRAD" - dense_gradient_variable_name: "fc_4.b_0@GRAD" - dense_gradient_variable_name: "fc_5.w_0@GRAD" - dense_gradient_variable_name: "fc_5.b_0@GRAD" - } - sparse_table { - table_id: 0 - slot_key: "6048" - slot_key: "6002" - slot_key: "6145" - slot_key: "6202" - slot_key: "6201" - slot_key: "6121" - slot_key: "6738" - slot_key: "6119" - slot_key: "6146" - slot_key: "6120" - slot_key: "6147" - slot_key: "6122" - slot_key: "6123" - slot_key: "6118" - slot_key: "6142" - slot_key: "6143" - slot_key: "6008" - slot_key: "6148" - slot_key: "6151" - slot_key: "6127" - slot_key: "6144" - slot_key: "6094" - slot_key: "6083" - slot_key: "6952" - slot_key: "6739" - slot_key: "6150" - slot_key: "6109" - slot_key: "6003" - slot_key: "6099" - slot_key: "6149" - slot_key: "6129" - slot_key: "6203" - slot_key: "6153" - slot_key: "6152" - slot_key: "6128" - slot_key: "6106" - slot_key: "6251" - slot_key: "7082" - slot_key: "7515" - slot_key: "6951" - slot_key: "6949" - slot_key: "7080" - slot_key: "6066" - slot_key: "7507" - slot_key: "6186" - slot_key: "6007" - slot_key: "7514" - slot_key: "6125" - slot_key: "7506" - slot_key: "10001" - slot_key: "6006" - slot_key: "7023" - slot_key: "6085" - slot_key: "10000" - slot_key: "6098" - slot_key: "6250" - slot_key: "6110" - slot_key: "6124" - slot_key: "6090" - slot_key: "6082" - slot_key: "6067" - slot_key: "6101" - slot_key: "6004" - slot_key: "6191" - slot_key: "7075" - slot_key: "6948" - slot_key: "6157" - slot_key: "6126" - slot_key: "6188" - slot_key: "7077" - slot_key: "6070" - slot_key: "6111" - slot_key: "6087" - slot_key: "6103" - slot_key: "6107" - slot_key: "6194" - slot_key: "6156" - slot_key: "6005" - slot_key: "6247" - slot_key: "6814" - slot_key: "6158" - slot_key: "7122" - slot_key: "6058" - slot_key: "6189" - slot_key: "7058" - slot_key: "6059" - slot_key: "6115" - slot_key: "7079" - slot_key: "7081" - slot_key: "6833" - slot_key: "7024" - slot_key: "6108" - slot_key: "13342" - slot_key: "13345" - slot_key: "13412" - slot_key: "13343" - slot_key: "13350" - slot_key: "13346" - slot_key: "13409" - slot_key: "6009" - slot_key: "6011" - slot_key: "6012" - slot_key: "6013" - slot_key: "6014" - slot_key: "6015" - slot_key: "6019" - slot_key: "6023" - slot_key: "6024" - slot_key: "6027" - slot_key: "6029" - slot_key: "6031" - slot_key: "6050" - slot_key: "6060" - slot_key: "6068" - slot_key: "6069" - slot_key: "6089" - slot_key: "6095" - slot_key: "6105" - slot_key: "6112" - slot_key: "6130" - slot_key: "6131" - slot_key: "6132" - slot_key: "6134" - slot_key: "6161" - slot_key: "6162" - slot_key: "6163" - slot_key: "6166" - slot_key: "6182" - slot_key: "6183" - slot_key: "6185" - slot_key: "6190" - slot_key: "6212" - slot_key: "6213" - slot_key: "6231" - slot_key: "6233" - slot_key: "6234" - slot_key: "6236" - slot_key: "6238" - slot_key: "6239" - slot_key: "6240" - slot_key: "6241" - slot_key: "6242" - slot_key: "6243" - slot_key: "6244" - slot_key: "6245" - slot_key: "6354" - slot_key: "7002" - slot_key: "7005" - slot_key: "7008" - slot_key: "7010" - slot_key: "7012" - slot_key: "7013" - slot_key: "7015" - slot_key: "7016" - slot_key: "7017" - slot_key: "7018" - slot_key: "7019" - slot_key: "7020" - slot_key: "7045" - slot_key: "7046" - slot_key: "7048" - slot_key: "7049" - slot_key: "7052" - slot_key: "7054" - slot_key: "7056" - slot_key: "7064" - slot_key: "7066" - slot_key: "7076" - slot_key: "7078" - slot_key: "7083" - slot_key: "7084" - slot_key: "7085" - slot_key: "7086" - slot_key: "7087" - slot_key: "7088" - slot_key: "7089" - slot_key: "7090" - slot_key: "7099" - slot_key: "7100" - slot_key: "7101" - slot_key: "7102" - slot_key: "7103" - slot_key: "7104" - slot_key: "7105" - slot_key: "7109" - slot_key: "7124" - slot_key: "7126" - slot_key: "7136" - slot_key: "7142" - slot_key: "7143" - slot_key: "7144" - slot_key: "7145" - slot_key: "7146" - slot_key: "7147" - slot_key: "7148" - slot_key: "7150" - slot_key: "7151" - slot_key: "7152" - slot_key: "7153" - slot_key: "7154" - slot_key: "7155" - slot_key: "7156" - slot_key: "7157" - slot_key: "7047" - slot_key: "7050" - slot_key: "6253" - slot_key: "6254" - slot_key: "6255" - slot_key: "6256" - slot_key: "6257" - slot_key: "6259" - slot_key: "6260" - slot_key: "6261" - slot_key: "7170" - slot_key: "7185" - slot_key: "7186" - slot_key: "6751" - slot_key: "6755" - slot_key: "6757" - slot_key: "6759" - slot_key: "6760" - slot_key: "6763" - slot_key: "6764" - slot_key: "6765" - slot_key: "6766" - slot_key: "6767" - slot_key: "6768" - slot_key: "6769" - slot_key: "6770" - slot_key: "7502" - slot_key: "7503" - slot_key: "7504" - slot_key: "7505" - slot_key: "7510" - slot_key: "7511" - slot_key: "7512" - slot_key: "7513" - slot_key: "6806" - slot_key: "6807" - slot_key: "6808" - slot_key: "6809" - slot_key: "6810" - slot_key: "6811" - slot_key: "6812" - slot_key: "6813" - slot_key: "6815" - slot_key: "6816" - slot_key: "6817" - slot_key: "6819" - slot_key: "6823" - slot_key: "6828" - slot_key: "6831" - slot_key: "6840" - slot_key: "6845" - slot_key: "6875" - slot_key: "6879" - slot_key: "6881" - slot_key: "6888" - slot_key: "6889" - slot_key: "6947" - slot_key: "6950" - slot_key: "6956" - slot_key: "6957" - slot_key: "6959" - slot_key: "10006" - slot_key: "10008" - slot_key: "10009" - slot_key: "10010" - slot_key: "10011" - slot_key: "10016" - slot_key: "10017" - slot_key: "10018" - slot_key: "10019" - slot_key: "10020" - slot_key: "10021" - slot_key: "10022" - slot_key: "10023" - slot_key: "10024" - slot_key: "10029" - slot_key: "10030" - slot_key: "10031" - slot_key: "10032" - slot_key: "10033" - slot_key: "10034" - slot_key: "10035" - slot_key: "10036" - slot_key: "10037" - slot_key: "10038" - slot_key: "10039" - slot_key: "10040" - slot_key: "10041" - slot_key: "10042" - slot_key: "10044" - slot_key: "10045" - slot_key: "10046" - slot_key: "10051" - slot_key: "10052" - slot_key: "10053" - slot_key: "10054" - slot_key: "10055" - slot_key: "10056" - slot_key: "10057" - slot_key: "10060" - slot_key: "10066" - slot_key: "10069" - slot_key: "6820" - slot_key: "6821" - slot_key: "6822" - slot_key: "13333" - slot_key: "13334" - slot_key: "13335" - slot_key: "13336" - slot_key: "13337" - slot_key: "13338" - slot_key: "13339" - slot_key: "13340" - slot_key: "13341" - slot_key: "13351" - slot_key: "13352" - slot_key: "13353" - slot_key: "13359" - slot_key: "13361" - slot_key: "13362" - slot_key: "13363" - slot_key: "13366" - slot_key: "13367" - slot_key: "13368" - slot_key: "13369" - slot_key: "13370" - slot_key: "13371" - slot_key: "13375" - slot_key: "13376" - slot_key: "5700" - slot_key: "5702" - slot_key: "13400" - slot_key: "13401" - slot_key: "13402" - slot_key: "13403" - slot_key: "13404" - slot_key: "13406" - slot_key: "13407" - slot_key: "13408" - slot_key: "13410" - slot_key: "13417" - slot_key: "13418" - slot_key: "13419" - slot_key: "13420" - slot_key: "13422" - slot_key: "13425" - slot_key: "13427" - slot_key: "13428" - slot_key: "13429" - slot_key: "13430" - slot_key: "13431" - slot_key: "13433" - slot_key: "13434" - slot_key: "13436" - slot_key: "13437" - slot_key: "13326" - slot_key: "13330" - slot_key: "13331" - slot_key: "5717" - slot_key: "13442" - slot_key: "13451" - slot_key: "13452" - slot_key: "13455" - slot_key: "13456" - slot_key: "13457" - slot_key: "13458" - slot_key: "13459" - slot_key: "13460" - slot_key: "13461" - slot_key: "13462" - slot_key: "13463" - slot_key: "13464" - slot_key: "13465" - slot_key: "13466" - slot_key: "13467" - slot_key: "13468" - slot_key: "1104" - slot_key: "1106" - slot_key: "1107" - slot_key: "1108" - slot_key: "1109" - slot_key: "1110" - slot_key: "1111" - slot_key: "1112" - slot_key: "1113" - slot_key: "1114" - slot_key: "1115" - slot_key: "1116" - slot_key: "1117" - slot_key: "1119" - slot_key: "1120" - slot_key: "1121" - slot_key: "1122" - slot_key: "1123" - slot_key: "1124" - slot_key: "1125" - slot_key: "1126" - slot_key: "1127" - slot_key: "1128" - slot_key: "1129" - slot_key: "13812" - slot_key: "13813" - slot_key: "6740" - slot_key: "1490" - slot_key: "1491" - slot_value: "embedding_0.tmp_0" - slot_value: "embedding_1.tmp_0" - slot_value: "embedding_2.tmp_0" - slot_value: "embedding_3.tmp_0" - slot_value: "embedding_4.tmp_0" - slot_value: "embedding_5.tmp_0" - slot_value: "embedding_6.tmp_0" - slot_value: "embedding_7.tmp_0" - slot_value: "embedding_8.tmp_0" - slot_value: "embedding_9.tmp_0" - slot_value: "embedding_10.tmp_0" - slot_value: "embedding_11.tmp_0" - slot_value: "embedding_12.tmp_0" - slot_value: "embedding_13.tmp_0" - slot_value: "embedding_14.tmp_0" - slot_value: "embedding_15.tmp_0" - slot_value: "embedding_16.tmp_0" - slot_value: "embedding_17.tmp_0" - slot_value: "embedding_18.tmp_0" - slot_value: "embedding_19.tmp_0" - slot_value: "embedding_20.tmp_0" - slot_value: "embedding_21.tmp_0" - slot_value: "embedding_22.tmp_0" - slot_value: "embedding_23.tmp_0" - slot_value: "embedding_24.tmp_0" - slot_value: "embedding_25.tmp_0" - slot_value: "embedding_26.tmp_0" - slot_value: "embedding_27.tmp_0" - slot_value: "embedding_28.tmp_0" - slot_value: "embedding_29.tmp_0" - slot_value: "embedding_30.tmp_0" - slot_value: "embedding_31.tmp_0" - slot_value: "embedding_32.tmp_0" - slot_value: "embedding_33.tmp_0" - slot_value: "embedding_34.tmp_0" - slot_value: "embedding_35.tmp_0" - slot_value: "embedding_36.tmp_0" - slot_value: "embedding_37.tmp_0" - slot_value: "embedding_38.tmp_0" - slot_value: "embedding_39.tmp_0" - slot_value: "embedding_40.tmp_0" - slot_value: "embedding_41.tmp_0" - slot_value: "embedding_42.tmp_0" - slot_value: "embedding_43.tmp_0" - slot_value: "embedding_44.tmp_0" - slot_value: "embedding_45.tmp_0" - slot_value: "embedding_46.tmp_0" - slot_value: "embedding_47.tmp_0" - slot_value: "embedding_48.tmp_0" - slot_value: "embedding_49.tmp_0" - slot_value: "embedding_50.tmp_0" - slot_value: "embedding_51.tmp_0" - slot_value: "embedding_52.tmp_0" - slot_value: "embedding_53.tmp_0" - slot_value: "embedding_54.tmp_0" - slot_value: "embedding_55.tmp_0" - slot_value: "embedding_56.tmp_0" - slot_value: "embedding_57.tmp_0" - slot_value: "embedding_58.tmp_0" - slot_value: "embedding_59.tmp_0" - slot_value: "embedding_60.tmp_0" - slot_value: "embedding_61.tmp_0" - slot_value: "embedding_62.tmp_0" - slot_value: "embedding_63.tmp_0" - slot_value: "embedding_64.tmp_0" - slot_value: "embedding_65.tmp_0" - slot_value: "embedding_66.tmp_0" - slot_value: "embedding_67.tmp_0" - slot_value: "embedding_68.tmp_0" - slot_value: "embedding_69.tmp_0" - slot_value: "embedding_70.tmp_0" - slot_value: "embedding_71.tmp_0" - slot_value: "embedding_72.tmp_0" - slot_value: "embedding_73.tmp_0" - slot_value: "embedding_74.tmp_0" - slot_value: "embedding_75.tmp_0" - slot_value: "embedding_76.tmp_0" - slot_value: "embedding_77.tmp_0" - slot_value: "embedding_78.tmp_0" - slot_value: "embedding_79.tmp_0" - slot_value: "embedding_80.tmp_0" - slot_value: "embedding_81.tmp_0" - slot_value: "embedding_82.tmp_0" - slot_value: "embedding_83.tmp_0" - slot_value: "embedding_84.tmp_0" - slot_value: "embedding_85.tmp_0" - slot_value: "embedding_86.tmp_0" - slot_value: "embedding_87.tmp_0" - slot_value: "embedding_88.tmp_0" - slot_value: "embedding_89.tmp_0" - slot_value: "embedding_90.tmp_0" - slot_value: "embedding_91.tmp_0" - slot_value: "embedding_92.tmp_0" - slot_value: "embedding_93.tmp_0" - slot_value: "embedding_94.tmp_0" - slot_value: "embedding_95.tmp_0" - slot_value: "embedding_96.tmp_0" - slot_value: "embedding_97.tmp_0" - slot_value: "embedding_98.tmp_0" - slot_value: "embedding_99.tmp_0" - slot_value: "embedding_100.tmp_0" - slot_value: "embedding_101.tmp_0" - slot_value: "embedding_102.tmp_0" - slot_value: "embedding_103.tmp_0" - slot_value: "embedding_104.tmp_0" - slot_value: "embedding_105.tmp_0" - slot_value: "embedding_106.tmp_0" - slot_value: "embedding_107.tmp_0" - slot_value: "embedding_108.tmp_0" - slot_value: "embedding_109.tmp_0" - slot_value: "embedding_110.tmp_0" - slot_value: "embedding_111.tmp_0" - slot_value: "embedding_112.tmp_0" - slot_value: "embedding_113.tmp_0" - slot_value: "embedding_114.tmp_0" - slot_value: "embedding_115.tmp_0" - slot_value: "embedding_116.tmp_0" - slot_value: "embedding_117.tmp_0" - slot_value: "embedding_118.tmp_0" - slot_value: "embedding_119.tmp_0" - slot_value: "embedding_120.tmp_0" - slot_value: "embedding_121.tmp_0" - slot_value: "embedding_122.tmp_0" - slot_value: "embedding_123.tmp_0" - slot_value: "embedding_124.tmp_0" - slot_value: "embedding_125.tmp_0" - slot_value: "embedding_126.tmp_0" - slot_value: "embedding_127.tmp_0" - slot_value: "embedding_128.tmp_0" - slot_value: "embedding_129.tmp_0" - slot_value: "embedding_130.tmp_0" - slot_value: "embedding_131.tmp_0" - slot_value: "embedding_132.tmp_0" - slot_value: "embedding_133.tmp_0" - slot_value: "embedding_134.tmp_0" - slot_value: "embedding_135.tmp_0" - slot_value: "embedding_136.tmp_0" - slot_value: "embedding_137.tmp_0" - slot_value: "embedding_138.tmp_0" - slot_value: "embedding_139.tmp_0" - slot_value: "embedding_140.tmp_0" - slot_value: "embedding_141.tmp_0" - slot_value: "embedding_142.tmp_0" - slot_value: "embedding_143.tmp_0" - slot_value: "embedding_144.tmp_0" - slot_value: "embedding_145.tmp_0" - slot_value: "embedding_146.tmp_0" - slot_value: "embedding_147.tmp_0" - slot_value: "embedding_148.tmp_0" - slot_value: "embedding_149.tmp_0" - slot_value: "embedding_150.tmp_0" - slot_value: "embedding_151.tmp_0" - slot_value: "embedding_152.tmp_0" - slot_value: "embedding_153.tmp_0" - slot_value: "embedding_154.tmp_0" - slot_value: "embedding_155.tmp_0" - slot_value: "embedding_156.tmp_0" - slot_value: "embedding_157.tmp_0" - slot_value: "embedding_158.tmp_0" - slot_value: "embedding_159.tmp_0" - slot_value: "embedding_160.tmp_0" - slot_value: "embedding_161.tmp_0" - slot_value: "embedding_162.tmp_0" - slot_value: "embedding_163.tmp_0" - slot_value: "embedding_164.tmp_0" - slot_value: "embedding_165.tmp_0" - slot_value: "embedding_166.tmp_0" - slot_value: "embedding_167.tmp_0" - slot_value: "embedding_168.tmp_0" - slot_value: "embedding_169.tmp_0" - slot_value: "embedding_170.tmp_0" - slot_value: "embedding_171.tmp_0" - slot_value: "embedding_172.tmp_0" - slot_value: "embedding_173.tmp_0" - slot_value: "embedding_174.tmp_0" - slot_value: "embedding_175.tmp_0" - slot_value: "embedding_176.tmp_0" - slot_value: "embedding_177.tmp_0" - slot_value: "embedding_178.tmp_0" - slot_value: "embedding_179.tmp_0" - slot_value: "embedding_180.tmp_0" - slot_value: "embedding_181.tmp_0" - slot_value: "embedding_182.tmp_0" - slot_value: "embedding_183.tmp_0" - slot_value: "embedding_184.tmp_0" - slot_value: "embedding_185.tmp_0" - slot_value: "embedding_186.tmp_0" - slot_value: "embedding_187.tmp_0" - slot_value: "embedding_188.tmp_0" - slot_value: "embedding_189.tmp_0" - slot_value: "embedding_190.tmp_0" - slot_value: "embedding_191.tmp_0" - slot_value: "embedding_192.tmp_0" - slot_value: "embedding_193.tmp_0" - slot_value: "embedding_194.tmp_0" - slot_value: "embedding_195.tmp_0" - slot_value: "embedding_196.tmp_0" - slot_value: "embedding_197.tmp_0" - slot_value: "embedding_198.tmp_0" - slot_value: "embedding_199.tmp_0" - slot_value: "embedding_200.tmp_0" - slot_value: "embedding_201.tmp_0" - slot_value: "embedding_202.tmp_0" - slot_value: "embedding_203.tmp_0" - slot_value: "embedding_204.tmp_0" - slot_value: "embedding_205.tmp_0" - slot_value: "embedding_206.tmp_0" - slot_value: "embedding_207.tmp_0" - slot_value: "embedding_208.tmp_0" - slot_value: "embedding_209.tmp_0" - slot_value: "embedding_210.tmp_0" - slot_value: "embedding_211.tmp_0" - slot_value: "embedding_212.tmp_0" - slot_value: "embedding_213.tmp_0" - slot_value: "embedding_214.tmp_0" - slot_value: "embedding_215.tmp_0" - slot_value: "embedding_216.tmp_0" - slot_value: "embedding_217.tmp_0" - slot_value: "embedding_218.tmp_0" - slot_value: "embedding_219.tmp_0" - slot_value: "embedding_220.tmp_0" - slot_value: "embedding_221.tmp_0" - slot_value: "embedding_222.tmp_0" - slot_value: "embedding_223.tmp_0" - slot_value: "embedding_224.tmp_0" - slot_value: "embedding_225.tmp_0" - slot_value: "embedding_226.tmp_0" - slot_value: "embedding_227.tmp_0" - slot_value: "embedding_228.tmp_0" - slot_value: "embedding_229.tmp_0" - slot_value: "embedding_230.tmp_0" - slot_value: "embedding_231.tmp_0" - slot_value: "embedding_232.tmp_0" - slot_value: "embedding_233.tmp_0" - slot_value: "embedding_234.tmp_0" - slot_value: "embedding_235.tmp_0" - slot_value: "embedding_236.tmp_0" - slot_value: "embedding_237.tmp_0" - slot_value: "embedding_238.tmp_0" - slot_value: "embedding_239.tmp_0" - slot_value: "embedding_240.tmp_0" - slot_value: "embedding_241.tmp_0" - slot_value: "embedding_242.tmp_0" - slot_value: "embedding_243.tmp_0" - slot_value: "embedding_244.tmp_0" - slot_value: "embedding_245.tmp_0" - slot_value: "embedding_246.tmp_0" - slot_value: "embedding_247.tmp_0" - slot_value: "embedding_248.tmp_0" - slot_value: "embedding_249.tmp_0" - slot_value: "embedding_250.tmp_0" - slot_value: "embedding_251.tmp_0" - slot_value: "embedding_252.tmp_0" - slot_value: "embedding_253.tmp_0" - slot_value: "embedding_254.tmp_0" - slot_value: "embedding_255.tmp_0" - slot_value: "embedding_256.tmp_0" - slot_value: "embedding_257.tmp_0" - slot_value: "embedding_258.tmp_0" - slot_value: "embedding_259.tmp_0" - slot_value: "embedding_260.tmp_0" - slot_value: "embedding_261.tmp_0" - slot_value: "embedding_262.tmp_0" - slot_value: "embedding_263.tmp_0" - slot_value: "embedding_264.tmp_0" - slot_value: "embedding_265.tmp_0" - slot_value: "embedding_266.tmp_0" - slot_value: "embedding_267.tmp_0" - slot_value: "embedding_268.tmp_0" - slot_value: "embedding_269.tmp_0" - slot_value: "embedding_270.tmp_0" - slot_value: "embedding_271.tmp_0" - slot_value: "embedding_272.tmp_0" - slot_value: "embedding_273.tmp_0" - slot_value: "embedding_274.tmp_0" - slot_value: "embedding_275.tmp_0" - slot_value: "embedding_276.tmp_0" - slot_value: "embedding_277.tmp_0" - slot_value: "embedding_278.tmp_0" - slot_value: "embedding_279.tmp_0" - slot_value: "embedding_280.tmp_0" - slot_value: "embedding_281.tmp_0" - slot_value: "embedding_282.tmp_0" - slot_value: "embedding_283.tmp_0" - slot_value: "embedding_284.tmp_0" - slot_value: "embedding_285.tmp_0" - slot_value: "embedding_286.tmp_0" - slot_value: "embedding_287.tmp_0" - slot_value: "embedding_288.tmp_0" - slot_value: "embedding_289.tmp_0" - slot_value: "embedding_290.tmp_0" - slot_value: "embedding_291.tmp_0" - slot_value: "embedding_292.tmp_0" - slot_value: "embedding_293.tmp_0" - slot_value: "embedding_294.tmp_0" - slot_value: "embedding_295.tmp_0" - slot_value: "embedding_296.tmp_0" - slot_value: "embedding_297.tmp_0" - slot_value: "embedding_298.tmp_0" - slot_value: "embedding_299.tmp_0" - slot_value: "embedding_300.tmp_0" - slot_value: "embedding_301.tmp_0" - slot_value: "embedding_302.tmp_0" - slot_value: "embedding_303.tmp_0" - slot_value: "embedding_304.tmp_0" - slot_value: "embedding_305.tmp_0" - slot_value: "embedding_306.tmp_0" - slot_value: "embedding_307.tmp_0" - slot_value: "embedding_308.tmp_0" - slot_value: "embedding_309.tmp_0" - slot_value: "embedding_310.tmp_0" - slot_value: "embedding_311.tmp_0" - slot_value: "embedding_312.tmp_0" - slot_value: "embedding_313.tmp_0" - slot_value: "embedding_314.tmp_0" - slot_value: "embedding_315.tmp_0" - slot_value: "embedding_316.tmp_0" - slot_value: "embedding_317.tmp_0" - slot_value: "embedding_318.tmp_0" - slot_value: "embedding_319.tmp_0" - slot_value: "embedding_320.tmp_0" - slot_value: "embedding_321.tmp_0" - slot_value: "embedding_322.tmp_0" - slot_value: "embedding_323.tmp_0" - slot_value: "embedding_324.tmp_0" - slot_value: "embedding_325.tmp_0" - slot_value: "embedding_326.tmp_0" - slot_value: "embedding_327.tmp_0" - slot_value: "embedding_328.tmp_0" - slot_value: "embedding_329.tmp_0" - slot_value: "embedding_330.tmp_0" - slot_value: "embedding_331.tmp_0" - slot_value: "embedding_332.tmp_0" - slot_value: "embedding_333.tmp_0" - slot_value: "embedding_334.tmp_0" - slot_value: "embedding_335.tmp_0" - slot_value: "embedding_336.tmp_0" - slot_value: "embedding_337.tmp_0" - slot_value: "embedding_338.tmp_0" - slot_value: "embedding_339.tmp_0" - slot_value: "embedding_340.tmp_0" - slot_value: "embedding_341.tmp_0" - slot_value: "embedding_342.tmp_0" - slot_value: "embedding_343.tmp_0" - slot_value: "embedding_344.tmp_0" - slot_value: "embedding_345.tmp_0" - slot_value: "embedding_346.tmp_0" - slot_value: "embedding_347.tmp_0" - slot_value: "embedding_348.tmp_0" - slot_value: "embedding_349.tmp_0" - slot_value: "embedding_350.tmp_0" - slot_value: "embedding_351.tmp_0" - slot_value: "embedding_352.tmp_0" - slot_value: "embedding_353.tmp_0" - slot_value: "embedding_354.tmp_0" - slot_value: "embedding_355.tmp_0" - slot_value: "embedding_356.tmp_0" - slot_value: "embedding_357.tmp_0" - slot_value: "embedding_358.tmp_0" - slot_value: "embedding_359.tmp_0" - slot_value: "embedding_360.tmp_0" - slot_value: "embedding_361.tmp_0" - slot_value: "embedding_362.tmp_0" - slot_value: "embedding_363.tmp_0" - slot_value: "embedding_364.tmp_0" - slot_value: "embedding_365.tmp_0" - slot_value: "embedding_366.tmp_0" - slot_value: "embedding_367.tmp_0" - slot_value: "embedding_368.tmp_0" - slot_value: "embedding_369.tmp_0" - slot_value: "embedding_370.tmp_0" - slot_value: "embedding_371.tmp_0" - slot_value: "embedding_372.tmp_0" - slot_value: "embedding_373.tmp_0" - slot_value: "embedding_374.tmp_0" - slot_value: "embedding_375.tmp_0" - slot_value: "embedding_376.tmp_0" - slot_value: "embedding_377.tmp_0" - slot_value: "embedding_378.tmp_0" - slot_value: "embedding_379.tmp_0" - slot_value: "embedding_380.tmp_0" - slot_value: "embedding_381.tmp_0" - slot_value: "embedding_382.tmp_0" - slot_value: "embedding_383.tmp_0" - slot_value: "embedding_384.tmp_0" - slot_value: "embedding_385.tmp_0" - slot_value: "embedding_386.tmp_0" - slot_value: "embedding_387.tmp_0" - slot_value: "embedding_388.tmp_0" - slot_value: "embedding_389.tmp_0" - slot_value: "embedding_390.tmp_0" - slot_value: "embedding_391.tmp_0" - slot_value: "embedding_392.tmp_0" - slot_value: "embedding_393.tmp_0" - slot_value: "embedding_394.tmp_0" - slot_value: "embedding_395.tmp_0" - slot_value: "embedding_396.tmp_0" - slot_value: "embedding_397.tmp_0" - slot_value: "embedding_398.tmp_0" - slot_value: "embedding_399.tmp_0" - slot_value: "embedding_400.tmp_0" - slot_value: "embedding_401.tmp_0" - slot_value: "embedding_402.tmp_0" - slot_value: "embedding_403.tmp_0" - slot_value: "embedding_404.tmp_0" - slot_value: "embedding_405.tmp_0" - slot_value: "embedding_406.tmp_0" - slot_value: "embedding_407.tmp_0" - slot_gradient: "embedding_0.tmp_0@GRAD" - slot_gradient: "embedding_1.tmp_0@GRAD" - slot_gradient: "embedding_2.tmp_0@GRAD" - slot_gradient: "embedding_3.tmp_0@GRAD" - slot_gradient: "embedding_4.tmp_0@GRAD" - slot_gradient: "embedding_5.tmp_0@GRAD" - slot_gradient: "embedding_6.tmp_0@GRAD" - slot_gradient: "embedding_7.tmp_0@GRAD" - slot_gradient: "embedding_8.tmp_0@GRAD" - slot_gradient: "embedding_9.tmp_0@GRAD" - slot_gradient: "embedding_10.tmp_0@GRAD" - slot_gradient: "embedding_11.tmp_0@GRAD" - slot_gradient: "embedding_12.tmp_0@GRAD" - slot_gradient: "embedding_13.tmp_0@GRAD" - slot_gradient: "embedding_14.tmp_0@GRAD" - slot_gradient: "embedding_15.tmp_0@GRAD" - slot_gradient: "embedding_16.tmp_0@GRAD" - slot_gradient: "embedding_17.tmp_0@GRAD" - slot_gradient: "embedding_18.tmp_0@GRAD" - slot_gradient: "embedding_19.tmp_0@GRAD" - slot_gradient: "embedding_20.tmp_0@GRAD" - slot_gradient: "embedding_21.tmp_0@GRAD" - slot_gradient: "embedding_22.tmp_0@GRAD" - slot_gradient: "embedding_23.tmp_0@GRAD" - slot_gradient: "embedding_24.tmp_0@GRAD" - slot_gradient: "embedding_25.tmp_0@GRAD" - slot_gradient: "embedding_26.tmp_0@GRAD" - slot_gradient: "embedding_27.tmp_0@GRAD" - slot_gradient: "embedding_28.tmp_0@GRAD" - slot_gradient: "embedding_29.tmp_0@GRAD" - slot_gradient: "embedding_30.tmp_0@GRAD" - slot_gradient: "embedding_31.tmp_0@GRAD" - slot_gradient: "embedding_32.tmp_0@GRAD" - slot_gradient: "embedding_33.tmp_0@GRAD" - slot_gradient: "embedding_34.tmp_0@GRAD" - slot_gradient: "embedding_35.tmp_0@GRAD" - slot_gradient: "embedding_36.tmp_0@GRAD" - slot_gradient: "embedding_37.tmp_0@GRAD" - slot_gradient: "embedding_38.tmp_0@GRAD" - slot_gradient: "embedding_39.tmp_0@GRAD" - slot_gradient: "embedding_40.tmp_0@GRAD" - slot_gradient: "embedding_41.tmp_0@GRAD" - slot_gradient: "embedding_42.tmp_0@GRAD" - slot_gradient: "embedding_43.tmp_0@GRAD" - slot_gradient: "embedding_44.tmp_0@GRAD" - slot_gradient: "embedding_45.tmp_0@GRAD" - slot_gradient: "embedding_46.tmp_0@GRAD" - slot_gradient: "embedding_47.tmp_0@GRAD" - slot_gradient: "embedding_48.tmp_0@GRAD" - slot_gradient: "embedding_49.tmp_0@GRAD" - slot_gradient: "embedding_50.tmp_0@GRAD" - slot_gradient: "embedding_51.tmp_0@GRAD" - slot_gradient: "embedding_52.tmp_0@GRAD" - slot_gradient: "embedding_53.tmp_0@GRAD" - slot_gradient: "embedding_54.tmp_0@GRAD" - slot_gradient: "embedding_55.tmp_0@GRAD" - slot_gradient: "embedding_56.tmp_0@GRAD" - slot_gradient: "embedding_57.tmp_0@GRAD" - slot_gradient: "embedding_58.tmp_0@GRAD" - slot_gradient: "embedding_59.tmp_0@GRAD" - slot_gradient: "embedding_60.tmp_0@GRAD" - slot_gradient: "embedding_61.tmp_0@GRAD" - slot_gradient: "embedding_62.tmp_0@GRAD" - slot_gradient: "embedding_63.tmp_0@GRAD" - slot_gradient: "embedding_64.tmp_0@GRAD" - slot_gradient: "embedding_65.tmp_0@GRAD" - slot_gradient: "embedding_66.tmp_0@GRAD" - slot_gradient: "embedding_67.tmp_0@GRAD" - slot_gradient: "embedding_68.tmp_0@GRAD" - slot_gradient: "embedding_69.tmp_0@GRAD" - slot_gradient: "embedding_70.tmp_0@GRAD" - slot_gradient: "embedding_71.tmp_0@GRAD" - slot_gradient: "embedding_72.tmp_0@GRAD" - slot_gradient: "embedding_73.tmp_0@GRAD" - slot_gradient: "embedding_74.tmp_0@GRAD" - slot_gradient: "embedding_75.tmp_0@GRAD" - slot_gradient: "embedding_76.tmp_0@GRAD" - slot_gradient: "embedding_77.tmp_0@GRAD" - slot_gradient: "embedding_78.tmp_0@GRAD" - slot_gradient: "embedding_79.tmp_0@GRAD" - slot_gradient: "embedding_80.tmp_0@GRAD" - slot_gradient: "embedding_81.tmp_0@GRAD" - slot_gradient: "embedding_82.tmp_0@GRAD" - slot_gradient: "embedding_83.tmp_0@GRAD" - slot_gradient: "embedding_84.tmp_0@GRAD" - slot_gradient: "embedding_85.tmp_0@GRAD" - slot_gradient: "embedding_86.tmp_0@GRAD" - slot_gradient: "embedding_87.tmp_0@GRAD" - slot_gradient: "embedding_88.tmp_0@GRAD" - slot_gradient: "embedding_89.tmp_0@GRAD" - slot_gradient: "embedding_90.tmp_0@GRAD" - slot_gradient: "embedding_91.tmp_0@GRAD" - slot_gradient: "embedding_92.tmp_0@GRAD" - slot_gradient: "embedding_93.tmp_0@GRAD" - slot_gradient: "embedding_94.tmp_0@GRAD" - slot_gradient: "embedding_95.tmp_0@GRAD" - slot_gradient: "embedding_96.tmp_0@GRAD" - slot_gradient: "embedding_97.tmp_0@GRAD" - slot_gradient: "embedding_98.tmp_0@GRAD" - slot_gradient: "embedding_99.tmp_0@GRAD" - slot_gradient: "embedding_100.tmp_0@GRAD" - slot_gradient: "embedding_101.tmp_0@GRAD" - slot_gradient: "embedding_102.tmp_0@GRAD" - slot_gradient: "embedding_103.tmp_0@GRAD" - slot_gradient: "embedding_104.tmp_0@GRAD" - slot_gradient: "embedding_105.tmp_0@GRAD" - slot_gradient: "embedding_106.tmp_0@GRAD" - slot_gradient: "embedding_107.tmp_0@GRAD" - slot_gradient: "embedding_108.tmp_0@GRAD" - slot_gradient: "embedding_109.tmp_0@GRAD" - slot_gradient: "embedding_110.tmp_0@GRAD" - slot_gradient: "embedding_111.tmp_0@GRAD" - slot_gradient: "embedding_112.tmp_0@GRAD" - slot_gradient: "embedding_113.tmp_0@GRAD" - slot_gradient: "embedding_114.tmp_0@GRAD" - slot_gradient: "embedding_115.tmp_0@GRAD" - slot_gradient: "embedding_116.tmp_0@GRAD" - slot_gradient: "embedding_117.tmp_0@GRAD" - slot_gradient: "embedding_118.tmp_0@GRAD" - slot_gradient: "embedding_119.tmp_0@GRAD" - slot_gradient: "embedding_120.tmp_0@GRAD" - slot_gradient: "embedding_121.tmp_0@GRAD" - slot_gradient: "embedding_122.tmp_0@GRAD" - slot_gradient: "embedding_123.tmp_0@GRAD" - slot_gradient: "embedding_124.tmp_0@GRAD" - slot_gradient: "embedding_125.tmp_0@GRAD" - slot_gradient: "embedding_126.tmp_0@GRAD" - slot_gradient: "embedding_127.tmp_0@GRAD" - slot_gradient: "embedding_128.tmp_0@GRAD" - slot_gradient: "embedding_129.tmp_0@GRAD" - slot_gradient: "embedding_130.tmp_0@GRAD" - slot_gradient: "embedding_131.tmp_0@GRAD" - slot_gradient: "embedding_132.tmp_0@GRAD" - slot_gradient: "embedding_133.tmp_0@GRAD" - slot_gradient: "embedding_134.tmp_0@GRAD" - slot_gradient: "embedding_135.tmp_0@GRAD" - slot_gradient: "embedding_136.tmp_0@GRAD" - slot_gradient: "embedding_137.tmp_0@GRAD" - slot_gradient: "embedding_138.tmp_0@GRAD" - slot_gradient: "embedding_139.tmp_0@GRAD" - slot_gradient: "embedding_140.tmp_0@GRAD" - slot_gradient: "embedding_141.tmp_0@GRAD" - slot_gradient: "embedding_142.tmp_0@GRAD" - slot_gradient: "embedding_143.tmp_0@GRAD" - slot_gradient: "embedding_144.tmp_0@GRAD" - slot_gradient: "embedding_145.tmp_0@GRAD" - slot_gradient: "embedding_146.tmp_0@GRAD" - slot_gradient: "embedding_147.tmp_0@GRAD" - slot_gradient: "embedding_148.tmp_0@GRAD" - slot_gradient: "embedding_149.tmp_0@GRAD" - slot_gradient: "embedding_150.tmp_0@GRAD" - slot_gradient: "embedding_151.tmp_0@GRAD" - slot_gradient: "embedding_152.tmp_0@GRAD" - slot_gradient: "embedding_153.tmp_0@GRAD" - slot_gradient: "embedding_154.tmp_0@GRAD" - slot_gradient: "embedding_155.tmp_0@GRAD" - slot_gradient: "embedding_156.tmp_0@GRAD" - slot_gradient: "embedding_157.tmp_0@GRAD" - slot_gradient: "embedding_158.tmp_0@GRAD" - slot_gradient: "embedding_159.tmp_0@GRAD" - slot_gradient: "embedding_160.tmp_0@GRAD" - slot_gradient: "embedding_161.tmp_0@GRAD" - slot_gradient: "embedding_162.tmp_0@GRAD" - slot_gradient: "embedding_163.tmp_0@GRAD" - slot_gradient: "embedding_164.tmp_0@GRAD" - slot_gradient: "embedding_165.tmp_0@GRAD" - slot_gradient: "embedding_166.tmp_0@GRAD" - slot_gradient: "embedding_167.tmp_0@GRAD" - slot_gradient: "embedding_168.tmp_0@GRAD" - slot_gradient: "embedding_169.tmp_0@GRAD" - slot_gradient: "embedding_170.tmp_0@GRAD" - slot_gradient: "embedding_171.tmp_0@GRAD" - slot_gradient: "embedding_172.tmp_0@GRAD" - slot_gradient: "embedding_173.tmp_0@GRAD" - slot_gradient: "embedding_174.tmp_0@GRAD" - slot_gradient: "embedding_175.tmp_0@GRAD" - slot_gradient: "embedding_176.tmp_0@GRAD" - slot_gradient: "embedding_177.tmp_0@GRAD" - slot_gradient: "embedding_178.tmp_0@GRAD" - slot_gradient: "embedding_179.tmp_0@GRAD" - slot_gradient: "embedding_180.tmp_0@GRAD" - slot_gradient: "embedding_181.tmp_0@GRAD" - slot_gradient: "embedding_182.tmp_0@GRAD" - slot_gradient: "embedding_183.tmp_0@GRAD" - slot_gradient: "embedding_184.tmp_0@GRAD" - slot_gradient: "embedding_185.tmp_0@GRAD" - slot_gradient: "embedding_186.tmp_0@GRAD" - slot_gradient: "embedding_187.tmp_0@GRAD" - slot_gradient: "embedding_188.tmp_0@GRAD" - slot_gradient: "embedding_189.tmp_0@GRAD" - slot_gradient: "embedding_190.tmp_0@GRAD" - slot_gradient: "embedding_191.tmp_0@GRAD" - slot_gradient: "embedding_192.tmp_0@GRAD" - slot_gradient: "embedding_193.tmp_0@GRAD" - slot_gradient: "embedding_194.tmp_0@GRAD" - slot_gradient: "embedding_195.tmp_0@GRAD" - slot_gradient: "embedding_196.tmp_0@GRAD" - slot_gradient: "embedding_197.tmp_0@GRAD" - slot_gradient: "embedding_198.tmp_0@GRAD" - slot_gradient: "embedding_199.tmp_0@GRAD" - slot_gradient: "embedding_200.tmp_0@GRAD" - slot_gradient: "embedding_201.tmp_0@GRAD" - slot_gradient: "embedding_202.tmp_0@GRAD" - slot_gradient: "embedding_203.tmp_0@GRAD" - slot_gradient: "embedding_204.tmp_0@GRAD" - slot_gradient: "embedding_205.tmp_0@GRAD" - slot_gradient: "embedding_206.tmp_0@GRAD" - slot_gradient: "embedding_207.tmp_0@GRAD" - slot_gradient: "embedding_208.tmp_0@GRAD" - slot_gradient: "embedding_209.tmp_0@GRAD" - slot_gradient: "embedding_210.tmp_0@GRAD" - slot_gradient: "embedding_211.tmp_0@GRAD" - slot_gradient: "embedding_212.tmp_0@GRAD" - slot_gradient: "embedding_213.tmp_0@GRAD" - slot_gradient: "embedding_214.tmp_0@GRAD" - slot_gradient: "embedding_215.tmp_0@GRAD" - slot_gradient: "embedding_216.tmp_0@GRAD" - slot_gradient: "embedding_217.tmp_0@GRAD" - slot_gradient: "embedding_218.tmp_0@GRAD" - slot_gradient: "embedding_219.tmp_0@GRAD" - slot_gradient: "embedding_220.tmp_0@GRAD" - slot_gradient: "embedding_221.tmp_0@GRAD" - slot_gradient: "embedding_222.tmp_0@GRAD" - slot_gradient: "embedding_223.tmp_0@GRAD" - slot_gradient: "embedding_224.tmp_0@GRAD" - slot_gradient: "embedding_225.tmp_0@GRAD" - slot_gradient: "embedding_226.tmp_0@GRAD" - slot_gradient: "embedding_227.tmp_0@GRAD" - slot_gradient: "embedding_228.tmp_0@GRAD" - slot_gradient: "embedding_229.tmp_0@GRAD" - slot_gradient: "embedding_230.tmp_0@GRAD" - slot_gradient: "embedding_231.tmp_0@GRAD" - slot_gradient: "embedding_232.tmp_0@GRAD" - slot_gradient: "embedding_233.tmp_0@GRAD" - slot_gradient: "embedding_234.tmp_0@GRAD" - slot_gradient: "embedding_235.tmp_0@GRAD" - slot_gradient: "embedding_236.tmp_0@GRAD" - slot_gradient: "embedding_237.tmp_0@GRAD" - slot_gradient: "embedding_238.tmp_0@GRAD" - slot_gradient: "embedding_239.tmp_0@GRAD" - slot_gradient: "embedding_240.tmp_0@GRAD" - slot_gradient: "embedding_241.tmp_0@GRAD" - slot_gradient: "embedding_242.tmp_0@GRAD" - slot_gradient: "embedding_243.tmp_0@GRAD" - slot_gradient: "embedding_244.tmp_0@GRAD" - slot_gradient: "embedding_245.tmp_0@GRAD" - slot_gradient: "embedding_246.tmp_0@GRAD" - slot_gradient: "embedding_247.tmp_0@GRAD" - slot_gradient: "embedding_248.tmp_0@GRAD" - slot_gradient: "embedding_249.tmp_0@GRAD" - slot_gradient: "embedding_250.tmp_0@GRAD" - slot_gradient: "embedding_251.tmp_0@GRAD" - slot_gradient: "embedding_252.tmp_0@GRAD" - slot_gradient: "embedding_253.tmp_0@GRAD" - slot_gradient: "embedding_254.tmp_0@GRAD" - slot_gradient: "embedding_255.tmp_0@GRAD" - slot_gradient: "embedding_256.tmp_0@GRAD" - slot_gradient: "embedding_257.tmp_0@GRAD" - slot_gradient: "embedding_258.tmp_0@GRAD" - slot_gradient: "embedding_259.tmp_0@GRAD" - slot_gradient: "embedding_260.tmp_0@GRAD" - slot_gradient: "embedding_261.tmp_0@GRAD" - slot_gradient: "embedding_262.tmp_0@GRAD" - slot_gradient: "embedding_263.tmp_0@GRAD" - slot_gradient: "embedding_264.tmp_0@GRAD" - slot_gradient: "embedding_265.tmp_0@GRAD" - slot_gradient: "embedding_266.tmp_0@GRAD" - slot_gradient: "embedding_267.tmp_0@GRAD" - slot_gradient: "embedding_268.tmp_0@GRAD" - slot_gradient: "embedding_269.tmp_0@GRAD" - slot_gradient: "embedding_270.tmp_0@GRAD" - slot_gradient: "embedding_271.tmp_0@GRAD" - slot_gradient: "embedding_272.tmp_0@GRAD" - slot_gradient: "embedding_273.tmp_0@GRAD" - slot_gradient: "embedding_274.tmp_0@GRAD" - slot_gradient: "embedding_275.tmp_0@GRAD" - slot_gradient: "embedding_276.tmp_0@GRAD" - slot_gradient: "embedding_277.tmp_0@GRAD" - slot_gradient: "embedding_278.tmp_0@GRAD" - slot_gradient: "embedding_279.tmp_0@GRAD" - slot_gradient: "embedding_280.tmp_0@GRAD" - slot_gradient: "embedding_281.tmp_0@GRAD" - slot_gradient: "embedding_282.tmp_0@GRAD" - slot_gradient: "embedding_283.tmp_0@GRAD" - slot_gradient: "embedding_284.tmp_0@GRAD" - slot_gradient: "embedding_285.tmp_0@GRAD" - slot_gradient: "embedding_286.tmp_0@GRAD" - slot_gradient: "embedding_287.tmp_0@GRAD" - slot_gradient: "embedding_288.tmp_0@GRAD" - slot_gradient: "embedding_289.tmp_0@GRAD" - slot_gradient: "embedding_290.tmp_0@GRAD" - slot_gradient: "embedding_291.tmp_0@GRAD" - slot_gradient: "embedding_292.tmp_0@GRAD" - slot_gradient: "embedding_293.tmp_0@GRAD" - slot_gradient: "embedding_294.tmp_0@GRAD" - slot_gradient: "embedding_295.tmp_0@GRAD" - slot_gradient: "embedding_296.tmp_0@GRAD" - slot_gradient: "embedding_297.tmp_0@GRAD" - slot_gradient: "embedding_298.tmp_0@GRAD" - slot_gradient: "embedding_299.tmp_0@GRAD" - slot_gradient: "embedding_300.tmp_0@GRAD" - slot_gradient: "embedding_301.tmp_0@GRAD" - slot_gradient: "embedding_302.tmp_0@GRAD" - slot_gradient: "embedding_303.tmp_0@GRAD" - slot_gradient: "embedding_304.tmp_0@GRAD" - slot_gradient: "embedding_305.tmp_0@GRAD" - slot_gradient: "embedding_306.tmp_0@GRAD" - slot_gradient: "embedding_307.tmp_0@GRAD" - slot_gradient: "embedding_308.tmp_0@GRAD" - slot_gradient: "embedding_309.tmp_0@GRAD" - slot_gradient: "embedding_310.tmp_0@GRAD" - slot_gradient: "embedding_311.tmp_0@GRAD" - slot_gradient: "embedding_312.tmp_0@GRAD" - slot_gradient: "embedding_313.tmp_0@GRAD" - slot_gradient: "embedding_314.tmp_0@GRAD" - slot_gradient: "embedding_315.tmp_0@GRAD" - slot_gradient: "embedding_316.tmp_0@GRAD" - slot_gradient: "embedding_317.tmp_0@GRAD" - slot_gradient: "embedding_318.tmp_0@GRAD" - slot_gradient: "embedding_319.tmp_0@GRAD" - slot_gradient: "embedding_320.tmp_0@GRAD" - slot_gradient: "embedding_321.tmp_0@GRAD" - slot_gradient: "embedding_322.tmp_0@GRAD" - slot_gradient: "embedding_323.tmp_0@GRAD" - slot_gradient: "embedding_324.tmp_0@GRAD" - slot_gradient: "embedding_325.tmp_0@GRAD" - slot_gradient: "embedding_326.tmp_0@GRAD" - slot_gradient: "embedding_327.tmp_0@GRAD" - slot_gradient: "embedding_328.tmp_0@GRAD" - slot_gradient: "embedding_329.tmp_0@GRAD" - slot_gradient: "embedding_330.tmp_0@GRAD" - slot_gradient: "embedding_331.tmp_0@GRAD" - slot_gradient: "embedding_332.tmp_0@GRAD" - slot_gradient: "embedding_333.tmp_0@GRAD" - slot_gradient: "embedding_334.tmp_0@GRAD" - slot_gradient: "embedding_335.tmp_0@GRAD" - slot_gradient: "embedding_336.tmp_0@GRAD" - slot_gradient: "embedding_337.tmp_0@GRAD" - slot_gradient: "embedding_338.tmp_0@GRAD" - slot_gradient: "embedding_339.tmp_0@GRAD" - slot_gradient: "embedding_340.tmp_0@GRAD" - slot_gradient: "embedding_341.tmp_0@GRAD" - slot_gradient: "embedding_342.tmp_0@GRAD" - slot_gradient: "embedding_343.tmp_0@GRAD" - slot_gradient: "embedding_344.tmp_0@GRAD" - slot_gradient: "embedding_345.tmp_0@GRAD" - slot_gradient: "embedding_346.tmp_0@GRAD" - slot_gradient: "embedding_347.tmp_0@GRAD" - slot_gradient: "embedding_348.tmp_0@GRAD" - slot_gradient: "embedding_349.tmp_0@GRAD" - slot_gradient: "embedding_350.tmp_0@GRAD" - slot_gradient: "embedding_351.tmp_0@GRAD" - slot_gradient: "embedding_352.tmp_0@GRAD" - slot_gradient: "embedding_353.tmp_0@GRAD" - slot_gradient: "embedding_354.tmp_0@GRAD" - slot_gradient: "embedding_355.tmp_0@GRAD" - slot_gradient: "embedding_356.tmp_0@GRAD" - slot_gradient: "embedding_357.tmp_0@GRAD" - slot_gradient: "embedding_358.tmp_0@GRAD" - slot_gradient: "embedding_359.tmp_0@GRAD" - slot_gradient: "embedding_360.tmp_0@GRAD" - slot_gradient: "embedding_361.tmp_0@GRAD" - slot_gradient: "embedding_362.tmp_0@GRAD" - slot_gradient: "embedding_363.tmp_0@GRAD" - slot_gradient: "embedding_364.tmp_0@GRAD" - slot_gradient: "embedding_365.tmp_0@GRAD" - slot_gradient: "embedding_366.tmp_0@GRAD" - slot_gradient: "embedding_367.tmp_0@GRAD" - slot_gradient: "embedding_368.tmp_0@GRAD" - slot_gradient: "embedding_369.tmp_0@GRAD" - slot_gradient: "embedding_370.tmp_0@GRAD" - slot_gradient: "embedding_371.tmp_0@GRAD" - slot_gradient: "embedding_372.tmp_0@GRAD" - slot_gradient: "embedding_373.tmp_0@GRAD" - slot_gradient: "embedding_374.tmp_0@GRAD" - slot_gradient: "embedding_375.tmp_0@GRAD" - slot_gradient: "embedding_376.tmp_0@GRAD" - slot_gradient: "embedding_377.tmp_0@GRAD" - slot_gradient: "embedding_378.tmp_0@GRAD" - slot_gradient: "embedding_379.tmp_0@GRAD" - slot_gradient: "embedding_380.tmp_0@GRAD" - slot_gradient: "embedding_381.tmp_0@GRAD" - slot_gradient: "embedding_382.tmp_0@GRAD" - slot_gradient: "embedding_383.tmp_0@GRAD" - slot_gradient: "embedding_384.tmp_0@GRAD" - slot_gradient: "embedding_385.tmp_0@GRAD" - slot_gradient: "embedding_386.tmp_0@GRAD" - slot_gradient: "embedding_387.tmp_0@GRAD" - slot_gradient: "embedding_388.tmp_0@GRAD" - slot_gradient: "embedding_389.tmp_0@GRAD" - slot_gradient: "embedding_390.tmp_0@GRAD" - slot_gradient: "embedding_391.tmp_0@GRAD" - slot_gradient: "embedding_392.tmp_0@GRAD" - slot_gradient: "embedding_393.tmp_0@GRAD" - slot_gradient: "embedding_394.tmp_0@GRAD" - slot_gradient: "embedding_395.tmp_0@GRAD" - slot_gradient: "embedding_396.tmp_0@GRAD" - slot_gradient: "embedding_397.tmp_0@GRAD" - slot_gradient: "embedding_398.tmp_0@GRAD" - slot_gradient: "embedding_399.tmp_0@GRAD" - slot_gradient: "embedding_400.tmp_0@GRAD" - slot_gradient: "embedding_401.tmp_0@GRAD" - slot_gradient: "embedding_402.tmp_0@GRAD" - slot_gradient: "embedding_403.tmp_0@GRAD" - slot_gradient: "embedding_404.tmp_0@GRAD" - slot_gradient: "embedding_405.tmp_0@GRAD" - slot_gradient: "embedding_406.tmp_0@GRAD" - slot_gradient: "embedding_407.tmp_0@GRAD" - } - skip_op: "lookup_table" - skip_op: "lookup_table_grad" -} -fs_client_param { - uri: "afs://xingtian.afs.baidu.com:9902" - user: "mlarch" - passwd: "Fv1M87" - hadoop_bin: "$HADOOP_HOME/bin/hadoop" -} diff --git a/feed/feed_deploy/news_jingpai/package/my_nets/model_new.py b/feed/feed_deploy/news_jingpai/package/my_nets/model_new.py deleted file mode 100644 index 172ed804a52e8f53b8dbcd35874923408893e5c5..0000000000000000000000000000000000000000 --- a/feed/feed_deploy/news_jingpai/package/my_nets/model_new.py +++ /dev/null @@ -1,188 +0,0 @@ - -import paddle.fluid as fluid -from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet - -class Model(object): - def __init__(self, slot_file_name, all_slot_file, use_cvm, ins_tag, is_update_model): - self._slot_file_name = slot_file_name - self._use_cvm = use_cvm - self._dict_dim = 10 # it's fake - self._emb_dim = 9 + 2 - self._init_range = 0.2 - self._all_slot_file = all_slot_file - self._not_use_slots = [] - self._not_use_slotemb = [] - self._all_slots = [] - self._ins_tag_value = ins_tag - self._is_update_model = is_update_model - self._train_program = fluid.Program() - self._startup_program = fluid.Program() - self.save_vars = [] - with fluid.program_guard(self._train_program, self._startup_program): - with fluid.unique_name.guard(): - self.show = fluid.layers.data(name="show", shape=[-1, 1], dtype="int64", lod_level=0, append_batch_size=False) - self.label = fluid.layers.data(name="click", shape=[-1, 1], dtype="int64", lod_level=0, append_batch_size=False) - self.ins_weight = fluid.layers.data( - name="12345", - shape=[-1, 1], - dtype="float32", - lod_level=0, - append_batch_size=False, - stop_gradient=True) - self.ins_tag = fluid.layers.data( - name="23456", - shape=[-1, 1], - dtype="int64", - lod_level=0, - append_batch_size=False, - stop_gradient=True) - self.slots = [] - self.slots_name = [] - self.embs = [] - - - if self._ins_tag_value != 0: - self.x3_ts = fluid.layers.create_global_var(shape=[1,1], value=self._ins_tag_value, dtype='int64', persistable=True, force_cpu=True, name='X3') - self.x3_ts.stop_gradient=True - self.label_after_filter, self.filter_loss = fluid.layers.filter_by_instag(self.label, self.ins_tag, self.x3_ts, True) - self.label_after_filter.stop_gradient=True - self.show_after_filter, _ = fluid.layers.filter_by_instag(self.show, self.ins_tag, self.x3_ts, True) - self.show_after_filter.stop_gradient=True - self.ins_weight_after_filter, _ = fluid.layers.filter_by_instag(self.ins_weight, self.ins_tag, self.x3_ts, True) - self.ins_weight_after_filter.stop_gradient=True - - for line in open(self._slot_file_name, 'r'): - slot = line.strip() - self.slots_name.append(slot) - - self.all_slots_name = [] - for line in open(self._all_slot_file, 'r'): - self.all_slots_name.append(line.strip()) - for i in self.all_slots_name: - if i == self.ins_weight.name or i == self.ins_tag.name: - pass - elif i not in self.slots_name: - pass - else: - l = fluid.layers.data(name=i, shape=[1], dtype="int64", lod_level=1) - emb = fluid.layers.embedding(input=l, size=[self._dict_dim, self._emb_dim], is_sparse = True, is_distributed=True, param_attr=fluid.ParamAttr(name="embedding")) - self.slots.append(l) - self.embs.append(emb) - - if self._ins_tag_value != 0: - self.emb = self.slot_net(self.slots, self.label_after_filter) - else: - self.emb = self.slot_net(self.slots, self.label) - - self.similarity_norm = fluid.layers.sigmoid(fluid.layers.clip(self.emb, min=-15.0, max=15.0), name="similarity_norm") - - if self._ins_tag_value != 0: - self.cost = fluid.layers.log_loss(input=self.similarity_norm, label=fluid.layers.cast(x=self.label_after_filter, dtype='float32')) - else: - self.cost = fluid.layers.log_loss(input=self.similarity_norm, label=fluid.layers.cast(x=self.label, dtype='float32')) - - if self._ins_tag_value != 0: - self.cost = fluid.layers.elementwise_mul(self.cost, self.ins_weight_after_filter) - else: - self.cost = fluid.layers.elementwise_mul(self.cost, self.ins_weight) - - if self._ins_tag_value != 0: - self.cost = fluid.layers.elementwise_mul(self.cost, self.filter_loss) - - self.avg_cost = fluid.layers.mean(x=self.cost) - - binary_predict = fluid.layers.concat( - input=[fluid.layers.elementwise_sub(fluid.layers.ceil(self.similarity_norm), self.similarity_norm), self.similarity_norm], axis=1) - - if self._ins_tag_value != 0: - self.auc, batch_auc, [self.batch_stat_pos, self.batch_stat_neg, self.stat_pos, self.stat_neg] = \ - fluid.layers.auc(input=binary_predict, label=self.label_after_filter, curve='ROC', num_thresholds=4096) - self.sqrerr, self.abserr, self.prob, self.q, self.pos, self.total = \ - fluid.contrib.layers.ctr_metric_bundle(self.similarity_norm, fluid.layers.cast(x=self.label_after_filter, dtype='float32')) - - #self.precise_ins_num = fluid.layers.create_global_var(persistable=True, dtype='float32', shape=[1]) - #batch_ins_num = fluid.layers.reduce_sum(self.filter_loss) - #self.precise_ins_num = fluid.layers.elementwise_add(batch_ins_num, self.precise_ins_num) - - else: - self.auc, batch_auc, [self.batch_stat_pos, self.batch_stat_neg, self.stat_pos, self.stat_neg] = \ - fluid.layers.auc(input=binary_predict, label=self.label, curve='ROC', num_thresholds=4096) - self.sqrerr, self.abserr, self.prob, self.q, self.pos, self.total = \ - fluid.contrib.layers.ctr_metric_bundle(self.similarity_norm, fluid.layers.cast(x=self.label, dtype='float32')) - - - - self.tmp_train_program = fluid.Program() - self.tmp_startup_program = fluid.Program() - with fluid.program_guard(self.tmp_train_program, self.tmp_startup_program): - with fluid.unique_name.guard(): - self._all_slots = [self.show, self.label] - self._merge_slots = [] - for i in self.all_slots_name: - if i == self.ins_weight.name: - self._all_slots.append(self.ins_weight) - elif i == self.ins_tag.name: - self._all_slots.append(self.ins_tag) - else: - l = fluid.layers.data(name=i, shape=[1], dtype="int64", lod_level=1) - self._all_slots.append(l) - self._merge_slots.append(l) - - - - - def slot_net(self, slots, label, lr_x=1.0): - input_data = [] - cvms = [] - - cast_label = fluid.layers.cast(label, dtype='float32') - cast_label.stop_gradient = True - ones = fluid.layers.fill_constant_batch_size_like(input=label, shape=[-1, 1], dtype="float32", value=1) - show_clk = fluid.layers.cast(fluid.layers.concat([ones, cast_label], axis=1), dtype='float32') - show_clk.stop_gradient = True - - for index in range(len(slots)): - input_data.append(slots[index]) - emb = self.embs[index] - bow = fluid.layers.sequence_pool(input=emb, pool_type='sum') - cvm = fluid.layers.continuous_value_model(bow, show_clk, self._use_cvm) - cvms.append(cvm) - - concat = None - if self._ins_tag_value != 0: - concat = fluid.layers.concat(cvms, axis=1) - concat, _ = fluid.layers.filter_by_instag(concat, self.ins_tag, self.x3_ts, False) - else: - concat = fluid.layers.concat(cvms, axis=1) - bn = concat - if self._use_cvm: - bn = fluid.layers.data_norm(input=concat, name="bn6048", epsilon=1e-4, - param_attr={ - "batch_size":1e4, - "batch_sum_default":0.0, - "batch_square":1e4}) - self.save_vars.append(bn) - fc_layers_input = [bn] - if self._is_update_model: - fc_layers_size = [511, 255, 127, 127, 127, 1] - else: - fc_layers_size = [511, 255, 255, 127, 127, 127, 127, 1] - fc_layers_act = ["relu"] * (len(fc_layers_size) - 1) + [None] - scales_tmp = [bn.shape[1]] + fc_layers_size - scales = [] - for i in range(len(scales_tmp)): - scales.append(self._init_range / (scales_tmp[i] ** 0.5)) - for i in range(len(fc_layers_size)): - fc = fluid.layers.fc( - input = fc_layers_input[-1], - size = fc_layers_size[i], - act = fc_layers_act[i], - param_attr = \ - fluid.ParamAttr(learning_rate=lr_x, \ - initializer=fluid.initializer.NormalInitializer(loc=0.0, scale=1.0 * scales[i])), - bias_attr = \ - fluid.ParamAttr(learning_rate=lr_x, \ - initializer=fluid.initializer.NormalInitializer(loc=0.0, scale=1.0 * scales[i]))) - fc_layers_input.append(fc) - self.save_vars.append(fc) - return fc_layers_input[-1] diff --git a/feed/feed_deploy/news_jingpai/package/my_nets/model_new_jc.py b/feed/feed_deploy/news_jingpai/package/my_nets/model_new_jc.py deleted file mode 100644 index 31802b4a0f9f321bcbc7ad5ce68dc70e34cae9f6..0000000000000000000000000000000000000000 --- a/feed/feed_deploy/news_jingpai/package/my_nets/model_new_jc.py +++ /dev/null @@ -1,166 +0,0 @@ - -import paddle.fluid as fluid -from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet - -class ModelJoinCommon(object): - def __init__(self, slot_file_name, slot_common_file_name, all_slot_file, join_ins_tag): - self.slot_file_name = slot_file_name - self.slot_common_file_name = slot_common_file_name - self.dict_dim = 10 # it's fake - self.emb_dim = 9 + 2 - self.init_range = 0.2 - self.all_slot_file = all_slot_file - self.ins_tag_v = join_ins_tag - self._train_program = fluid.Program() - self._startup_program = fluid.Program() - with fluid.program_guard(self._train_program, self._startup_program): - with fluid.unique_name.guard(): - self.show = fluid.layers.data(name="show", shape=[-1, 1], dtype="int64", lod_level=0, append_batch_size=False) - self.label = fluid.layers.data(name="click", shape=[-1, 1], dtype="int64", lod_level=0, append_batch_size=False) - self.ins_weight = fluid.layers.data( - name="12345", - shape=[-1, 1], - dtype="float32", - lod_level=0, - append_batch_size=False, - stop_gradient=True) - self.ins_tag = fluid.layers.data( - name="23456", - shape=[-1, 1], - dtype="int64", - lod_level=0, - append_batch_size=False, - stop_gradient=True) - self.x3_ts = fluid.layers.create_global_var(shape=[1,1], value=self.ins_tag_v, dtype='int64', persistable=True, force_cpu=True, name='X3') - self.x3_ts.stop_gradient=True - self.label_after_filter, self.filter_loss = fluid.layers.filter_by_instag(self.label, self.ins_tag, self.x3_ts, True) - self.label_after_filter.stop_gradient=True - self.show_after_filter, _ = fluid.layers.filter_by_instag(self.show, self.ins_tag, self.x3_ts, True) - self.show_after_filter.stop_gradient=True - self.ins_weight_after_filter, _ = fluid.layers.filter_by_instag(self.ins_weight, self.ins_tag, self.x3_ts, True) - self.ins_weight_after_filter.stop_gradient=True - - self.slots_name = [] - for line in open(self.slot_file_name, 'r'): - slot = line.strip() - self.slots_name.append(slot) - - self.all_slots_name = [] - for line in open(self.all_slot_file, 'r'): - self.all_slots_name.append(line.strip()) - - self.slots = [] - self.embs = [] - for i in self.all_slots_name: - if i == self.ins_weight.name or i == self.ins_tag.name: - pass - elif i not in self.slots_name: - pass - else: - l = fluid.layers.data(name=i, shape=[1], dtype="int64", lod_level=1) - emb = fluid.layers.embedding(input=l, size=[self.dict_dim, self.emb_dim], is_sparse = True, is_distributed=True, param_attr=fluid.ParamAttr(name="embedding")) - self.slots.append(l) - self.embs.append(emb) - - self.common_slot_name = [] - for i in open(self.slot_common_file_name, 'r'): - self.common_slot_name.append(i.strip()) - - cvms = [] - cast_label = fluid.layers.cast(self.label, dtype='float32') - cast_label.stop_gradient = True - ones = fluid.layers.fill_constant_batch_size_like(input=self.label, shape=[-1, 1], dtype="float32", value=1) - show_clk = fluid.layers.cast(fluid.layers.concat([ones, cast_label], axis=1), dtype='float32') - show_clk.stop_gradient = True - for index in range(len(self.embs)): - emb = self.embs[index] - emb.stop_gradient=True - bow = fluid.layers.sequence_pool(input=emb, pool_type='sum') - bow.stop_gradient=True - cvm = fluid.layers.continuous_value_model(bow, show_clk, True) - cvm.stop_gradient=True - cvms.append(cvm) - concat_join = fluid.layers.concat(cvms, axis=1) - concat_join.stop_gradient=True - - cvms_common = [] - for index in range(len(self.common_slot_name)): - cvms_common.append(cvms[index]) - concat_common = fluid.layers.concat(cvms_common, axis=1) - concat_common.stop_gradient=True - - bn_common = fluid.layers.data_norm(input=concat_common, name="common", epsilon=1e-4, param_attr={"batch_size":1e4,"batch_sum_default":0.0,"batch_square":1e4}) - - concat_join, _ = fluid.layers.filter_by_instag(concat_join, self.ins_tag, self.x3_ts, False) - concat_join.stop_gradient=True - bn_join = fluid.layers.data_norm(input=concat_join, name="join", epsilon=1e-4, param_attr={"batch_size":1e4,"batch_sum_default":0.0,"batch_square":1e4}) - - join_fc = self.fcs(bn_join, "join") - join_similarity_norm = fluid.layers.sigmoid(fluid.layers.clip(join_fc, min=-15.0, max=15.0), name="join_similarity_norm") - join_cost = fluid.layers.log_loss(input=join_similarity_norm, label=fluid.layers.cast(x=self.label_after_filter, dtype='float32')) - join_cost = fluid.layers.elementwise_mul(join_cost, self.ins_weight_after_filter) - join_cost = fluid.layers.elementwise_mul(join_cost, self.filter_loss) - join_avg_cost = fluid.layers.mean(x=join_cost) - - common_fc = self.fcs(bn_common, "common") - common_similarity_norm = fluid.layers.sigmoid(fluid.layers.clip(common_fc, min=-15.0, max=15.0), name="common_similarity_norm") - common_cost = fluid.layers.log_loss(input=common_similarity_norm, label=fluid.layers.cast(x=self.label, dtype='float32')) - common_cost = fluid.layers.elementwise_mul(common_cost, self.ins_weight) - common_avg_cost = fluid.layers.mean(x=common_cost) - - self.joint_cost = join_avg_cost + common_avg_cost - - join_binary_predict = fluid.layers.concat( - input=[fluid.layers.elementwise_sub(fluid.layers.ceil(join_similarity_norm), join_similarity_norm), join_similarity_norm], axis=1) - self.join_auc, batch_auc, [self.join_batch_stat_pos, self.join_batch_stat_neg, self.join_stat_pos, self.join_stat_neg] = \ - fluid.layers.auc(input=join_binary_predict, label=self.label_after_filter, curve='ROC', num_thresholds=4096) - self.join_sqrerr, self.join_abserr, self.join_prob, self.join_q, self.join_pos, self.join_total = \ - fluid.contrib.layers.ctr_metric_bundle(join_similarity_norm, fluid.layers.cast(x=self.label_after_filter, dtype='float32')) - - common_binary_predict = fluid.layers.concat( - input=[fluid.layers.elementwise_sub(fluid.layers.ceil(common_similarity_norm), common_similarity_norm), common_similarity_norm], axis=1) - self.common_auc, batch_auc, [self.common_batch_stat_pos, self.common_batch_stat_neg, self.common_stat_pos, self.common_stat_neg] = \ - fluid.layers.auc(input=common_binary_predict, label=self.label, curve='ROC', num_thresholds=4096) - self.common_sqrerr, self.common_abserr, self.common_prob, self.common_q, self.common_pos, self.common_total = \ - fluid.contrib.layers.ctr_metric_bundle(common_similarity_norm, fluid.layers.cast(x=self.label, dtype='float32')) - - self.tmp_train_program = fluid.Program() - self.tmp_startup_program = fluid.Program() - with fluid.program_guard(self.tmp_train_program, self.tmp_startup_program): - with fluid.unique_name.guard(): - self._all_slots = [self.show, self.label] - self._merge_slots = [] - for i in self.all_slots_name: - if i == self.ins_weight.name: - self._all_slots.append(self.ins_weight) - elif i == self.ins_tag.name: - self._all_slots.append(self.ins_tag) - else: - l = fluid.layers.data(name=i, shape=[1], dtype="int64", lod_level=1) - self._all_slots.append(l) - self._merge_slots.append(l) - - - def fcs(self, bn, prefix): - fc_layers_input = [bn] - fc_layers_size = [511, 255, 255, 127, 127, 127, 127, 1] - fc_layers_act = ["relu"] * (len(fc_layers_size) - 1) + [None] - scales_tmp = [bn.shape[1]] + fc_layers_size - scales = [] - for i in range(len(scales_tmp)): - scales.append(self.init_range / (scales_tmp[i] ** 0.5)) - for i in range(len(fc_layers_size)): - name = prefix+"_"+str(i) - fc = fluid.layers.fc( - input = fc_layers_input[-1], - size = fc_layers_size[i], - act = fc_layers_act[i], - param_attr = \ - fluid.ParamAttr(learning_rate=1.0, \ - initializer=fluid.initializer.NormalInitializer(loc=0.0, scale=1.0 * scales[i])), - bias_attr = \ - fluid.ParamAttr(learning_rate=1.0, \ - initializer=fluid.initializer.NormalInitializer(loc=0.0, scale=1.0 * scales[i])), - name=name) - fc_layers_input.append(fc) - return fc_layers_input[-1] diff --git a/feed/feed_deploy/news_jingpai/package/my_nets/my_data_generator_str.py b/feed/feed_deploy/news_jingpai/package/my_nets/my_data_generator_str.py deleted file mode 100644 index d47664645704fca47a964c27c55c400a6efae7a4..0000000000000000000000000000000000000000 --- a/feed/feed_deploy/news_jingpai/package/my_nets/my_data_generator_str.py +++ /dev/null @@ -1,89 +0,0 @@ -import sys -import os -import paddle -import re -import collections -import time -#import paddle.fluid.incubate.data_generator as dg -import data_generate_base as dg - -class MyDataset(dg.MultiSlotDataGenerator): - def load_resource(self, dictf): - self._all_slots_dict = collections.OrderedDict() - with open(dictf, 'r') as f: - slots = f.readlines() - for index, slot in enumerate(slots): - #self._all_slots_dict[slot.strip()] = [False, index + 3] #+3 # - self._all_slots_dict[slot.strip()] = [False, index + 2] - - def generate_sample(self, line): - def data_iter_str(): - s = line.split('\t')[0].split()#[1:] - lineid = s[0] - elements = s[1:] #line.split('\t')[0].split()[1:] - padding = "0" - # output = [("lineid", [lineid]), ("show", [elements[0]]), ("click", [elements[1]])] - output = [("show", [elements[0]]), ("click", [elements[1]])] - output.extend([(slot, []) for slot in self._all_slots_dict]) - for elem in elements[2:]: - if elem.startswith("*"): - feasign = elem[1:] - slot = "12345" - elif elem.startswith("$"): - feasign = elem[1:] - if feasign == "D": - feasign = "0" - slot = "23456" - else: - feasign, slot = elem.split(':') - #feasign, slot = elem.split(':') - if not self._all_slots_dict.has_key(slot): - continue - self._all_slots_dict[slot][0] = True - index = self._all_slots_dict[slot][1] - output[index][1].append(feasign) - for slot in self._all_slots_dict: - visit, index = self._all_slots_dict[slot] - if visit: - self._all_slots_dict[slot][0] = False - else: - output[index][1].append(padding) - #print output - yield output - - return data_iter_str - - def data_iter(): - elements = line.split('\t')[0].split()[1:] - padding = 0 - output = [("show", [int(elements[0])]), ("click", [int(elements[1])])] - #output += [(slot, []) for slot in self._all_slots_dict] - output.extend([(slot, []) for slot in self._all_slots_dict]) - for elem in elements[2:]: - feasign, slot = elem.split(':') - if slot == "12345": - feasign = float(feasign) - else: - feasign = int(feasign) - if not self._all_slots_dict.has_key(slot): - continue - self._all_slots_dict[slot][0] = True - index = self._all_slots_dict[slot][1] - output[index][1].append(feasign) - for slot in self._all_slots_dict: - visit, index = self._all_slots_dict[slot] - if visit: - self._all_slots_dict[slot][0] = False - else: - output[index][1].append(padding) - yield output - return data_iter - - -if __name__ == "__main__": - #start = time.clock() - d = MyDataset() - d.load_resource("all_slot.dict") - d.run_from_stdin() - #elapsed = (time.clock() - start) - #print("Time used:",elapsed) diff --git a/feed/feed_deploy/news_jingpai/package/my_nets/old_program/old_join_common_startup_program.bin b/feed/feed_deploy/news_jingpai/package/my_nets/old_program/old_join_common_startup_program.bin deleted file mode 100644 index edb43bda80ce2044da2dcd586e90c207e9fe268c..0000000000000000000000000000000000000000 Binary files a/feed/feed_deploy/news_jingpai/package/my_nets/old_program/old_join_common_startup_program.bin and /dev/null differ diff --git a/feed/feed_deploy/news_jingpai/package/my_nets/old_program/old_join_common_train_program.bin b/feed/feed_deploy/news_jingpai/package/my_nets/old_program/old_join_common_train_program.bin deleted file mode 100644 index 89cb5d3dde949c31de7b3ce60b4108ac282a71f1..0000000000000000000000000000000000000000 Binary files a/feed/feed_deploy/news_jingpai/package/my_nets/old_program/old_join_common_train_program.bin and /dev/null differ diff --git a/feed/feed_deploy/news_jingpai/package/my_nets/old_program/old_update_main_program.bin b/feed/feed_deploy/news_jingpai/package/my_nets/old_program/old_update_main_program.bin deleted file mode 100644 index d06fb007bb1c568b0afcfcb460c7db2362e40503..0000000000000000000000000000000000000000 Binary files a/feed/feed_deploy/news_jingpai/package/my_nets/old_program/old_update_main_program.bin and /dev/null differ diff --git a/feed/feed_deploy/news_jingpai/package/my_nets/old_program/old_update_startup_program.bin b/feed/feed_deploy/news_jingpai/package/my_nets/old_program/old_update_startup_program.bin deleted file mode 100644 index 76b538aca52b9c46cfae8b79b8ffa772f4f5fc2d..0000000000000000000000000000000000000000 Binary files a/feed/feed_deploy/news_jingpai/package/my_nets/old_program/old_update_startup_program.bin and /dev/null differ diff --git a/feed/feed_deploy/news_jingpai/package/my_nets/old_slot/slot b/feed/feed_deploy/news_jingpai/package/my_nets/old_slot/slot deleted file mode 100644 index 3e91b42e36e3bef406efc31c50a997ea7dc58f86..0000000000000000000000000000000000000000 --- a/feed/feed_deploy/news_jingpai/package/my_nets/old_slot/slot +++ /dev/null @@ -1,408 +0,0 @@ -6048 -6002 -6145 -6202 -6201 -6121 -6738 -6119 -6146 -6120 -6147 -6122 -6123 -6118 -6142 -6143 -6008 -6148 -6151 -6127 -6144 -6094 -6083 -6952 -6739 -6150 -6109 -6003 -6099 -6149 -6129 -6203 -6153 -6152 -6128 -6106 -6251 -7082 -7515 -6951 -6949 -7080 -6066 -7507 -6186 -6007 -7514 -6125 -7506 -10001 -6006 -7023 -6085 -10000 -6098 -6250 -6110 -6124 -6090 -6082 -6067 -6101 -6004 -6191 -7075 -6948 -6157 -6126 -6188 -7077 -6070 -6111 -6087 -6103 -6107 -6194 -6156 -6005 -6247 -6814 -6158 -7122 -6058 -6189 -7058 -6059 -6115 -7079 -7081 -6833 -7024 -6108 -13342 -13345 -13412 -13343 -13350 -13346 -13409 -6009 -6011 -6012 -6013 -6014 -6015 -6019 -6023 -6024 -6027 -6029 -6031 -6050 -6060 -6068 -6069 -6089 -6095 -6105 -6112 -6130 -6131 -6132 -6134 -6161 -6162 -6163 -6166 -6182 -6183 -6185 -6190 -6212 -6213 -6231 -6233 -6234 -6236 -6238 -6239 -6240 -6241 -6242 -6243 -6244 -6245 -6354 -7002 -7005 -7008 -7010 -7012 -7013 -7015 -7016 -7017 -7018 -7019 -7020 -7045 -7046 -7048 -7049 -7052 -7054 -7056 -7064 -7066 -7076 -7078 -7083 -7084 -7085 -7086 -7087 -7088 -7089 -7090 -7099 -7100 -7101 -7102 -7103 -7104 -7105 -7109 -7124 -7126 -7136 -7142 -7143 -7144 -7145 -7146 -7147 -7148 -7150 -7151 -7152 -7153 -7154 -7155 -7156 -7157 -7047 -7050 -6253 -6254 -6255 -6256 -6257 -6259 -6260 -6261 -7170 -7185 -7186 -6751 -6755 -6757 -6759 -6760 -6763 -6764 -6765 -6766 -6767 -6768 -6769 -6770 -7502 -7503 -7504 -7505 -7510 -7511 -7512 -7513 -6806 -6807 -6808 -6809 -6810 -6811 -6812 -6813 -6815 -6816 -6817 -6819 -6823 -6828 -6831 -6840 -6845 -6875 -6879 -6881 -6888 -6889 -6947 -6950 -6956 -6957 -6959 -10006 -10008 -10009 -10010 -10011 -10016 -10017 -10018 -10019 -10020 -10021 -10022 -10023 -10024 -10029 -10030 -10031 -10032 -10033 -10034 -10035 -10036 -10037 -10038 -10039 -10040 -10041 -10042 -10044 -10045 -10046 -10051 -10052 -10053 -10054 -10055 -10056 -10057 -10060 -10066 -10069 -6820 -6821 -6822 -13333 -13334 -13335 -13336 -13337 -13338 -13339 -13340 -13341 -13351 -13352 -13353 -13359 -13361 -13362 -13363 -13366 -13367 -13368 -13369 -13370 -13371 -13375 -13376 -5700 -5702 -13400 -13401 -13402 -13403 -13404 -13406 -13407 -13408 -13410 -13417 -13418 -13419 -13420 -13422 -13425 -13427 -13428 -13429 -13430 -13431 -13433 -13434 -13436 -13437 -13326 -13330 -13331 -5717 -13442 -13451 -13452 -13455 -13456 -13457 -13458 -13459 -13460 -13461 -13462 -13463 -13464 -13465 -13466 -13467 -13468 -1104 -1106 -1107 -1108 -1109 -1110 -1111 -1112 -1113 -1114 -1115 -1116 -1117 -1119 -1120 -1121 -1122 -1123 -1124 -1125 -1126 -1127 -1128 -1129 -13812 -13813 -6740 -1490 -1491 diff --git a/feed/feed_deploy/news_jingpai/package/my_nets/old_slot/slot_common b/feed/feed_deploy/news_jingpai/package/my_nets/old_slot/slot_common deleted file mode 100644 index 869fb695282eed4a69928e7af52dd49a62e0d4c6..0000000000000000000000000000000000000000 --- a/feed/feed_deploy/news_jingpai/package/my_nets/old_slot/slot_common +++ /dev/null @@ -1,99 +0,0 @@ -6048 -6002 -6145 -6202 -6201 -6121 -6738 -6119 -6146 -6120 -6147 -6122 -6123 -6118 -6142 -6143 -6008 -6148 -6151 -6127 -6144 -6094 -6083 -6952 -6739 -6150 -6109 -6003 -6099 -6149 -6129 -6203 -6153 -6152 -6128 -6106 -6251 -7082 -7515 -6951 -6949 -7080 -6066 -7507 -6186 -6007 -7514 -6125 -7506 -10001 -6006 -7023 -6085 -10000 -6098 -6250 -6110 -6124 -6090 -6082 -6067 -6101 -6004 -6191 -7075 -6948 -6157 -6126 -6188 -7077 -6070 -6111 -6087 -6103 -6107 -6194 -6156 -6005 -6247 -6814 -6158 -7122 -6058 -6189 -7058 -6059 -6115 -7079 -7081 -6833 -7024 -6108 -13342 -13345 -13412 -13343 -13350 -13346 -13409 diff --git a/feed/feed_deploy/news_jingpai/package/my_nets/old_slot/to.py b/feed/feed_deploy/news_jingpai/package/my_nets/old_slot/to.py deleted file mode 100644 index 638c53647dc2adc1d502ed53630f07dbcfe8ffce..0000000000000000000000000000000000000000 --- a/feed/feed_deploy/news_jingpai/package/my_nets/old_slot/to.py +++ /dev/null @@ -1,5 +0,0 @@ -with open("session_slot", "r") as fin: - res = [] - for i in fin: - res.append("\"" + i.strip() + "\"") - print ", ".join(res) diff --git a/feed/feed_deploy/news_jingpai/package/my_nets/reqi_fleet_desc b/feed/feed_deploy/news_jingpai/package/my_nets/reqi_fleet_desc deleted file mode 100644 index c0d3ab823170856e9a50f6d9f6b1b4b323833bf2..0000000000000000000000000000000000000000 --- a/feed/feed_deploy/news_jingpai/package/my_nets/reqi_fleet_desc +++ /dev/null @@ -1,1461 +0,0 @@ -server_param { - downpour_server_param { - downpour_table_param { - table_id: 0 - table_class: "DownpourSparseTable" - shard_num: 1950 - sparse_table_cache_rate: 0.00055 - accessor { - accessor_class: "DownpourCtrAccessor" - sparse_sgd_param { - learning_rate: 0.05 - initial_g2sum: 3.0 - initial_range: 0.0001 - weight_bounds: -10.0 - weight_bounds: 10.0 - } - fea_dim: 11 - embedx_dim: 8 - embedx_threshold: 10 - downpour_accessor_param { - nonclk_coeff: 0.1 - click_coeff: 1 - base_threshold: 1.5 - delta_threshold: 0.25 - delta_keep_days: 16 - delete_after_unseen_days: 30 - show_click_decay_rate: 0.98 - delete_threshold: 0.8 - } - table_accessor_save_param { - param: 1 - converter: "(scripts/xbox_compressor_mf.py | bin/xbox_pb_converter)" - deconverter: "(bin/xbox_pb_deconverter | scripts/xbox_decompressor_mf.awk)" - } - table_accessor_save_param { - param: 2 - converter: "(scripts/xbox_compressor_mf.py | bin/xbox_pb_converter)" - deconverter: "(bin/xbox_pb_deconverter | scripts/xbox_decompressor_mf.awk)" - } - } - type: PS_SPARSE_TABLE - compress_in_save: true - } - downpour_table_param { - table_id: 1 - table_class: "DownpourDenseTable" - accessor { - accessor_class: "DownpourDenseValueAccessor" - dense_sgd_param { - name: "adam" - adam { - learning_rate: 5e-06 - avg_decay_rate: 0.999993 - ada_decay_rate: 0.9999 - ada_epsilon: 1e-08 - mom_decay_rate: 0.99 - } - naive { - learning_rate: 0.0002 - } - } - } - type: PS_DENSE_TABLE - compress_in_save: true - } - downpour_table_param { - table_id: 2 - table_class: "DownpourDenseDoubleTable" - accessor { - accessor_class: "DownpourDenseValueDoubleAccessor" - dense_sgd_param { - name: "summarydouble" - summary { - summary_decay_rate: 0.999999 - } - } - } - type: PS_DENSE_TABLE - compress_in_save: true - } - downpour_table_param { - table_id: 3 - table_class: "DownpourDenseTable" - accessor { - accessor_class: "DownpourDenseValueAccessor" - dense_sgd_param { - name: "adam" - adam { - learning_rate: 5e-06 - avg_decay_rate: 0.999993 - ada_decay_rate: 0.9999 - ada_epsilon: 1e-08 - mom_decay_rate: 0.99 - } - naive { - learning_rate: 0.0002 - } - } - } - type: PS_DENSE_TABLE - compress_in_save: true - } - service_param { - server_class: "DownpourBrpcPsServer" - client_class: "DownpourBrpcPsClient" - service_class: "DownpourPsService" - start_server_port: 0 - server_thread_num: 12 - } - } -} -trainer_param { - dense_table { - table_id: 1 - - dense_variable_name: "join_0.w_0" - dense_variable_name: "join_0.b_0" - dense_variable_name: "join_1.w_0" - dense_variable_name: "join_1.b_0" - dense_variable_name: "join_2.w_0" - dense_variable_name: "join_2.b_0" - dense_variable_name: "join_3.w_0" - dense_variable_name: "join_3.b_0" - dense_variable_name: "join_4.w_0" - dense_variable_name: "join_4.b_0" - dense_variable_name: "join_5.w_0" - dense_variable_name: "join_5.b_0" - dense_variable_name: "join_6.w_0" - dense_variable_name: "join_6.b_0" - dense_variable_name: "join_7.w_0" - dense_variable_name: "join_7.b_0" - - dense_variable_name: "common_0.w_0" - dense_variable_name: "common_0.b_0" - dense_variable_name: "common_1.w_0" - dense_variable_name: "common_1.b_0" - dense_variable_name: "common_2.w_0" - dense_variable_name: "common_2.b_0" - dense_variable_name: "common_3.w_0" - dense_variable_name: "common_3.b_0" - dense_variable_name: "common_4.w_0" - dense_variable_name: "common_4.b_0" - dense_variable_name: "common_5.w_0" - dense_variable_name: "common_5.b_0" - dense_variable_name: "common_6.w_0" - dense_variable_name: "common_6.b_0" - dense_variable_name: "common_7.w_0" - dense_variable_name: "common_7.b_0" - - dense_gradient_variable_name: "join_0.w_0@GRAD" - dense_gradient_variable_name: "join_0.b_0@GRAD" - dense_gradient_variable_name: "join_1.w_0@GRAD" - dense_gradient_variable_name: "join_1.b_0@GRAD" - dense_gradient_variable_name: "join_2.w_0@GRAD" - dense_gradient_variable_name: "join_2.b_0@GRAD" - dense_gradient_variable_name: "join_3.w_0@GRAD" - dense_gradient_variable_name: "join_3.b_0@GRAD" - dense_gradient_variable_name: "join_4.w_0@GRAD" - dense_gradient_variable_name: "join_4.b_0@GRAD" - dense_gradient_variable_name: "join_5.w_0@GRAD" - dense_gradient_variable_name: "join_5.b_0@GRAD" - dense_gradient_variable_name: "join_6.w_0@GRAD" - dense_gradient_variable_name: "join_6.b_0@GRAD" - dense_gradient_variable_name: "join_7.w_0@GRAD" - dense_gradient_variable_name: "join_7.b_0@GRAD" - - dense_gradient_variable_name: "common_0.w_0@GRAD" - dense_gradient_variable_name: "common_0.b_0@GRAD" - dense_gradient_variable_name: "common_1.w_0@GRAD" - dense_gradient_variable_name: "common_1.b_0@GRAD" - dense_gradient_variable_name: "common_2.w_0@GRAD" - dense_gradient_variable_name: "common_2.b_0@GRAD" - dense_gradient_variable_name: "common_3.w_0@GRAD" - dense_gradient_variable_name: "common_3.b_0@GRAD" - dense_gradient_variable_name: "common_4.w_0@GRAD" - dense_gradient_variable_name: "common_4.b_0@GRAD" - dense_gradient_variable_name: "common_5.w_0@GRAD" - dense_gradient_variable_name: "common_5.b_0@GRAD" - dense_gradient_variable_name: "common_6.w_0@GRAD" - dense_gradient_variable_name: "common_6.b_0@GRAD" - dense_gradient_variable_name: "common_7.w_0@GRAD" - dense_gradient_variable_name: "common_7.b_0@GRAD" - } - dense_table { - table_id: 2 - dense_variable_name: "join.batch_size" - dense_variable_name: "join.batch_sum" - dense_variable_name: "join.batch_square_sum" - - dense_variable_name: "common.batch_size" - dense_variable_name: "common.batch_sum" - dense_variable_name: "common.batch_square_sum" - - dense_gradient_variable_name: "join.batch_size@GRAD" - dense_gradient_variable_name: "join.batch_sum@GRAD" - dense_gradient_variable_name: "join.batch_square_sum@GRAD" - - dense_gradient_variable_name: "common.batch_size@GRAD" - dense_gradient_variable_name: "common.batch_sum@GRAD" - dense_gradient_variable_name: "common.batch_square_sum@GRAD" - } - dense_table { - table_id: 3 - dense_variable_name: "fc_0.w_0" - dense_variable_name: "fc_0.b_0" - dense_variable_name: "fc_1.w_0" - dense_variable_name: "fc_1.b_0" - dense_variable_name: "fc_2.w_0" - dense_variable_name: "fc_2.b_0" - dense_variable_name: "fc_3.w_0" - dense_variable_name: "fc_3.b_0" - dense_variable_name: "fc_4.w_0" - dense_variable_name: "fc_4.b_0" - dense_variable_name: "fc_5.w_0" - dense_variable_name: "fc_5.b_0" - dense_gradient_variable_name: "fc_0.w_0@GRAD" - dense_gradient_variable_name: "fc_0.b_0@GRAD" - dense_gradient_variable_name: "fc_1.w_0@GRAD" - dense_gradient_variable_name: "fc_1.b_0@GRAD" - dense_gradient_variable_name: "fc_2.w_0@GRAD" - dense_gradient_variable_name: "fc_2.b_0@GRAD" - dense_gradient_variable_name: "fc_3.w_0@GRAD" - dense_gradient_variable_name: "fc_3.b_0@GRAD" - dense_gradient_variable_name: "fc_4.w_0@GRAD" - dense_gradient_variable_name: "fc_4.b_0@GRAD" - dense_gradient_variable_name: "fc_5.w_0@GRAD" - dense_gradient_variable_name: "fc_5.b_0@GRAD" - } - sparse_table { - table_id: 0 - slot_key: "6048" - slot_key: "6002" - slot_key: "6145" - slot_key: "6202" - slot_key: "6201" - slot_key: "6121" - slot_key: "6738" - slot_key: "6119" - slot_key: "6146" - slot_key: "6120" - slot_key: "6147" - slot_key: "6122" - slot_key: "6123" - slot_key: "6118" - slot_key: "6142" - slot_key: "6143" - slot_key: "6008" - slot_key: "6148" - slot_key: "6151" - slot_key: "6127" - slot_key: "6144" - slot_key: "6094" - slot_key: "6083" - slot_key: "6952" - slot_key: "6739" - slot_key: "6150" - slot_key: "6109" - slot_key: "6003" - slot_key: "6099" - slot_key: "6149" - slot_key: "6129" - slot_key: "6203" - slot_key: "6153" - slot_key: "6152" - slot_key: "6128" - slot_key: "6106" - slot_key: "6251" - slot_key: "7082" - slot_key: "7515" - slot_key: "6951" - slot_key: "6949" - slot_key: "7080" - slot_key: "6066" - slot_key: "7507" - slot_key: "6186" - slot_key: "6007" - slot_key: "7514" - slot_key: "6125" - slot_key: "7506" - slot_key: "10001" - slot_key: "6006" - slot_key: "7023" - slot_key: "6085" - slot_key: "10000" - slot_key: "6098" - slot_key: "6250" - slot_key: "6110" - slot_key: "6124" - slot_key: "6090" - slot_key: "6082" - slot_key: "6067" - slot_key: "6101" - slot_key: "6004" - slot_key: "6191" - slot_key: "7075" - slot_key: "6948" - slot_key: "6157" - slot_key: "6126" - slot_key: "6188" - slot_key: "7077" - slot_key: "6070" - slot_key: "6111" - slot_key: "6087" - slot_key: "6103" - slot_key: "6107" - slot_key: "6194" - slot_key: "6156" - slot_key: "6005" - slot_key: "6247" - slot_key: "6814" - slot_key: "6158" - slot_key: "7122" - slot_key: "6058" - slot_key: "6189" - slot_key: "7058" - slot_key: "6059" - slot_key: "6115" - slot_key: "7079" - slot_key: "7081" - slot_key: "6833" - slot_key: "7024" - slot_key: "6108" - slot_key: "13342" - slot_key: "13345" - slot_key: "13412" - slot_key: "13343" - slot_key: "13350" - slot_key: "13346" - slot_key: "13409" - slot_key: "6009" - slot_key: "6011" - slot_key: "6012" - slot_key: "6013" - slot_key: "6014" - slot_key: "6015" - slot_key: "6019" - slot_key: "6023" - slot_key: "6024" - slot_key: "6027" - slot_key: "6029" - slot_key: "6031" - slot_key: "6050" - slot_key: "6060" - slot_key: "6068" - slot_key: "6069" - slot_key: "6089" - slot_key: "6095" - slot_key: "6105" - slot_key: "6112" - slot_key: "6130" - slot_key: "6131" - slot_key: "6132" - slot_key: "6134" - slot_key: "6161" - slot_key: "6162" - slot_key: "6163" - slot_key: "6166" - slot_key: "6182" - slot_key: "6183" - slot_key: "6185" - slot_key: "6190" - slot_key: "6212" - slot_key: "6213" - slot_key: "6231" - slot_key: "6233" - slot_key: "6234" - slot_key: "6236" - slot_key: "6238" - slot_key: "6239" - slot_key: "6240" - slot_key: "6241" - slot_key: "6242" - slot_key: "6243" - slot_key: "6244" - slot_key: "6245" - slot_key: "6354" - slot_key: "7002" - slot_key: "7005" - slot_key: "7008" - slot_key: "7010" - slot_key: "7013" - slot_key: "7015" - slot_key: "7019" - slot_key: "7020" - slot_key: "7045" - slot_key: "7046" - slot_key: "7048" - slot_key: "7049" - slot_key: "7052" - slot_key: "7054" - slot_key: "7056" - slot_key: "7064" - slot_key: "7066" - slot_key: "7076" - slot_key: "7078" - slot_key: "7083" - slot_key: "7084" - slot_key: "7085" - slot_key: "7086" - slot_key: "7087" - slot_key: "7088" - slot_key: "7089" - slot_key: "7090" - slot_key: "7099" - slot_key: "7100" - slot_key: "7101" - slot_key: "7102" - slot_key: "7103" - slot_key: "7104" - slot_key: "7105" - slot_key: "7109" - slot_key: "7124" - slot_key: "7126" - slot_key: "7136" - slot_key: "7142" - slot_key: "7143" - slot_key: "7144" - slot_key: "7145" - slot_key: "7146" - slot_key: "7147" - slot_key: "7148" - slot_key: "7150" - slot_key: "7151" - slot_key: "7152" - slot_key: "7153" - slot_key: "7154" - slot_key: "7155" - slot_key: "7156" - slot_key: "7157" - slot_key: "7047" - slot_key: "7050" - slot_key: "6257" - slot_key: "6259" - slot_key: "6260" - slot_key: "6261" - slot_key: "7170" - slot_key: "7185" - slot_key: "7186" - slot_key: "6751" - slot_key: "6755" - slot_key: "6757" - slot_key: "6759" - slot_key: "6760" - slot_key: "6763" - slot_key: "6764" - slot_key: "6765" - slot_key: "6766" - slot_key: "6767" - slot_key: "6768" - slot_key: "6769" - slot_key: "6770" - slot_key: "7502" - slot_key: "7503" - slot_key: "7504" - slot_key: "7505" - slot_key: "7510" - slot_key: "7511" - slot_key: "7512" - slot_key: "7513" - slot_key: "6806" - slot_key: "6807" - slot_key: "6808" - slot_key: "6809" - slot_key: "6810" - slot_key: "6811" - slot_key: "6812" - slot_key: "6813" - slot_key: "6815" - slot_key: "6816" - slot_key: "6817" - slot_key: "6819" - slot_key: "6823" - slot_key: "6828" - slot_key: "6831" - slot_key: "6840" - slot_key: "6845" - slot_key: "6875" - slot_key: "6879" - slot_key: "6881" - slot_key: "6888" - slot_key: "6889" - slot_key: "6947" - slot_key: "6950" - slot_key: "6956" - slot_key: "6957" - slot_key: "6959" - slot_key: "10006" - slot_key: "10008" - slot_key: "10009" - slot_key: "10010" - slot_key: "10011" - slot_key: "10016" - slot_key: "10017" - slot_key: "10018" - slot_key: "10019" - slot_key: "10020" - slot_key: "10021" - slot_key: "10022" - slot_key: "10023" - slot_key: "10024" - slot_key: "10029" - slot_key: "10030" - slot_key: "10031" - slot_key: "10032" - slot_key: "10033" - slot_key: "10034" - slot_key: "10035" - slot_key: "10036" - slot_key: "10037" - slot_key: "10038" - slot_key: "10039" - slot_key: "10040" - slot_key: "10041" - slot_key: "10042" - slot_key: "10044" - slot_key: "10045" - slot_key: "10046" - slot_key: "10051" - slot_key: "10052" - slot_key: "10053" - slot_key: "10054" - slot_key: "10055" - slot_key: "10056" - slot_key: "10057" - slot_key: "10060" - slot_key: "10066" - slot_key: "10069" - slot_key: "6820" - slot_key: "6821" - slot_key: "6822" - slot_key: "13333" - slot_key: "13334" - slot_key: "13335" - slot_key: "13336" - slot_key: "13337" - slot_key: "13338" - slot_key: "13339" - slot_key: "13340" - slot_key: "13341" - slot_key: "13351" - slot_key: "13352" - slot_key: "13353" - slot_key: "13359" - slot_key: "13361" - slot_key: "13362" - slot_key: "13363" - slot_key: "13366" - slot_key: "13367" - slot_key: "13368" - slot_key: "13369" - slot_key: "13370" - slot_key: "13371" - slot_key: "13375" - slot_key: "13376" - slot_key: "5700" - slot_key: "5702" - slot_key: "13400" - slot_key: "13401" - slot_key: "13402" - slot_key: "13403" - slot_key: "13404" - slot_key: "13406" - slot_key: "13407" - slot_key: "13408" - slot_key: "13410" - slot_key: "13417" - slot_key: "13418" - slot_key: "13419" - slot_key: "13420" - slot_key: "13422" - slot_key: "13425" - slot_key: "13427" - slot_key: "13428" - slot_key: "13429" - slot_key: "13430" - slot_key: "13431" - slot_key: "13433" - slot_key: "13434" - slot_key: "13436" - slot_key: "13437" - slot_key: "13326" - slot_key: "13330" - slot_key: "13331" - slot_key: "5717" - slot_key: "13442" - slot_key: "13451" - slot_key: "13452" - slot_key: "13455" - slot_key: "13456" - slot_key: "13457" - slot_key: "13458" - slot_key: "13459" - slot_key: "13460" - slot_key: "13461" - slot_key: "13462" - slot_key: "13463" - slot_key: "13464" - slot_key: "13465" - slot_key: "13466" - slot_key: "13467" - slot_key: "13468" - slot_key: "1104" - slot_key: "1106" - slot_key: "1107" - slot_key: "1108" - slot_key: "1109" - slot_key: "1110" - slot_key: "1111" - slot_key: "1112" - slot_key: "1113" - slot_key: "1114" - slot_key: "1115" - slot_key: "1116" - slot_key: "1117" - slot_key: "1119" - slot_key: "1120" - slot_key: "1121" - slot_key: "1122" - slot_key: "1123" - slot_key: "1124" - slot_key: "1125" - slot_key: "1126" - slot_key: "1127" - slot_key: "1128" - slot_key: "1129" - slot_key: "13812" - slot_key: "13813" - slot_key: "6740" - slot_key: "1490" - slot_key: "32915" - slot_key: "32950" - slot_key: "32952" - slot_key: "32953" - slot_key: "32954" - slot_key: "33077" - slot_key: "33085" - slot_key: "33086" - slot_value: "embedding_0.tmp_0" - slot_value: "embedding_1.tmp_0" - slot_value: "embedding_2.tmp_0" - slot_value: "embedding_3.tmp_0" - slot_value: "embedding_4.tmp_0" - slot_value: "embedding_5.tmp_0" - slot_value: "embedding_6.tmp_0" - slot_value: "embedding_7.tmp_0" - slot_value: "embedding_8.tmp_0" - slot_value: "embedding_9.tmp_0" - slot_value: "embedding_10.tmp_0" - slot_value: "embedding_11.tmp_0" - slot_value: "embedding_12.tmp_0" - slot_value: "embedding_13.tmp_0" - slot_value: "embedding_14.tmp_0" - slot_value: "embedding_15.tmp_0" - slot_value: "embedding_16.tmp_0" - slot_value: "embedding_17.tmp_0" - slot_value: "embedding_18.tmp_0" - slot_value: "embedding_19.tmp_0" - slot_value: "embedding_20.tmp_0" - slot_value: "embedding_21.tmp_0" - slot_value: "embedding_22.tmp_0" - slot_value: "embedding_23.tmp_0" - slot_value: "embedding_24.tmp_0" - slot_value: "embedding_25.tmp_0" - slot_value: "embedding_26.tmp_0" - slot_value: "embedding_27.tmp_0" - slot_value: "embedding_28.tmp_0" - slot_value: "embedding_29.tmp_0" - slot_value: "embedding_30.tmp_0" - slot_value: "embedding_31.tmp_0" - slot_value: "embedding_32.tmp_0" - slot_value: "embedding_33.tmp_0" - slot_value: "embedding_34.tmp_0" - slot_value: "embedding_35.tmp_0" - slot_value: "embedding_36.tmp_0" - slot_value: "embedding_37.tmp_0" - slot_value: "embedding_38.tmp_0" - slot_value: "embedding_39.tmp_0" - slot_value: "embedding_40.tmp_0" - slot_value: "embedding_41.tmp_0" - slot_value: "embedding_42.tmp_0" - slot_value: "embedding_43.tmp_0" - slot_value: "embedding_44.tmp_0" - slot_value: "embedding_45.tmp_0" - slot_value: "embedding_46.tmp_0" - slot_value: "embedding_47.tmp_0" - slot_value: "embedding_48.tmp_0" - slot_value: "embedding_49.tmp_0" - slot_value: "embedding_50.tmp_0" - slot_value: "embedding_51.tmp_0" - slot_value: "embedding_52.tmp_0" - slot_value: "embedding_53.tmp_0" - slot_value: "embedding_54.tmp_0" - slot_value: "embedding_55.tmp_0" - slot_value: "embedding_56.tmp_0" - slot_value: "embedding_57.tmp_0" - slot_value: "embedding_58.tmp_0" - slot_value: "embedding_59.tmp_0" - slot_value: "embedding_60.tmp_0" - slot_value: "embedding_61.tmp_0" - slot_value: "embedding_62.tmp_0" - slot_value: "embedding_63.tmp_0" - slot_value: "embedding_64.tmp_0" - slot_value: "embedding_65.tmp_0" - slot_value: "embedding_66.tmp_0" - slot_value: "embedding_67.tmp_0" - slot_value: "embedding_68.tmp_0" - slot_value: "embedding_69.tmp_0" - slot_value: "embedding_70.tmp_0" - slot_value: "embedding_71.tmp_0" - slot_value: "embedding_72.tmp_0" - slot_value: "embedding_73.tmp_0" - slot_value: "embedding_74.tmp_0" - slot_value: "embedding_75.tmp_0" - slot_value: "embedding_76.tmp_0" - slot_value: "embedding_77.tmp_0" - slot_value: "embedding_78.tmp_0" - slot_value: "embedding_79.tmp_0" - slot_value: "embedding_80.tmp_0" - slot_value: "embedding_81.tmp_0" - slot_value: "embedding_82.tmp_0" - slot_value: "embedding_83.tmp_0" - slot_value: "embedding_84.tmp_0" - slot_value: "embedding_85.tmp_0" - slot_value: "embedding_86.tmp_0" - slot_value: "embedding_87.tmp_0" - slot_value: "embedding_88.tmp_0" - slot_value: "embedding_89.tmp_0" - slot_value: "embedding_90.tmp_0" - slot_value: "embedding_91.tmp_0" - slot_value: "embedding_92.tmp_0" - slot_value: "embedding_93.tmp_0" - slot_value: "embedding_94.tmp_0" - slot_value: "embedding_95.tmp_0" - slot_value: "embedding_96.tmp_0" - slot_value: "embedding_97.tmp_0" - slot_value: "embedding_98.tmp_0" - slot_value: "embedding_99.tmp_0" - slot_value: "embedding_100.tmp_0" - slot_value: "embedding_101.tmp_0" - slot_value: "embedding_102.tmp_0" - slot_value: "embedding_103.tmp_0" - slot_value: "embedding_104.tmp_0" - slot_value: "embedding_105.tmp_0" - slot_value: "embedding_106.tmp_0" - slot_value: "embedding_107.tmp_0" - slot_value: "embedding_108.tmp_0" - slot_value: "embedding_109.tmp_0" - slot_value: "embedding_110.tmp_0" - slot_value: "embedding_111.tmp_0" - slot_value: "embedding_112.tmp_0" - slot_value: "embedding_113.tmp_0" - slot_value: "embedding_114.tmp_0" - slot_value: "embedding_115.tmp_0" - slot_value: "embedding_116.tmp_0" - slot_value: "embedding_117.tmp_0" - slot_value: "embedding_118.tmp_0" - slot_value: "embedding_119.tmp_0" - slot_value: "embedding_120.tmp_0" - slot_value: "embedding_121.tmp_0" - slot_value: "embedding_122.tmp_0" - slot_value: "embedding_123.tmp_0" - slot_value: "embedding_124.tmp_0" - slot_value: "embedding_125.tmp_0" - slot_value: "embedding_126.tmp_0" - slot_value: "embedding_127.tmp_0" - slot_value: "embedding_128.tmp_0" - slot_value: "embedding_129.tmp_0" - slot_value: "embedding_130.tmp_0" - slot_value: "embedding_131.tmp_0" - slot_value: "embedding_132.tmp_0" - slot_value: "embedding_133.tmp_0" - slot_value: "embedding_134.tmp_0" - slot_value: "embedding_135.tmp_0" - slot_value: "embedding_136.tmp_0" - slot_value: "embedding_137.tmp_0" - slot_value: "embedding_138.tmp_0" - slot_value: "embedding_139.tmp_0" - slot_value: "embedding_140.tmp_0" - slot_value: "embedding_141.tmp_0" - slot_value: "embedding_142.tmp_0" - slot_value: "embedding_143.tmp_0" - slot_value: "embedding_144.tmp_0" - slot_value: "embedding_145.tmp_0" - slot_value: "embedding_146.tmp_0" - slot_value: "embedding_147.tmp_0" - slot_value: "embedding_148.tmp_0" - slot_value: "embedding_149.tmp_0" - slot_value: "embedding_150.tmp_0" - slot_value: "embedding_151.tmp_0" - slot_value: "embedding_152.tmp_0" - slot_value: "embedding_153.tmp_0" - slot_value: "embedding_154.tmp_0" - slot_value: "embedding_155.tmp_0" - slot_value: "embedding_156.tmp_0" - slot_value: "embedding_157.tmp_0" - slot_value: "embedding_158.tmp_0" - slot_value: "embedding_159.tmp_0" - slot_value: "embedding_160.tmp_0" - slot_value: "embedding_161.tmp_0" - slot_value: "embedding_162.tmp_0" - slot_value: "embedding_163.tmp_0" - slot_value: "embedding_164.tmp_0" - slot_value: "embedding_165.tmp_0" - slot_value: "embedding_166.tmp_0" - slot_value: "embedding_167.tmp_0" - slot_value: "embedding_168.tmp_0" - slot_value: "embedding_169.tmp_0" - slot_value: "embedding_170.tmp_0" - slot_value: "embedding_171.tmp_0" - slot_value: "embedding_172.tmp_0" - slot_value: "embedding_173.tmp_0" - slot_value: "embedding_174.tmp_0" - slot_value: "embedding_175.tmp_0" - slot_value: "embedding_176.tmp_0" - slot_value: "embedding_177.tmp_0" - slot_value: "embedding_178.tmp_0" - slot_value: "embedding_179.tmp_0" - slot_value: "embedding_180.tmp_0" - slot_value: "embedding_181.tmp_0" - slot_value: "embedding_182.tmp_0" - slot_value: "embedding_183.tmp_0" - slot_value: "embedding_184.tmp_0" - slot_value: "embedding_185.tmp_0" - slot_value: "embedding_186.tmp_0" - slot_value: "embedding_187.tmp_0" - slot_value: "embedding_188.tmp_0" - slot_value: "embedding_189.tmp_0" - slot_value: "embedding_190.tmp_0" - slot_value: "embedding_191.tmp_0" - slot_value: "embedding_192.tmp_0" - slot_value: "embedding_193.tmp_0" - slot_value: "embedding_194.tmp_0" - slot_value: "embedding_195.tmp_0" - slot_value: "embedding_196.tmp_0" - slot_value: "embedding_197.tmp_0" - slot_value: "embedding_198.tmp_0" - slot_value: "embedding_199.tmp_0" - slot_value: "embedding_200.tmp_0" - slot_value: "embedding_201.tmp_0" - slot_value: "embedding_202.tmp_0" - slot_value: "embedding_203.tmp_0" - slot_value: "embedding_204.tmp_0" - slot_value: "embedding_205.tmp_0" - slot_value: "embedding_206.tmp_0" - slot_value: "embedding_207.tmp_0" - slot_value: "embedding_208.tmp_0" - slot_value: "embedding_209.tmp_0" - slot_value: "embedding_210.tmp_0" - slot_value: "embedding_211.tmp_0" - slot_value: "embedding_212.tmp_0" - slot_value: "embedding_213.tmp_0" - slot_value: "embedding_214.tmp_0" - slot_value: "embedding_215.tmp_0" - slot_value: "embedding_216.tmp_0" - slot_value: "embedding_217.tmp_0" - slot_value: "embedding_218.tmp_0" - slot_value: "embedding_219.tmp_0" - slot_value: "embedding_220.tmp_0" - slot_value: "embedding_221.tmp_0" - slot_value: "embedding_222.tmp_0" - slot_value: "embedding_223.tmp_0" - slot_value: "embedding_224.tmp_0" - slot_value: "embedding_225.tmp_0" - slot_value: "embedding_226.tmp_0" - slot_value: "embedding_227.tmp_0" - slot_value: "embedding_228.tmp_0" - slot_value: "embedding_229.tmp_0" - slot_value: "embedding_230.tmp_0" - slot_value: "embedding_231.tmp_0" - slot_value: "embedding_232.tmp_0" - slot_value: "embedding_233.tmp_0" - slot_value: "embedding_234.tmp_0" - slot_value: "embedding_235.tmp_0" - slot_value: "embedding_236.tmp_0" - slot_value: "embedding_237.tmp_0" - slot_value: "embedding_238.tmp_0" - slot_value: "embedding_239.tmp_0" - slot_value: "embedding_240.tmp_0" - slot_value: "embedding_241.tmp_0" - slot_value: "embedding_242.tmp_0" - slot_value: "embedding_243.tmp_0" - slot_value: "embedding_244.tmp_0" - slot_value: "embedding_245.tmp_0" - slot_value: "embedding_246.tmp_0" - slot_value: "embedding_247.tmp_0" - slot_value: "embedding_248.tmp_0" - slot_value: "embedding_249.tmp_0" - slot_value: "embedding_250.tmp_0" - slot_value: "embedding_251.tmp_0" - slot_value: "embedding_252.tmp_0" - slot_value: "embedding_253.tmp_0" - slot_value: "embedding_254.tmp_0" - slot_value: "embedding_255.tmp_0" - slot_value: "embedding_256.tmp_0" - slot_value: "embedding_257.tmp_0" - slot_value: "embedding_258.tmp_0" - slot_value: "embedding_259.tmp_0" - slot_value: "embedding_260.tmp_0" - slot_value: "embedding_261.tmp_0" - slot_value: "embedding_262.tmp_0" - slot_value: "embedding_263.tmp_0" - slot_value: "embedding_264.tmp_0" - slot_value: "embedding_265.tmp_0" - slot_value: "embedding_266.tmp_0" - slot_value: "embedding_267.tmp_0" - slot_value: "embedding_268.tmp_0" - slot_value: "embedding_269.tmp_0" - slot_value: "embedding_270.tmp_0" - slot_value: "embedding_271.tmp_0" - slot_value: "embedding_272.tmp_0" - slot_value: "embedding_273.tmp_0" - slot_value: "embedding_274.tmp_0" - slot_value: "embedding_275.tmp_0" - slot_value: "embedding_276.tmp_0" - slot_value: "embedding_277.tmp_0" - slot_value: "embedding_278.tmp_0" - slot_value: "embedding_279.tmp_0" - slot_value: "embedding_280.tmp_0" - slot_value: "embedding_281.tmp_0" - slot_value: "embedding_282.tmp_0" - slot_value: "embedding_283.tmp_0" - slot_value: "embedding_284.tmp_0" - slot_value: "embedding_285.tmp_0" - slot_value: "embedding_286.tmp_0" - slot_value: "embedding_287.tmp_0" - slot_value: "embedding_288.tmp_0" - slot_value: "embedding_289.tmp_0" - slot_value: "embedding_290.tmp_0" - slot_value: "embedding_291.tmp_0" - slot_value: "embedding_292.tmp_0" - slot_value: "embedding_293.tmp_0" - slot_value: "embedding_294.tmp_0" - slot_value: "embedding_295.tmp_0" - slot_value: "embedding_296.tmp_0" - slot_value: "embedding_297.tmp_0" - slot_value: "embedding_298.tmp_0" - slot_value: "embedding_299.tmp_0" - slot_value: "embedding_300.tmp_0" - slot_value: "embedding_301.tmp_0" - slot_value: "embedding_302.tmp_0" - slot_value: "embedding_303.tmp_0" - slot_value: "embedding_304.tmp_0" - slot_value: "embedding_305.tmp_0" - slot_value: "embedding_306.tmp_0" - slot_value: "embedding_307.tmp_0" - slot_value: "embedding_308.tmp_0" - slot_value: "embedding_309.tmp_0" - slot_value: "embedding_310.tmp_0" - slot_value: "embedding_311.tmp_0" - slot_value: "embedding_312.tmp_0" - slot_value: "embedding_313.tmp_0" - slot_value: "embedding_314.tmp_0" - slot_value: "embedding_315.tmp_0" - slot_value: "embedding_316.tmp_0" - slot_value: "embedding_317.tmp_0" - slot_value: "embedding_318.tmp_0" - slot_value: "embedding_319.tmp_0" - slot_value: "embedding_320.tmp_0" - slot_value: "embedding_321.tmp_0" - slot_value: "embedding_322.tmp_0" - slot_value: "embedding_323.tmp_0" - slot_value: "embedding_324.tmp_0" - slot_value: "embedding_325.tmp_0" - slot_value: "embedding_326.tmp_0" - slot_value: "embedding_327.tmp_0" - slot_value: "embedding_328.tmp_0" - slot_value: "embedding_329.tmp_0" - slot_value: "embedding_330.tmp_0" - slot_value: "embedding_331.tmp_0" - slot_value: "embedding_332.tmp_0" - slot_value: "embedding_333.tmp_0" - slot_value: "embedding_334.tmp_0" - slot_value: "embedding_335.tmp_0" - slot_value: "embedding_336.tmp_0" - slot_value: "embedding_337.tmp_0" - slot_value: "embedding_338.tmp_0" - slot_value: "embedding_339.tmp_0" - slot_value: "embedding_340.tmp_0" - slot_value: "embedding_341.tmp_0" - slot_value: "embedding_342.tmp_0" - slot_value: "embedding_343.tmp_0" - slot_value: "embedding_344.tmp_0" - slot_value: "embedding_345.tmp_0" - slot_value: "embedding_346.tmp_0" - slot_value: "embedding_347.tmp_0" - slot_value: "embedding_348.tmp_0" - slot_value: "embedding_349.tmp_0" - slot_value: "embedding_350.tmp_0" - slot_value: "embedding_351.tmp_0" - slot_value: "embedding_352.tmp_0" - slot_value: "embedding_353.tmp_0" - slot_value: "embedding_354.tmp_0" - slot_value: "embedding_355.tmp_0" - slot_value: "embedding_356.tmp_0" - slot_value: "embedding_357.tmp_0" - slot_value: "embedding_358.tmp_0" - slot_value: "embedding_359.tmp_0" - slot_value: "embedding_360.tmp_0" - slot_value: "embedding_361.tmp_0" - slot_value: "embedding_362.tmp_0" - slot_value: "embedding_363.tmp_0" - slot_value: "embedding_364.tmp_0" - slot_value: "embedding_365.tmp_0" - slot_value: "embedding_366.tmp_0" - slot_value: "embedding_367.tmp_0" - slot_value: "embedding_368.tmp_0" - slot_value: "embedding_369.tmp_0" - slot_value: "embedding_370.tmp_0" - slot_value: "embedding_371.tmp_0" - slot_value: "embedding_372.tmp_0" - slot_value: "embedding_373.tmp_0" - slot_value: "embedding_374.tmp_0" - slot_value: "embedding_375.tmp_0" - slot_value: "embedding_376.tmp_0" - slot_value: "embedding_377.tmp_0" - slot_value: "embedding_378.tmp_0" - slot_value: "embedding_379.tmp_0" - slot_value: "embedding_380.tmp_0" - slot_value: "embedding_381.tmp_0" - slot_value: "embedding_382.tmp_0" - slot_value: "embedding_383.tmp_0" - slot_value: "embedding_384.tmp_0" - slot_value: "embedding_385.tmp_0" - slot_value: "embedding_386.tmp_0" - slot_value: "embedding_387.tmp_0" - slot_value: "embedding_388.tmp_0" - slot_value: "embedding_389.tmp_0" - slot_value: "embedding_390.tmp_0" - slot_value: "embedding_391.tmp_0" - slot_value: "embedding_392.tmp_0" - slot_value: "embedding_393.tmp_0" - slot_value: "embedding_394.tmp_0" - slot_value: "embedding_395.tmp_0" - slot_value: "embedding_396.tmp_0" - slot_value: "embedding_397.tmp_0" - slot_value: "embedding_398.tmp_0" - slot_value: "embedding_399.tmp_0" - slot_value: "embedding_400.tmp_0" - slot_value: "embedding_401.tmp_0" - slot_value: "embedding_402.tmp_0" - slot_value: "embedding_403.tmp_0" - slot_value: "embedding_404.tmp_0" - slot_value: "embedding_405.tmp_0" - slot_value: "embedding_406.tmp_0" - slot_gradient: "embedding_0.tmp_0@GRAD" - slot_gradient: "embedding_1.tmp_0@GRAD" - slot_gradient: "embedding_2.tmp_0@GRAD" - slot_gradient: "embedding_3.tmp_0@GRAD" - slot_gradient: "embedding_4.tmp_0@GRAD" - slot_gradient: "embedding_5.tmp_0@GRAD" - slot_gradient: "embedding_6.tmp_0@GRAD" - slot_gradient: "embedding_7.tmp_0@GRAD" - slot_gradient: "embedding_8.tmp_0@GRAD" - slot_gradient: "embedding_9.tmp_0@GRAD" - slot_gradient: "embedding_10.tmp_0@GRAD" - slot_gradient: "embedding_11.tmp_0@GRAD" - slot_gradient: "embedding_12.tmp_0@GRAD" - slot_gradient: "embedding_13.tmp_0@GRAD" - slot_gradient: "embedding_14.tmp_0@GRAD" - slot_gradient: "embedding_15.tmp_0@GRAD" - slot_gradient: "embedding_16.tmp_0@GRAD" - slot_gradient: "embedding_17.tmp_0@GRAD" - slot_gradient: "embedding_18.tmp_0@GRAD" - slot_gradient: "embedding_19.tmp_0@GRAD" - slot_gradient: "embedding_20.tmp_0@GRAD" - slot_gradient: "embedding_21.tmp_0@GRAD" - slot_gradient: "embedding_22.tmp_0@GRAD" - slot_gradient: "embedding_23.tmp_0@GRAD" - slot_gradient: "embedding_24.tmp_0@GRAD" - slot_gradient: "embedding_25.tmp_0@GRAD" - slot_gradient: "embedding_26.tmp_0@GRAD" - slot_gradient: "embedding_27.tmp_0@GRAD" - slot_gradient: "embedding_28.tmp_0@GRAD" - slot_gradient: "embedding_29.tmp_0@GRAD" - slot_gradient: "embedding_30.tmp_0@GRAD" - slot_gradient: "embedding_31.tmp_0@GRAD" - slot_gradient: "embedding_32.tmp_0@GRAD" - slot_gradient: "embedding_33.tmp_0@GRAD" - slot_gradient: "embedding_34.tmp_0@GRAD" - slot_gradient: "embedding_35.tmp_0@GRAD" - slot_gradient: "embedding_36.tmp_0@GRAD" - slot_gradient: "embedding_37.tmp_0@GRAD" - slot_gradient: "embedding_38.tmp_0@GRAD" - slot_gradient: "embedding_39.tmp_0@GRAD" - slot_gradient: "embedding_40.tmp_0@GRAD" - slot_gradient: "embedding_41.tmp_0@GRAD" - slot_gradient: "embedding_42.tmp_0@GRAD" - slot_gradient: "embedding_43.tmp_0@GRAD" - slot_gradient: "embedding_44.tmp_0@GRAD" - slot_gradient: "embedding_45.tmp_0@GRAD" - slot_gradient: "embedding_46.tmp_0@GRAD" - slot_gradient: "embedding_47.tmp_0@GRAD" - slot_gradient: "embedding_48.tmp_0@GRAD" - slot_gradient: "embedding_49.tmp_0@GRAD" - slot_gradient: "embedding_50.tmp_0@GRAD" - slot_gradient: "embedding_51.tmp_0@GRAD" - slot_gradient: "embedding_52.tmp_0@GRAD" - slot_gradient: "embedding_53.tmp_0@GRAD" - slot_gradient: "embedding_54.tmp_0@GRAD" - slot_gradient: "embedding_55.tmp_0@GRAD" - slot_gradient: "embedding_56.tmp_0@GRAD" - slot_gradient: "embedding_57.tmp_0@GRAD" - slot_gradient: "embedding_58.tmp_0@GRAD" - slot_gradient: "embedding_59.tmp_0@GRAD" - slot_gradient: "embedding_60.tmp_0@GRAD" - slot_gradient: "embedding_61.tmp_0@GRAD" - slot_gradient: "embedding_62.tmp_0@GRAD" - slot_gradient: "embedding_63.tmp_0@GRAD" - slot_gradient: "embedding_64.tmp_0@GRAD" - slot_gradient: "embedding_65.tmp_0@GRAD" - slot_gradient: "embedding_66.tmp_0@GRAD" - slot_gradient: "embedding_67.tmp_0@GRAD" - slot_gradient: "embedding_68.tmp_0@GRAD" - slot_gradient: "embedding_69.tmp_0@GRAD" - slot_gradient: "embedding_70.tmp_0@GRAD" - slot_gradient: "embedding_71.tmp_0@GRAD" - slot_gradient: "embedding_72.tmp_0@GRAD" - slot_gradient: "embedding_73.tmp_0@GRAD" - slot_gradient: "embedding_74.tmp_0@GRAD" - slot_gradient: "embedding_75.tmp_0@GRAD" - slot_gradient: "embedding_76.tmp_0@GRAD" - slot_gradient: "embedding_77.tmp_0@GRAD" - slot_gradient: "embedding_78.tmp_0@GRAD" - slot_gradient: "embedding_79.tmp_0@GRAD" - slot_gradient: "embedding_80.tmp_0@GRAD" - slot_gradient: "embedding_81.tmp_0@GRAD" - slot_gradient: "embedding_82.tmp_0@GRAD" - slot_gradient: "embedding_83.tmp_0@GRAD" - slot_gradient: "embedding_84.tmp_0@GRAD" - slot_gradient: "embedding_85.tmp_0@GRAD" - slot_gradient: "embedding_86.tmp_0@GRAD" - slot_gradient: "embedding_87.tmp_0@GRAD" - slot_gradient: "embedding_88.tmp_0@GRAD" - slot_gradient: "embedding_89.tmp_0@GRAD" - slot_gradient: "embedding_90.tmp_0@GRAD" - slot_gradient: "embedding_91.tmp_0@GRAD" - slot_gradient: "embedding_92.tmp_0@GRAD" - slot_gradient: "embedding_93.tmp_0@GRAD" - slot_gradient: "embedding_94.tmp_0@GRAD" - slot_gradient: "embedding_95.tmp_0@GRAD" - slot_gradient: "embedding_96.tmp_0@GRAD" - slot_gradient: "embedding_97.tmp_0@GRAD" - slot_gradient: "embedding_98.tmp_0@GRAD" - slot_gradient: "embedding_99.tmp_0@GRAD" - slot_gradient: "embedding_100.tmp_0@GRAD" - slot_gradient: "embedding_101.tmp_0@GRAD" - slot_gradient: "embedding_102.tmp_0@GRAD" - slot_gradient: "embedding_103.tmp_0@GRAD" - slot_gradient: "embedding_104.tmp_0@GRAD" - slot_gradient: "embedding_105.tmp_0@GRAD" - slot_gradient: "embedding_106.tmp_0@GRAD" - slot_gradient: "embedding_107.tmp_0@GRAD" - slot_gradient: "embedding_108.tmp_0@GRAD" - slot_gradient: "embedding_109.tmp_0@GRAD" - slot_gradient: "embedding_110.tmp_0@GRAD" - slot_gradient: "embedding_111.tmp_0@GRAD" - slot_gradient: "embedding_112.tmp_0@GRAD" - slot_gradient: "embedding_113.tmp_0@GRAD" - slot_gradient: "embedding_114.tmp_0@GRAD" - slot_gradient: "embedding_115.tmp_0@GRAD" - slot_gradient: "embedding_116.tmp_0@GRAD" - slot_gradient: "embedding_117.tmp_0@GRAD" - slot_gradient: "embedding_118.tmp_0@GRAD" - slot_gradient: "embedding_119.tmp_0@GRAD" - slot_gradient: "embedding_120.tmp_0@GRAD" - slot_gradient: "embedding_121.tmp_0@GRAD" - slot_gradient: "embedding_122.tmp_0@GRAD" - slot_gradient: "embedding_123.tmp_0@GRAD" - slot_gradient: "embedding_124.tmp_0@GRAD" - slot_gradient: "embedding_125.tmp_0@GRAD" - slot_gradient: "embedding_126.tmp_0@GRAD" - slot_gradient: "embedding_127.tmp_0@GRAD" - slot_gradient: "embedding_128.tmp_0@GRAD" - slot_gradient: "embedding_129.tmp_0@GRAD" - slot_gradient: "embedding_130.tmp_0@GRAD" - slot_gradient: "embedding_131.tmp_0@GRAD" - slot_gradient: "embedding_132.tmp_0@GRAD" - slot_gradient: "embedding_133.tmp_0@GRAD" - slot_gradient: "embedding_134.tmp_0@GRAD" - slot_gradient: "embedding_135.tmp_0@GRAD" - slot_gradient: "embedding_136.tmp_0@GRAD" - slot_gradient: "embedding_137.tmp_0@GRAD" - slot_gradient: "embedding_138.tmp_0@GRAD" - slot_gradient: "embedding_139.tmp_0@GRAD" - slot_gradient: "embedding_140.tmp_0@GRAD" - slot_gradient: "embedding_141.tmp_0@GRAD" - slot_gradient: "embedding_142.tmp_0@GRAD" - slot_gradient: "embedding_143.tmp_0@GRAD" - slot_gradient: "embedding_144.tmp_0@GRAD" - slot_gradient: "embedding_145.tmp_0@GRAD" - slot_gradient: "embedding_146.tmp_0@GRAD" - slot_gradient: "embedding_147.tmp_0@GRAD" - slot_gradient: "embedding_148.tmp_0@GRAD" - slot_gradient: "embedding_149.tmp_0@GRAD" - slot_gradient: "embedding_150.tmp_0@GRAD" - slot_gradient: "embedding_151.tmp_0@GRAD" - slot_gradient: "embedding_152.tmp_0@GRAD" - slot_gradient: "embedding_153.tmp_0@GRAD" - slot_gradient: "embedding_154.tmp_0@GRAD" - slot_gradient: "embedding_155.tmp_0@GRAD" - slot_gradient: "embedding_156.tmp_0@GRAD" - slot_gradient: "embedding_157.tmp_0@GRAD" - slot_gradient: "embedding_158.tmp_0@GRAD" - slot_gradient: "embedding_159.tmp_0@GRAD" - slot_gradient: "embedding_160.tmp_0@GRAD" - slot_gradient: "embedding_161.tmp_0@GRAD" - slot_gradient: "embedding_162.tmp_0@GRAD" - slot_gradient: "embedding_163.tmp_0@GRAD" - slot_gradient: "embedding_164.tmp_0@GRAD" - slot_gradient: "embedding_165.tmp_0@GRAD" - slot_gradient: "embedding_166.tmp_0@GRAD" - slot_gradient: "embedding_167.tmp_0@GRAD" - slot_gradient: "embedding_168.tmp_0@GRAD" - slot_gradient: "embedding_169.tmp_0@GRAD" - slot_gradient: "embedding_170.tmp_0@GRAD" - slot_gradient: "embedding_171.tmp_0@GRAD" - slot_gradient: "embedding_172.tmp_0@GRAD" - slot_gradient: "embedding_173.tmp_0@GRAD" - slot_gradient: "embedding_174.tmp_0@GRAD" - slot_gradient: "embedding_175.tmp_0@GRAD" - slot_gradient: "embedding_176.tmp_0@GRAD" - slot_gradient: "embedding_177.tmp_0@GRAD" - slot_gradient: "embedding_178.tmp_0@GRAD" - slot_gradient: "embedding_179.tmp_0@GRAD" - slot_gradient: "embedding_180.tmp_0@GRAD" - slot_gradient: "embedding_181.tmp_0@GRAD" - slot_gradient: "embedding_182.tmp_0@GRAD" - slot_gradient: "embedding_183.tmp_0@GRAD" - slot_gradient: "embedding_184.tmp_0@GRAD" - slot_gradient: "embedding_185.tmp_0@GRAD" - slot_gradient: "embedding_186.tmp_0@GRAD" - slot_gradient: "embedding_187.tmp_0@GRAD" - slot_gradient: "embedding_188.tmp_0@GRAD" - slot_gradient: "embedding_189.tmp_0@GRAD" - slot_gradient: "embedding_190.tmp_0@GRAD" - slot_gradient: "embedding_191.tmp_0@GRAD" - slot_gradient: "embedding_192.tmp_0@GRAD" - slot_gradient: "embedding_193.tmp_0@GRAD" - slot_gradient: "embedding_194.tmp_0@GRAD" - slot_gradient: "embedding_195.tmp_0@GRAD" - slot_gradient: "embedding_196.tmp_0@GRAD" - slot_gradient: "embedding_197.tmp_0@GRAD" - slot_gradient: "embedding_198.tmp_0@GRAD" - slot_gradient: "embedding_199.tmp_0@GRAD" - slot_gradient: "embedding_200.tmp_0@GRAD" - slot_gradient: "embedding_201.tmp_0@GRAD" - slot_gradient: "embedding_202.tmp_0@GRAD" - slot_gradient: "embedding_203.tmp_0@GRAD" - slot_gradient: "embedding_204.tmp_0@GRAD" - slot_gradient: "embedding_205.tmp_0@GRAD" - slot_gradient: "embedding_206.tmp_0@GRAD" - slot_gradient: "embedding_207.tmp_0@GRAD" - slot_gradient: "embedding_208.tmp_0@GRAD" - slot_gradient: "embedding_209.tmp_0@GRAD" - slot_gradient: "embedding_210.tmp_0@GRAD" - slot_gradient: "embedding_211.tmp_0@GRAD" - slot_gradient: "embedding_212.tmp_0@GRAD" - slot_gradient: "embedding_213.tmp_0@GRAD" - slot_gradient: "embedding_214.tmp_0@GRAD" - slot_gradient: "embedding_215.tmp_0@GRAD" - slot_gradient: "embedding_216.tmp_0@GRAD" - slot_gradient: "embedding_217.tmp_0@GRAD" - slot_gradient: "embedding_218.tmp_0@GRAD" - slot_gradient: "embedding_219.tmp_0@GRAD" - slot_gradient: "embedding_220.tmp_0@GRAD" - slot_gradient: "embedding_221.tmp_0@GRAD" - slot_gradient: "embedding_222.tmp_0@GRAD" - slot_gradient: "embedding_223.tmp_0@GRAD" - slot_gradient: "embedding_224.tmp_0@GRAD" - slot_gradient: "embedding_225.tmp_0@GRAD" - slot_gradient: "embedding_226.tmp_0@GRAD" - slot_gradient: "embedding_227.tmp_0@GRAD" - slot_gradient: "embedding_228.tmp_0@GRAD" - slot_gradient: "embedding_229.tmp_0@GRAD" - slot_gradient: "embedding_230.tmp_0@GRAD" - slot_gradient: "embedding_231.tmp_0@GRAD" - slot_gradient: "embedding_232.tmp_0@GRAD" - slot_gradient: "embedding_233.tmp_0@GRAD" - slot_gradient: "embedding_234.tmp_0@GRAD" - slot_gradient: "embedding_235.tmp_0@GRAD" - slot_gradient: "embedding_236.tmp_0@GRAD" - slot_gradient: "embedding_237.tmp_0@GRAD" - slot_gradient: "embedding_238.tmp_0@GRAD" - slot_gradient: "embedding_239.tmp_0@GRAD" - slot_gradient: "embedding_240.tmp_0@GRAD" - slot_gradient: "embedding_241.tmp_0@GRAD" - slot_gradient: "embedding_242.tmp_0@GRAD" - slot_gradient: "embedding_243.tmp_0@GRAD" - slot_gradient: "embedding_244.tmp_0@GRAD" - slot_gradient: "embedding_245.tmp_0@GRAD" - slot_gradient: "embedding_246.tmp_0@GRAD" - slot_gradient: "embedding_247.tmp_0@GRAD" - slot_gradient: "embedding_248.tmp_0@GRAD" - slot_gradient: "embedding_249.tmp_0@GRAD" - slot_gradient: "embedding_250.tmp_0@GRAD" - slot_gradient: "embedding_251.tmp_0@GRAD" - slot_gradient: "embedding_252.tmp_0@GRAD" - slot_gradient: "embedding_253.tmp_0@GRAD" - slot_gradient: "embedding_254.tmp_0@GRAD" - slot_gradient: "embedding_255.tmp_0@GRAD" - slot_gradient: "embedding_256.tmp_0@GRAD" - slot_gradient: "embedding_257.tmp_0@GRAD" - slot_gradient: "embedding_258.tmp_0@GRAD" - slot_gradient: "embedding_259.tmp_0@GRAD" - slot_gradient: "embedding_260.tmp_0@GRAD" - slot_gradient: "embedding_261.tmp_0@GRAD" - slot_gradient: "embedding_262.tmp_0@GRAD" - slot_gradient: "embedding_263.tmp_0@GRAD" - slot_gradient: "embedding_264.tmp_0@GRAD" - slot_gradient: "embedding_265.tmp_0@GRAD" - slot_gradient: "embedding_266.tmp_0@GRAD" - slot_gradient: "embedding_267.tmp_0@GRAD" - slot_gradient: "embedding_268.tmp_0@GRAD" - slot_gradient: "embedding_269.tmp_0@GRAD" - slot_gradient: "embedding_270.tmp_0@GRAD" - slot_gradient: "embedding_271.tmp_0@GRAD" - slot_gradient: "embedding_272.tmp_0@GRAD" - slot_gradient: "embedding_273.tmp_0@GRAD" - slot_gradient: "embedding_274.tmp_0@GRAD" - slot_gradient: "embedding_275.tmp_0@GRAD" - slot_gradient: "embedding_276.tmp_0@GRAD" - slot_gradient: "embedding_277.tmp_0@GRAD" - slot_gradient: "embedding_278.tmp_0@GRAD" - slot_gradient: "embedding_279.tmp_0@GRAD" - slot_gradient: "embedding_280.tmp_0@GRAD" - slot_gradient: "embedding_281.tmp_0@GRAD" - slot_gradient: "embedding_282.tmp_0@GRAD" - slot_gradient: "embedding_283.tmp_0@GRAD" - slot_gradient: "embedding_284.tmp_0@GRAD" - slot_gradient: "embedding_285.tmp_0@GRAD" - slot_gradient: "embedding_286.tmp_0@GRAD" - slot_gradient: "embedding_287.tmp_0@GRAD" - slot_gradient: "embedding_288.tmp_0@GRAD" - slot_gradient: "embedding_289.tmp_0@GRAD" - slot_gradient: "embedding_290.tmp_0@GRAD" - slot_gradient: "embedding_291.tmp_0@GRAD" - slot_gradient: "embedding_292.tmp_0@GRAD" - slot_gradient: "embedding_293.tmp_0@GRAD" - slot_gradient: "embedding_294.tmp_0@GRAD" - slot_gradient: "embedding_295.tmp_0@GRAD" - slot_gradient: "embedding_296.tmp_0@GRAD" - slot_gradient: "embedding_297.tmp_0@GRAD" - slot_gradient: "embedding_298.tmp_0@GRAD" - slot_gradient: "embedding_299.tmp_0@GRAD" - slot_gradient: "embedding_300.tmp_0@GRAD" - slot_gradient: "embedding_301.tmp_0@GRAD" - slot_gradient: "embedding_302.tmp_0@GRAD" - slot_gradient: "embedding_303.tmp_0@GRAD" - slot_gradient: "embedding_304.tmp_0@GRAD" - slot_gradient: "embedding_305.tmp_0@GRAD" - slot_gradient: "embedding_306.tmp_0@GRAD" - slot_gradient: "embedding_307.tmp_0@GRAD" - slot_gradient: "embedding_308.tmp_0@GRAD" - slot_gradient: "embedding_309.tmp_0@GRAD" - slot_gradient: "embedding_310.tmp_0@GRAD" - slot_gradient: "embedding_311.tmp_0@GRAD" - slot_gradient: "embedding_312.tmp_0@GRAD" - slot_gradient: "embedding_313.tmp_0@GRAD" - slot_gradient: "embedding_314.tmp_0@GRAD" - slot_gradient: "embedding_315.tmp_0@GRAD" - slot_gradient: "embedding_316.tmp_0@GRAD" - slot_gradient: "embedding_317.tmp_0@GRAD" - slot_gradient: "embedding_318.tmp_0@GRAD" - slot_gradient: "embedding_319.tmp_0@GRAD" - slot_gradient: "embedding_320.tmp_0@GRAD" - slot_gradient: "embedding_321.tmp_0@GRAD" - slot_gradient: "embedding_322.tmp_0@GRAD" - slot_gradient: "embedding_323.tmp_0@GRAD" - slot_gradient: "embedding_324.tmp_0@GRAD" - slot_gradient: "embedding_325.tmp_0@GRAD" - slot_gradient: "embedding_326.tmp_0@GRAD" - slot_gradient: "embedding_327.tmp_0@GRAD" - slot_gradient: "embedding_328.tmp_0@GRAD" - slot_gradient: "embedding_329.tmp_0@GRAD" - slot_gradient: "embedding_330.tmp_0@GRAD" - slot_gradient: "embedding_331.tmp_0@GRAD" - slot_gradient: "embedding_332.tmp_0@GRAD" - slot_gradient: "embedding_333.tmp_0@GRAD" - slot_gradient: "embedding_334.tmp_0@GRAD" - slot_gradient: "embedding_335.tmp_0@GRAD" - slot_gradient: "embedding_336.tmp_0@GRAD" - slot_gradient: "embedding_337.tmp_0@GRAD" - slot_gradient: "embedding_338.tmp_0@GRAD" - slot_gradient: "embedding_339.tmp_0@GRAD" - slot_gradient: "embedding_340.tmp_0@GRAD" - slot_gradient: "embedding_341.tmp_0@GRAD" - slot_gradient: "embedding_342.tmp_0@GRAD" - slot_gradient: "embedding_343.tmp_0@GRAD" - slot_gradient: "embedding_344.tmp_0@GRAD" - slot_gradient: "embedding_345.tmp_0@GRAD" - slot_gradient: "embedding_346.tmp_0@GRAD" - slot_gradient: "embedding_347.tmp_0@GRAD" - slot_gradient: "embedding_348.tmp_0@GRAD" - slot_gradient: "embedding_349.tmp_0@GRAD" - slot_gradient: "embedding_350.tmp_0@GRAD" - slot_gradient: "embedding_351.tmp_0@GRAD" - slot_gradient: "embedding_352.tmp_0@GRAD" - slot_gradient: "embedding_353.tmp_0@GRAD" - slot_gradient: "embedding_354.tmp_0@GRAD" - slot_gradient: "embedding_355.tmp_0@GRAD" - slot_gradient: "embedding_356.tmp_0@GRAD" - slot_gradient: "embedding_357.tmp_0@GRAD" - slot_gradient: "embedding_358.tmp_0@GRAD" - slot_gradient: "embedding_359.tmp_0@GRAD" - slot_gradient: "embedding_360.tmp_0@GRAD" - slot_gradient: "embedding_361.tmp_0@GRAD" - slot_gradient: "embedding_362.tmp_0@GRAD" - slot_gradient: "embedding_363.tmp_0@GRAD" - slot_gradient: "embedding_364.tmp_0@GRAD" - slot_gradient: "embedding_365.tmp_0@GRAD" - slot_gradient: "embedding_366.tmp_0@GRAD" - slot_gradient: "embedding_367.tmp_0@GRAD" - slot_gradient: "embedding_368.tmp_0@GRAD" - slot_gradient: "embedding_369.tmp_0@GRAD" - slot_gradient: "embedding_370.tmp_0@GRAD" - slot_gradient: "embedding_371.tmp_0@GRAD" - slot_gradient: "embedding_372.tmp_0@GRAD" - slot_gradient: "embedding_373.tmp_0@GRAD" - slot_gradient: "embedding_374.tmp_0@GRAD" - slot_gradient: "embedding_375.tmp_0@GRAD" - slot_gradient: "embedding_376.tmp_0@GRAD" - slot_gradient: "embedding_377.tmp_0@GRAD" - slot_gradient: "embedding_378.tmp_0@GRAD" - slot_gradient: "embedding_379.tmp_0@GRAD" - slot_gradient: "embedding_380.tmp_0@GRAD" - slot_gradient: "embedding_381.tmp_0@GRAD" - slot_gradient: "embedding_382.tmp_0@GRAD" - slot_gradient: "embedding_383.tmp_0@GRAD" - slot_gradient: "embedding_384.tmp_0@GRAD" - slot_gradient: "embedding_385.tmp_0@GRAD" - slot_gradient: "embedding_386.tmp_0@GRAD" - slot_gradient: "embedding_387.tmp_0@GRAD" - slot_gradient: "embedding_388.tmp_0@GRAD" - slot_gradient: "embedding_389.tmp_0@GRAD" - slot_gradient: "embedding_390.tmp_0@GRAD" - slot_gradient: "embedding_391.tmp_0@GRAD" - slot_gradient: "embedding_392.tmp_0@GRAD" - slot_gradient: "embedding_393.tmp_0@GRAD" - slot_gradient: "embedding_394.tmp_0@GRAD" - slot_gradient: "embedding_395.tmp_0@GRAD" - slot_gradient: "embedding_396.tmp_0@GRAD" - slot_gradient: "embedding_397.tmp_0@GRAD" - slot_gradient: "embedding_398.tmp_0@GRAD" - slot_gradient: "embedding_399.tmp_0@GRAD" - slot_gradient: "embedding_400.tmp_0@GRAD" - slot_gradient: "embedding_401.tmp_0@GRAD" - slot_gradient: "embedding_402.tmp_0@GRAD" - slot_gradient: "embedding_403.tmp_0@GRAD" - slot_gradient: "embedding_404.tmp_0@GRAD" - slot_gradient: "embedding_405.tmp_0@GRAD" - slot_gradient: "embedding_406.tmp_0@GRAD" - } - skip_op: "lookup_table" - skip_op: "lookup_table_grad" -} -fs_client_param { - uri: "afs://xingtian.afs.baidu.com:9902" - user: "mlarch_pro" - passwd: "proisvip" - hadoop_bin: "$HADOOP_HOME/bin/hadoop" -} diff --git a/feed/feed_deploy/news_jingpai/package/my_nets/scripts/xbox_compressor_mf.py b/feed/feed_deploy/news_jingpai/package/my_nets/scripts/xbox_compressor_mf.py deleted file mode 100755 index b306ddfeb183515c7652b2f0d08cbe98f95033b4..0000000000000000000000000000000000000000 --- a/feed/feed_deploy/news_jingpai/package/my_nets/scripts/xbox_compressor_mf.py +++ /dev/null @@ -1,162 +0,0 @@ -#!/usr/bin/python -""" -xbox model compressor -""" - -import sys -import math -import time -import re - -#WISE -#SHOW_COMPRESS_RATIO : 8192 -#CLICK_COMPRESS_RATIO : 8192 -#LR_COMPRESS_RATIO : 1048576 -#MIO_COMPRESS_RATIO:8192 - -#PC -#MIO_COMPRESS_RATIO : 1024 -#SHOW_COMPRESS_RATIO : 128 -#CLICK_COMPRESS_RATIO : 1024 -#LR_COMPRESS_RATIO : 8192 - -#STAMP_COL = 2 -SHOW_COL = 3 -CLICK_COL = 4 -LR_W_COL = 5 -LR_G2SUM_COL = 6 -FM_COL = 9 - -#DAY_SPAN = 300 - -#show clk lr = float -SHOW_RATIO = 1 -#SHOW_RATIO = 1024 -CLK_RATIO = 8 -#CLK_RATIO = 1024 -LR_RATIO = 1024 -MF_RATIO = 1024 - -base_update_threshold=0.965 -base_xbox_clk_cof=1 -base_xbox_nonclk_cof=0.2 - -def as_num(x): - y='{:.5f}'.format(x) - return(y) - -def compress_show(xx): - """ - compress show - """ - preci = SHOW_RATIO - - x = float(xx) - return str(int(math.floor(x * preci + 0.5))) - - -def compress_clk(xx): - """ - compress clk - """ - preci = CLK_RATIO - - x = float(xx) - clk = int(math.floor(x * preci + 0.5)) - if clk == 0: - return "" - return str(clk) - - -def compress_lr(xx): - """ - compress lr - """ - preci = LR_RATIO - - x = float(xx) - lr = int(math.floor(x * preci + 0.5)) - if lr == 0: - return "" - return str(lr) - -def compress_mf(xx): - """ - compress mf - """ - preci = MF_RATIO - - x = float(xx) - return int(math.floor(x * preci + 0.5)) - - -def show_clk_score(show, clk): - """ - calculate show_clk score - """ - return (show - clk) * 0.2 + clk - - -for l in sys.stdin: - cols = re.split(r'\s+', l.strip()) - key = cols[0].strip() - - #day = int(cols[STAMP_COL].strip()) - #cur_day = int(time.time()/3600/24) - #if (day + DAY_SPAN) <= cur_day: - # continue - - # cvm features - show = cols[SHOW_COL] - click = cols[CLICK_COL] - pred = "" - - f_show = float(show) - f_clk = float(click) - """ - if f_show != 0: - show_log = math.log(f_show) - else: - show_log = 0 - - if f_clk != 0: - click_log = math.log(f_clk) - show_log - else: - click_log = 0 - """ - show_log = f_show - click_log = f_clk - #print f_show, f_clk - #if show_clk_score(f_show, f_clk) < base_update_threshold: - # continue - - #show = compress_show(show) - show = compress_show(show_log) - #clk = compress_clk(click) - clk = compress_clk(click_log) - - # personal lr weight - lr_w = cols[LR_W_COL].strip() - lr_wei = compress_lr(lr_w) - - # fm weight - fm_wei = [] - fm_sum = 0 - if len(cols) > 7: - #fm_dim = int(cols[FM_COL].strip()) - #if fm_dim != 0: - for v in xrange(FM_COL, len(cols), 1): - mf_v = compress_mf(cols[v]) - #print mf_v - fm_wei.append(str(mf_v)) - fm_sum += (mf_v * mf_v) - - sys.stdout.write("%s\t%s\t%s\t%s" % (key, show, clk, pred)) - sys.stdout.write("\t") - sys.stdout.write("%s" % lr_wei) - if len(fm_wei) > 0 and fm_sum > 0: - sys.stdout.write("\t%s" % "\t".join(fm_wei)) - else: - sys.stdout.write("\t[\t]") - sys.stdout.write("\n") - diff --git a/feed/feed_deploy/news_jingpai/package/my_nets/scripts/xbox_decompressor_mf.awk b/feed/feed_deploy/news_jingpai/package/my_nets/scripts/xbox_decompressor_mf.awk deleted file mode 100755 index 080e84419bc47675cb46a725b4e94480cd3da920..0000000000000000000000000000000000000000 --- a/feed/feed_deploy/news_jingpai/package/my_nets/scripts/xbox_decompressor_mf.awk +++ /dev/null @@ -1,52 +0,0 @@ -#!/bin/awk -f -{ - OFS="\t"; - SHOW_RATIO = 1; - CLK_RATIO = 8; - LR_RATIO = 1024; - MF_RATIO = 1024; -} - -function decompress_show(x) { - x = x * 1.0 / SHOW_RATIO; - return x; -} - -function decompress_clk(x) { - if (x == "") { - x = 0; - } - x = x * 1.0 / CLK_RATIO; - return x; -} - -function decompress_lr(x) { - return x * 1.0 / LR_RATIO; -} - -function decompress_mf(x) { - return x * 1.0 / MF_RATIO; -} - -function show_clk_sore(show, clk, nonclk_coeff, clk_coeff) { - return (show - clk) * nonclk_coeff + clk * clk_coeff; -} - -#key, show, clk, pred, lr_w, mf_w or [\t] -{ - l=split($0, a, "\t"); - - show = decompress_show(a[2]); - click = decompress_clk(a[3]); - lr = decompress_lr(a[5]); - printf("%s\t0\t0\t%s\t%s\t%s\t0\t", a[1], show, click, lr); - if (l == 7) { - printf("\n"); - } else { - printf("%d", l-5) - for(i = 6; i <= l; i++) { - printf("\t%s", decompress_mf(a[i])); - } - printf("\n"); - } -} diff --git a/feed/feed_deploy/news_jingpai/package/my_nets/slot/slot b/feed/feed_deploy/news_jingpai/package/my_nets/slot/slot deleted file mode 100644 index dd6723ffb39ee17c44e0119c96d9481bd3ce98ef..0000000000000000000000000000000000000000 --- a/feed/feed_deploy/news_jingpai/package/my_nets/slot/slot +++ /dev/null @@ -1,407 +0,0 @@ -6048 -6002 -6145 -6202 -6201 -6121 -6738 -6119 -6146 -6120 -6147 -6122 -6123 -6118 -6142 -6143 -6008 -6148 -6151 -6127 -6144 -6094 -6083 -6952 -6739 -6150 -6109 -6003 -6099 -6149 -6129 -6203 -6153 -6152 -6128 -6106 -6251 -7082 -7515 -6951 -6949 -7080 -6066 -7507 -6186 -6007 -7514 -6125 -7506 -10001 -6006 -7023 -6085 -10000 -6098 -6250 -6110 -6124 -6090 -6082 -6067 -6101 -6004 -6191 -7075 -6948 -6157 -6126 -6188 -7077 -6070 -6111 -6087 -6103 -6107 -6194 -6156 -6005 -6247 -6814 -6158 -7122 -6058 -6189 -7058 -6059 -6115 -7079 -7081 -6833 -7024 -6108 -13342 -13345 -13412 -13343 -13350 -13346 -13409 -6009 -6011 -6012 -6013 -6014 -6015 -6019 -6023 -6024 -6027 -6029 -6031 -6050 -6060 -6068 -6069 -6089 -6095 -6105 -6112 -6130 -6131 -6132 -6134 -6161 -6162 -6163 -6166 -6182 -6183 -6185 -6190 -6212 -6213 -6231 -6233 -6234 -6236 -6238 -6239 -6240 -6241 -6242 -6243 -6244 -6245 -6354 -7002 -7005 -7008 -7010 -7013 -7015 -7019 -7020 -7045 -7046 -7048 -7049 -7052 -7054 -7056 -7064 -7066 -7076 -7078 -7083 -7084 -7085 -7086 -7087 -7088 -7089 -7090 -7099 -7100 -7101 -7102 -7103 -7104 -7105 -7109 -7124 -7126 -7136 -7142 -7143 -7144 -7145 -7146 -7147 -7148 -7150 -7151 -7152 -7153 -7154 -7155 -7156 -7157 -7047 -7050 -6257 -6259 -6260 -6261 -7170 -7185 -7186 -6751 -6755 -6757 -6759 -6760 -6763 -6764 -6765 -6766 -6767 -6768 -6769 -6770 -7502 -7503 -7504 -7505 -7510 -7511 -7512 -7513 -6806 -6807 -6808 -6809 -6810 -6811 -6812 -6813 -6815 -6816 -6817 -6819 -6823 -6828 -6831 -6840 -6845 -6875 -6879 -6881 -6888 -6889 -6947 -6950 -6956 -6957 -6959 -10006 -10008 -10009 -10010 -10011 -10016 -10017 -10018 -10019 -10020 -10021 -10022 -10023 -10024 -10029 -10030 -10031 -10032 -10033 -10034 -10035 -10036 -10037 -10038 -10039 -10040 -10041 -10042 -10044 -10045 -10046 -10051 -10052 -10053 -10054 -10055 -10056 -10057 -10060 -10066 -10069 -6820 -6821 -6822 -13333 -13334 -13335 -13336 -13337 -13338 -13339 -13340 -13341 -13351 -13352 -13353 -13359 -13361 -13362 -13363 -13366 -13367 -13368 -13369 -13370 -13371 -13375 -13376 -5700 -5702 -13400 -13401 -13402 -13403 -13404 -13406 -13407 -13408 -13410 -13417 -13418 -13419 -13420 -13422 -13425 -13427 -13428 -13429 -13430 -13431 -13433 -13434 -13436 -13437 -13326 -13330 -13331 -5717 -13442 -13451 -13452 -13455 -13456 -13457 -13458 -13459 -13460 -13461 -13462 -13463 -13464 -13465 -13466 -13467 -13468 -1104 -1106 -1107 -1108 -1109 -1110 -1111 -1112 -1113 -1114 -1115 -1116 -1117 -1119 -1120 -1121 -1122 -1123 -1124 -1125 -1126 -1127 -1128 -1129 -13812 -13813 -6740 -1490 -32915 -32950 -32952 -32953 -32954 -33077 -33085 -33086 diff --git a/feed/feed_deploy/news_jingpai/package/my_nets/slot/slot_common b/feed/feed_deploy/news_jingpai/package/my_nets/slot/slot_common deleted file mode 100644 index 869fb695282eed4a69928e7af52dd49a62e0d4c6..0000000000000000000000000000000000000000 --- a/feed/feed_deploy/news_jingpai/package/my_nets/slot/slot_common +++ /dev/null @@ -1,99 +0,0 @@ -6048 -6002 -6145 -6202 -6201 -6121 -6738 -6119 -6146 -6120 -6147 -6122 -6123 -6118 -6142 -6143 -6008 -6148 -6151 -6127 -6144 -6094 -6083 -6952 -6739 -6150 -6109 -6003 -6099 -6149 -6129 -6203 -6153 -6152 -6128 -6106 -6251 -7082 -7515 -6951 -6949 -7080 -6066 -7507 -6186 -6007 -7514 -6125 -7506 -10001 -6006 -7023 -6085 -10000 -6098 -6250 -6110 -6124 -6090 -6082 -6067 -6101 -6004 -6191 -7075 -6948 -6157 -6126 -6188 -7077 -6070 -6111 -6087 -6103 -6107 -6194 -6156 -6005 -6247 -6814 -6158 -7122 -6058 -6189 -7058 -6059 -6115 -7079 -7081 -6833 -7024 -6108 -13342 -13345 -13412 -13343 -13350 -13346 -13409 diff --git a/feed/feed_deploy/news_jingpai/package/my_nets/tmp/slot/slot b/feed/feed_deploy/news_jingpai/package/my_nets/tmp/slot/slot deleted file mode 100644 index 3e91b42e36e3bef406efc31c50a997ea7dc58f86..0000000000000000000000000000000000000000 --- a/feed/feed_deploy/news_jingpai/package/my_nets/tmp/slot/slot +++ /dev/null @@ -1,408 +0,0 @@ -6048 -6002 -6145 -6202 -6201 -6121 -6738 -6119 -6146 -6120 -6147 -6122 -6123 -6118 -6142 -6143 -6008 -6148 -6151 -6127 -6144 -6094 -6083 -6952 -6739 -6150 -6109 -6003 -6099 -6149 -6129 -6203 -6153 -6152 -6128 -6106 -6251 -7082 -7515 -6951 -6949 -7080 -6066 -7507 -6186 -6007 -7514 -6125 -7506 -10001 -6006 -7023 -6085 -10000 -6098 -6250 -6110 -6124 -6090 -6082 -6067 -6101 -6004 -6191 -7075 -6948 -6157 -6126 -6188 -7077 -6070 -6111 -6087 -6103 -6107 -6194 -6156 -6005 -6247 -6814 -6158 -7122 -6058 -6189 -7058 -6059 -6115 -7079 -7081 -6833 -7024 -6108 -13342 -13345 -13412 -13343 -13350 -13346 -13409 -6009 -6011 -6012 -6013 -6014 -6015 -6019 -6023 -6024 -6027 -6029 -6031 -6050 -6060 -6068 -6069 -6089 -6095 -6105 -6112 -6130 -6131 -6132 -6134 -6161 -6162 -6163 -6166 -6182 -6183 -6185 -6190 -6212 -6213 -6231 -6233 -6234 -6236 -6238 -6239 -6240 -6241 -6242 -6243 -6244 -6245 -6354 -7002 -7005 -7008 -7010 -7012 -7013 -7015 -7016 -7017 -7018 -7019 -7020 -7045 -7046 -7048 -7049 -7052 -7054 -7056 -7064 -7066 -7076 -7078 -7083 -7084 -7085 -7086 -7087 -7088 -7089 -7090 -7099 -7100 -7101 -7102 -7103 -7104 -7105 -7109 -7124 -7126 -7136 -7142 -7143 -7144 -7145 -7146 -7147 -7148 -7150 -7151 -7152 -7153 -7154 -7155 -7156 -7157 -7047 -7050 -6253 -6254 -6255 -6256 -6257 -6259 -6260 -6261 -7170 -7185 -7186 -6751 -6755 -6757 -6759 -6760 -6763 -6764 -6765 -6766 -6767 -6768 -6769 -6770 -7502 -7503 -7504 -7505 -7510 -7511 -7512 -7513 -6806 -6807 -6808 -6809 -6810 -6811 -6812 -6813 -6815 -6816 -6817 -6819 -6823 -6828 -6831 -6840 -6845 -6875 -6879 -6881 -6888 -6889 -6947 -6950 -6956 -6957 -6959 -10006 -10008 -10009 -10010 -10011 -10016 -10017 -10018 -10019 -10020 -10021 -10022 -10023 -10024 -10029 -10030 -10031 -10032 -10033 -10034 -10035 -10036 -10037 -10038 -10039 -10040 -10041 -10042 -10044 -10045 -10046 -10051 -10052 -10053 -10054 -10055 -10056 -10057 -10060 -10066 -10069 -6820 -6821 -6822 -13333 -13334 -13335 -13336 -13337 -13338 -13339 -13340 -13341 -13351 -13352 -13353 -13359 -13361 -13362 -13363 -13366 -13367 -13368 -13369 -13370 -13371 -13375 -13376 -5700 -5702 -13400 -13401 -13402 -13403 -13404 -13406 -13407 -13408 -13410 -13417 -13418 -13419 -13420 -13422 -13425 -13427 -13428 -13429 -13430 -13431 -13433 -13434 -13436 -13437 -13326 -13330 -13331 -5717 -13442 -13451 -13452 -13455 -13456 -13457 -13458 -13459 -13460 -13461 -13462 -13463 -13464 -13465 -13466 -13467 -13468 -1104 -1106 -1107 -1108 -1109 -1110 -1111 -1112 -1113 -1114 -1115 -1116 -1117 -1119 -1120 -1121 -1122 -1123 -1124 -1125 -1126 -1127 -1128 -1129 -13812 -13813 -6740 -1490 -1491 diff --git a/feed/feed_deploy/news_jingpai/package/my_nets/tmp/slot/slot_common b/feed/feed_deploy/news_jingpai/package/my_nets/tmp/slot/slot_common deleted file mode 100644 index 869fb695282eed4a69928e7af52dd49a62e0d4c6..0000000000000000000000000000000000000000 --- a/feed/feed_deploy/news_jingpai/package/my_nets/tmp/slot/slot_common +++ /dev/null @@ -1,99 +0,0 @@ -6048 -6002 -6145 -6202 -6201 -6121 -6738 -6119 -6146 -6120 -6147 -6122 -6123 -6118 -6142 -6143 -6008 -6148 -6151 -6127 -6144 -6094 -6083 -6952 -6739 -6150 -6109 -6003 -6099 -6149 -6129 -6203 -6153 -6152 -6128 -6106 -6251 -7082 -7515 -6951 -6949 -7080 -6066 -7507 -6186 -6007 -7514 -6125 -7506 -10001 -6006 -7023 -6085 -10000 -6098 -6250 -6110 -6124 -6090 -6082 -6067 -6101 -6004 -6191 -7075 -6948 -6157 -6126 -6188 -7077 -6070 -6111 -6087 -6103 -6107 -6194 -6156 -6005 -6247 -6814 -6158 -7122 -6058 -6189 -7058 -6059 -6115 -7079 -7081 -6833 -7024 -6108 -13342 -13345 -13412 -13343 -13350 -13346 -13409 diff --git a/feed/feed_deploy/news_jingpai/package/my_nets/tmp/slot/to.py b/feed/feed_deploy/news_jingpai/package/my_nets/tmp/slot/to.py deleted file mode 100644 index 638c53647dc2adc1d502ed53630f07dbcfe8ffce..0000000000000000000000000000000000000000 --- a/feed/feed_deploy/news_jingpai/package/my_nets/tmp/slot/to.py +++ /dev/null @@ -1,5 +0,0 @@ -with open("session_slot", "r") as fin: - res = [] - for i in fin: - res.append("\"" + i.strip() + "\"") - print ", ".join(res) diff --git a/feed/feed_deploy/news_jingpai/package/my_nets/trainer_online.py b/feed/feed_deploy/news_jingpai/package/my_nets/trainer_online.py deleted file mode 100644 index 8f29b42cce434085b0d4e3a969d7d6657e19d109..0000000000000000000000000000000000000000 --- a/feed/feed_deploy/news_jingpai/package/my_nets/trainer_online.py +++ /dev/null @@ -1,593 +0,0 @@ -import numpy as np -import os -import sys -import paddle -import paddle.fluid as fluid -import threading -import time -import config -from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet -from paddle.fluid.incubate.fleet.utils.fleet_util import FleetUtil -from paddle.fluid.incubate.fleet.utils.hdfs import HDFSClient -from model_new import Model -from model_new_jc import ModelJoinCommon -import util -from util import * - -fleet_util = FleetUtil() - -def time_prefix_str(): - return "\n" + time.strftime("%Y-%m-%d %H:%M:%S",time.localtime()) + "[0]:" - -auc_record = {} -def check_auc_ok(auc_label, auc_log, auc_alarm): - auc_datas = auc_log.split(' AUC=') - if len(auc_datas) < 2: - return True - if auc_label not in auc_record: - auc_record[auc_label] = 0.0 - auc = float(auc_datas[1].split(' ')[0]) - if auc < auc_record[auc_label] and auc < auc_alarm: - fleet_util.rank0_print("label:%s, auc:%s, check bad" % (auc_label, auc)) - return False - auc_record[auc_label] = auc - fleet_util.rank0_print("label:%s, auc:%s, check ok" % (auc_label, auc)) - return True - -def create_model(slot_file, slot_common_file, all_slot_file): - join_common_model = ModelJoinCommon(slot_file, slot_common_file, all_slot_file, 20) - update_model = Model(slot_file, all_slot_file, False, 0, True) - with open("join_common_main_program.pbtxt", "w") as fout: - print >> fout, join_common_model._train_program - with open("join_common_startup_program.pbtxt", "w") as fout: - print >> fout, join_common_model._startup_program - with open("update_main_program.pbtxt", "w") as fout: - print >> fout, update_model._train_program - with open("update_startup_program.pbtxt", "w") as fout: - print >> fout, update_model._startup_program - return [join_common_model, update_model] - -def create_dataset(use_var_list, my_filelist): - dataset = fluid.DatasetFactory().create_dataset(config.dataset_type) - dataset.set_batch_size(config.batch_size) - dataset.set_thread(config.thread_num) - dataset.set_hdfs_config(config.fs_name, config.fs_ugi) - dataset.set_pipe_command(config.pipe_command) - dataset.set_filelist(my_filelist) - dataset.set_use_var(use_var_list) - #dataset.set_fleet_send_sleep_seconds(2) - #dataset.set_fleet_send_batch_size(80000) - return dataset - -def hdfs_ls(path): - configs = { - "fs.default.name": config.fs_name, - "hadoop.job.ugi": config.fs_ugi - } - hdfs_client = HDFSClient("$HADOOP_HOME", configs) - filelist = [] - for i in path: - cur_path = hdfs_client.ls(i) - if config.fs_name.startswith("hdfs:"): - cur_path = ["hdfs:" + j for j in cur_path] - elif config.fs_name.startswith("afs:"): - cur_path = ["afs:" + j for j in cur_path] - filelist += cur_path - return filelist - -def get_avg_cost_mins(value): - t1 = time.time() - local_cost = np.array([value]) - global_cost = np.copy(local_cost) * 0 - t2 = time.time() - fleet._role_maker._node_type_comm.Allreduce(local_cost, global_cost) - t3 = time.time() - avg_cost = float(global_cost[0]) / fleet.worker_num() - avg_cost /= 60.0 - t4 = time.time() - tc = (t2 - t1 + t4 - t3) / 60.0 - tb = (t3 - t2) / 60.0 - fleet_util.rank0_print("get_avg_cost_mins calc time %s barrier time %s" % (tc, tb)) - return avg_cost - -def get_max_cost_mins(value): - from mpi4py import MPI - local_cost = np.array([value]) - global_cost = np.copy(local_cost) * 0 - fleet._role_maker._node_type_comm.Allreduce(local_cost, global_cost, op=MPI.MAX) - fleet_util.rank0_print("max train time %s mins" % (float(global_cost[0]) / 60.0)) - -def get_min_cost_mins(value): - from mpi4py import MPI - local_cost = np.array([value]) - global_cost = np.copy(local_cost) * 0 - fleet._role_maker._node_type_comm.Allreduce(local_cost, global_cost, op=MPI.MIN) - fleet_util.rank0_print("min train time %s mins" % (float(global_cost[0]) / 60.0)) - -def get_data_max(value): - from mpi4py import MPI - local_cost = np.array([value]) - global_cost = np.copy(local_cost) * 0 - fleet._role_maker._node_type_comm.Allreduce(local_cost, global_cost, op=MPI.MAX) - fleet_util.rank0_print("data size max %s" % global_cost[0]) - -def get_data_min(value): - from mpi4py import MPI - local_cost = np.array([value]) - global_cost = np.copy(local_cost) * 0 - fleet._role_maker._node_type_comm.Allreduce(local_cost, global_cost, op=MPI.MIN) - fleet_util.rank0_print("data size min %s" % global_cost[0]) - -def clear_metrics(fleet_util, model, scope): - fleet_util.set_zero(model.stat_pos.name, scope) - fleet_util.set_zero(model.stat_neg.name, scope) - fleet_util.set_zero(model.batch_stat_pos.name, scope) - fleet_util.set_zero(model.batch_stat_neg.name, scope) - fleet_util.set_zero(model.abserr.name, scope, param_type="float32") - fleet_util.set_zero(model.sqrerr.name, scope, param_type="float32") - fleet_util.set_zero(model.prob.name, scope, param_type="float32") - fleet_util.set_zero(model.q.name, scope, param_type="float32") - fleet_util.set_zero(model.pos.name, scope, param_type="float32") - fleet_util.set_zero(model.total.name, scope, param_type="float32") - -def clear_metrics_2(fleet_util, model, scope): - fleet_util.set_zero(model.join_stat_pos.name, scope) - fleet_util.set_zero(model.join_stat_neg.name, scope) - fleet_util.set_zero(model.join_batch_stat_pos.name, scope) - fleet_util.set_zero(model.join_batch_stat_neg.name, scope) - fleet_util.set_zero(model.join_abserr.name, scope, param_type="float32") - fleet_util.set_zero(model.join_sqrerr.name, scope, param_type="float32") - fleet_util.set_zero(model.join_prob.name, scope, param_type="float32") - fleet_util.set_zero(model.join_q.name, scope, param_type="float32") - fleet_util.set_zero(model.join_pos.name, scope, param_type="float32") - fleet_util.set_zero(model.join_total.name, scope, param_type="float32") - - fleet_util.set_zero(model.common_stat_pos.name, scope) - fleet_util.set_zero(model.common_stat_neg.name, scope) - fleet_util.set_zero(model.common_batch_stat_pos.name, scope) - fleet_util.set_zero(model.common_batch_stat_neg.name, scope) - fleet_util.set_zero(model.common_abserr.name, scope, param_type="float32") - fleet_util.set_zero(model.common_sqrerr.name, scope, param_type="float32") - fleet_util.set_zero(model.common_prob.name, scope, param_type="float32") - fleet_util.set_zero(model.common_q.name, scope, param_type="float32") - fleet_util.set_zero(model.common_pos.name, scope, param_type="float32") - fleet_util.set_zero(model.common_total.name, scope, param_type="float32") - -def save_delta(day, pass_index, xbox_base_key, cur_path, exe, scope_join, scope_common, scope_update, join_model, - join_common_model, update_model, join_save_params, common_save_params, update_save_params, monitor_data): - stdout_str = "" - fleet_util.rank0_print("begin save delta model") - begin = time.time() - if pass_index == -1: - fleet_util.save_xbox_base_model(config.output_path, day) - else: - fleet_util.save_delta_model(config.output_path, day, pass_index) - end = time.time() - fleet_util.save_paddle_params(exe, scope_join, join_model._train_program, "paddle_dense.model.0", - config.output_path, day, pass_index, config.fs_name, config.fs_ugi, - var_names=join_save_params) - fleet_util.save_paddle_params(exe, scope_common, join_common_model._train_program, "paddle_dense.model.1", - config.output_path, day, pass_index, config.fs_name, config.fs_ugi, - var_names=common_save_params) - fleet_util.save_paddle_params(exe, scope_update, update_model._train_program, "paddle_dense.model.2", - config.output_path, day, pass_index, config.fs_name, config.fs_ugi, - var_names=update_save_params) - log_str = "end save delta cost %s min" % ((end - begin) / 60.0) - fleet_util.rank0_print(log_str) - stdout_str += time_prefix_str() + log_str - fleet_util.rank0_print("begin save cache") - begin = time.time() - if pass_index == -1: - key_num = fleet_util.save_cache_base_model(config.output_path, day) - else: - key_num = fleet_util.save_cache_model(config.output_path, day, pass_index) - fleet_util.write_cache_donefile(config.output_path, day, pass_index, key_num, config.fs_name, config.fs_ugi) - end = time.time() - log_str = "end save cache cost %s min, key_num=%s" % ((end - begin) / 60.0, key_num) - fleet_util.rank0_print(log_str) - stdout_str += time_prefix_str() + log_str - write_xbox_donefile(day, pass_index, xbox_base_key, ",".join(cur_path), monitor_data=monitor_data) - return stdout_str - -if __name__ == "__main__": - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - fleet.init(exe) - - slot_file = "slot/slot" - slot_common_file = "slot/slot_common" - all_slot_file = "all_slot.dict" - - join_common_model, update_model = create_model(slot_file, slot_common_file, all_slot_file) - - scope2 = fluid.Scope() - scope3 = fluid.Scope() - - adjust_ins_weight = { "need_adjust" : True, "nid_slot" : "6002", "nid_adjw_threshold" : 1000, "nid_adjw_ratio": 20, - "ins_weight_slot": update_model.ins_weight.name } - - thread_stat_var_names = [] - thread_stat_var_names.append(join_common_model.join_stat_pos.name) - thread_stat_var_names.append(join_common_model.join_stat_neg.name) - thread_stat_var_names.append(join_common_model.join_sqrerr.name) - thread_stat_var_names.append(join_common_model.join_abserr.name) - thread_stat_var_names.append(join_common_model.join_prob.name) - thread_stat_var_names.append(join_common_model.join_q.name) - thread_stat_var_names.append(join_common_model.join_pos.name) - thread_stat_var_names.append(join_common_model.join_total.name) - - thread_stat_var_names.append(join_common_model.common_stat_pos.name) - thread_stat_var_names.append(join_common_model.common_stat_neg.name) - thread_stat_var_names.append(join_common_model.common_sqrerr.name) - thread_stat_var_names.append(join_common_model.common_abserr.name) - thread_stat_var_names.append(join_common_model.common_prob.name) - thread_stat_var_names.append(join_common_model.common_q.name) - thread_stat_var_names.append(join_common_model.common_pos.name) - thread_stat_var_names.append(join_common_model.common_total.name) - - thread_stat_var_names.append(update_model.stat_pos.name) - thread_stat_var_names.append(update_model.stat_neg.name) - thread_stat_var_names.append(update_model.sqrerr.name) - thread_stat_var_names.append(update_model.abserr.name) - thread_stat_var_names.append(update_model.prob.name) - thread_stat_var_names.append(update_model.q.name) - thread_stat_var_names.append(update_model.pos.name) - thread_stat_var_names.append(update_model.total.name) - - thread_stat_var_names = list(set(thread_stat_var_names)) - - - adam = fluid.optimizer.Adam(learning_rate=0.000005) - adam = fleet.distributed_optimizer(adam, strategy={"use_cvm" : True, "adjust_ins_weight" : adjust_ins_weight, "scale_datanorm" : 1e-4, "dump_slot": True, "stat_var_names": thread_stat_var_names, "fleet_desc_file": "reqi_fleet_desc"}) - adam.minimize([join_common_model.joint_cost, update_model.avg_cost], [scope2, scope3]) - - join_common_model._train_program._fleet_opt["program_configs"][str(id(join_common_model.joint_cost.block.program))]["push_sparse"] = [] - - join_save_params = ["join.batch_size", "join.batch_sum", "join.batch_square_sum", - "join_0.w_0", "join_0.b_0", "join_1.w_0", "join_1.b_0", "join_2.w_0", "join_2.b_0", - "join_3.w_0", "join_3.b_0", "join_4.w_0", "join_4.b_0", "join_5.w_0", "join_5.b_0", - "join_6.w_0", "join_6.b_0", "join_7.w_0", "join_7.b_0"] - common_save_params = ["common.batch_size", "common.batch_sum", "common.batch_square_sum", - "common_0.w_0", "common_0.b_0", "common_1.w_0", "common_1.b_0", "common_2.w_0", "common_2.b_0", - "common_3.w_0", "common_3.b_0", "common_4.w_0", "common_4.b_0", "common_5.w_0", "common_5.b_0", - "common_6.w_0", "common_6.b_0", "common_7.w_0", "common_7.b_0"] - update_save_params = ["fc_0.w_0", "fc_0.b_0", "fc_1.w_0", "fc_1.b_0", - "fc_2.w_0", "fc_2.b_0", "fc_3.w_0", "fc_3.b_0", - "fc_4.w_0", "fc_4.b_0", "fc_5.w_0", "fc_5.b_0"] - - if fleet.is_server(): - fleet.run_server() - elif fleet.is_worker(): - with fluid.scope_guard(scope3): - exe.run(update_model._startup_program) - with fluid.scope_guard(scope2): - exe.run(join_common_model._startup_program) - - configs = { - "fs.default.name": config.fs_name, - "hadoop.job.ugi": config.fs_ugi - } - hdfs_client = HDFSClient("$HADOOP_HOME", configs) - - save_first_base = config.save_first_base - path = config.train_data_path - online_pass_interval = fleet_util.get_online_pass_interval(config.days, config.hours, config.split_interval, config.split_per_pass, False) - pass_per_day = len(online_pass_interval) - last_day, last_pass, last_path, xbox_base_key = fleet_util.get_last_save_model(config.output_path, config.fs_name, config.fs_ugi) - reqi = True if last_day != -1 else False - - if config.need_reqi_changeslot and config.reqi_dnn_plugin_day >= last_day and config.reqi_dnn_plugin_pass >= last_pass: - util.reqi_changeslot(config.hdfs_dnn_plugin_path, join_save_params, common_save_params, update_save_params, scope2, scope3) - fleet.init_worker() - - dataset = None - next_dataset = None - cur_path = None - next_path = None - start_train = False - days = os.popen("echo -n " + config.days).read().split(" ") - hours = os.popen("echo -n " + config.hours).read().split(" ") - stdout_str = "" - begin_days = {} - for day_index in range(len(days)): - day = days[day_index] - if last_day != -1 and int(day) < last_day: - continue - for pass_index in range(1, pass_per_day + 1): - dataset = next_dataset - next_dataset = None - cur_path = next_path - next_path = None - if (last_day != -1 and int(day) == last_day) and (last_pass != -1 and int(pass_index) < last_pass): - continue - if reqi: - begin = time.time() - log_str = "going to load model %s" % last_path - fleet_util.rank0_print(log_str) - if config.need_reqi_changeslot and config.reqi_dnn_plugin_day >= last_day and config.reqi_dnn_plugin_pass >= last_pass: - fleet.load_one_table(0, last_path) - else: - fleet_util.load_fleet_model(last_path) - - end = time.time() - log_str = "load model cost %s min" % ((end - begin) / 60.0) - fleet_util.rank0_print(log_str) - stdout_str += time_prefix_str() + log_str - reqi = False - if (last_day != -1 and int(day) == last_day) and (last_pass != -1 and int(pass_index) == last_pass): - continue - - #log_str = "===========going to train day/pass %s/%s===========" % (day, pass_index) - - if begin_days.get(day) is None: - log_str = "======== BEGIN DAY:%s ========" % day - fleet_util.rank0_print(log_str) - stdout_str += time_prefix_str() + log_str - begin_days[day] = True - - log_str = " ==== begin delta:%s ========" % pass_index - fleet_util.rank0_print(log_str) - stdout_str += time_prefix_str() + log_str - - if save_first_base: - log_str = "save_first_base=True" - fleet_util.rank0_print(log_str) - save_first_base = False - last_base_day, last_base_path, tmp_xbox_base_key = \ - fleet_util.get_last_save_xbox_base(config.output_path, config.fs_name, config.fs_ugi) - if int(day) > last_base_day: - log_str = "going to save xbox base model" - fleet_util.rank0_print(log_str) - stdout_str += time_prefix_str() + log_str - xbox_base_key = int(time.time()) - cur = [] - for interval in online_pass_interval[pass_index - 1]: - for p in path: - cur.append(p + "/" + day + "/" + interval) - stdout_str += save_delta(day, -1, xbox_base_key, cur, exe, scope2, scope2, scope3, - join_common_model, join_common_model, update_model, - join_save_params, common_save_params, update_save_params, "") - elif int(day) == last_base_day: - xbox_base_key = tmp_xbox_base_key - log_str = "xbox base model exists" - fleet_util.rank0_print(log_str) - stdout_str += time_prefix_str() + log_str - else: - log_str = "xbox base model exists" - fleet_util.rank0_print(log_str) - stdout_str += time_prefix_str() + log_str - - start_train = True - train_begin = time.time() - - if dataset is not None: - begin = time.time() - dataset.wait_preload_done() - end = time.time() - log_str = "wait data preload done cost %s min" % ((end - begin) / 60.0) - fleet_util.rank0_print(log_str) - stdout_str += time_prefix_str() + log_str - - if dataset is None: - cur_pass = online_pass_interval[pass_index - 1] - cur_path = [] - for interval in cur_pass: - for p in path: - cur_path.append(p + "/" + day + "/" + interval) - log_str = "data path: " + ",".join(cur_path) - fleet_util.rank0_print(log_str) - stdout_str += time_prefix_str() + log_str - for i in cur_path: - while not hdfs_client.is_exist(i + "/to.hadoop.done"): - fleet_util.rank0_print("wait for data ready: %s" % i) - time.sleep(config.check_exist_seconds) - my_filelist = fleet.split_files(hdfs_ls(cur_path)) - - dataset = create_dataset(join_common_model._all_slots, my_filelist) - fleet_util.rank0_print("going to load into memory") - begin = time.time() - dataset.load_into_memory() - end = time.time() - log_str = "load into memory done, cost %s min" % ((end - begin) / 60.0) - fleet_util.rank0_print(log_str) - stdout_str += time_prefix_str() + log_str - - fleet_util.rank0_print("going to global shuffle") - begin = time.time() - dataset.global_shuffle(fleet, config.shuffle_thread) - end = time.time() - log_str = "global shuffle done, cost %s min, data size %s" % ((end - begin) / 60.0, dataset.get_shuffle_data_size(fleet)) - fleet_util.rank0_print(log_str) - stdout_str += time_prefix_str() + log_str - get_data_max(dataset.get_shuffle_data_size()) - get_data_min(dataset.get_shuffle_data_size()) - - if config.prefetch and (pass_index < pass_per_day or pass_index == pass_per_day and day_index < len(days) - 1): - if pass_index < pass_per_day: - next_pass = online_pass_interval[pass_index] - next_day = day - else: - next_pass = online_pass_interval[0] - next_day = days[day_index + 1] - next_path = [] - for interval in next_pass: - for p in path: - next_path.append(p + "/" + next_day + "/" + interval) - next_data_ready = True - for i in next_path: - if not hdfs_client.is_exist(i + "/to.hadoop.done"): - next_data_ready = False - fleet_util.rank0_print("next data not ready: %s" % i) - if not next_data_ready: - next_dataset = None - else: - my_filelist = fleet.split_files(hdfs_ls(next_path)) - next_dataset = create_dataset(join_common_model._all_slots, my_filelist) - log_str = "next pass data preload %s " % ",".join(next_path) - fleet_util.rank0_print(log_str) - stdout_str += time_prefix_str() + log_str - next_dataset.preload_into_memory(config.preload_thread) - - - join_cost = 0 - common_cost = 0 - update_cost = 0 - monitor_data = "" - - with fluid.scope_guard(scope2): - fleet_util.rank0_print("Begin join + common pass") - begin = time.time() - exe.train_from_dataset(join_common_model._train_program, - dataset, - scope2, - thread=config.join_common_thread, - debug=False) - end = time.time() - avg_cost = get_avg_cost_mins(end - begin) - - fleet_util.rank0_print("avg train time %s mins" % avg_cost) - - get_max_cost_mins(end - begin) - get_min_cost_mins(end - begin) - - common_cost = avg_cost - - monitor_data = "" - log_str = print_global_metrics(scope2, join_common_model.join_stat_pos.name, join_common_model.join_stat_neg.name, - join_common_model.join_sqrerr.name, join_common_model.join_abserr.name, - join_common_model.join_prob.name, - join_common_model.join_q.name, join_common_model.join_pos.name, - join_common_model.join_total.name, "joining pass:")#"join pass:") - check_auc_ok("joining pass:", log_str, 0.79) - monitor_data += log_str - stdout_str += time_prefix_str() + "joining pass:" - stdout_str += time_prefix_str() + log_str - - log_str = print_global_metrics(scope2, join_common_model.common_stat_pos.name, join_common_model.common_stat_neg.name, - join_common_model.common_sqrerr.name, join_common_model.common_abserr.name, - join_common_model.common_prob.name, - join_common_model.common_q.name, join_common_model.common_pos.name, - join_common_model.common_total.name, "common pass:") - check_auc_ok("common pass:", log_str, 0.70) - monitor_data += " " + log_str - stdout_str += time_prefix_str() + "common pass:" - stdout_str += time_prefix_str() + log_str - fleet_util.rank0_print("End join+common pass") - clear_metrics_2(fleet_util, join_common_model, scope2) - - if config.save_xbox_before_update and pass_index % config.save_delta_frequency == 0: - fleet_util.rank0_print("going to save delta model") - last_xbox_day, last_xbox_pass, last_xbox_path, _ = fleet_util.get_last_save_xbox(config.output_path, config.fs_name, config.fs_ugi) - if int(day) < last_xbox_day or int(day) == last_xbox_day and int(pass_index) <= last_xbox_pass: - log_str = "delta model exists" - fleet_util.rank0_print(log_str) - stdout_str += time_prefix_str() + log_str - else: - stdout_str += save_delta(day, pass_index, xbox_base_key, cur_path, exe, scope2, scope2, scope3, - join_common_model, join_common_model, update_model, - join_save_params, common_save_params, update_save_params, monitor_data) - - with fluid.scope_guard(scope3): - fleet_util.rank0_print("Begin update pass") - begin = time.time() - exe.train_from_dataset(update_model._train_program, - dataset, - scope3, - thread=config.update_thread, - debug=False) - end = time.time() - avg_cost = get_avg_cost_mins(end - begin) - - get_max_cost_mins(end - begin) - get_min_cost_mins(end - begin) - - update_cost = avg_cost - - log_str = print_global_metrics(scope3, update_model.stat_pos.name, update_model.stat_neg.name, - update_model.sqrerr.name, update_model.abserr.name, update_model.prob.name, - update_model.q.name, update_model.pos.name, update_model.total.name, - "updating pass:")#"update pass:") - check_auc_ok("updating pass:", log_str, 0.79) - stdout_str += time_prefix_str() + "updating pass:" - stdout_str += time_prefix_str() + log_str - fleet_util.rank0_print("End update pass") - clear_metrics(fleet_util, update_model, scope3) - - begin = time.time() - dataset.release_memory() - end = time.time() - fleet_util.rank0_print("release_memory cost %s min" % ((end - begin) / 60.0)) - - if (pass_index % config.checkpoint_per_pass) == 0 and pass_index != pass_per_day: - begin = time.time() - fleet_util.save_model(config.output_path, day, pass_index) - fleet_util.write_model_donefile(config.output_path, day, pass_index, xbox_base_key, config.fs_name, config.fs_ugi) - end = time.time() - log_str = "save model cost %s min" % ((end - begin) / 60.0) - fleet_util.rank0_print(log_str) - stdout_str += time_prefix_str() + log_str - if not config.save_xbox_before_update and pass_index % config.save_delta_frequency == 0: - fleet_util.rank0_print("going to save delta model") - last_xbox_day, last_xbox_pass, last_xbox_path, _ = fleet_util.get_last_save_xbox(config.output_path, config.fs_name, config.fs_ugi) - if int(day) < last_xbox_day or int(day) == last_xbox_day and int(pass_index) <= last_xbox_pass: - log_str = "delta model exists" - fleet_util.rank0_print(log_str) - stdout_str += time_prefix_str() + log_str - else: - stdout_str += save_delta(day, pass_index, xbox_base_key, cur_path, exe, scope2, scope2, scope3, - join_common_model, join_common_model, update_model, - join_save_params, common_save_params, update_save_params, monitor_data) - - train_end = time.time() - train_cost = (train_end - train_begin) / 60.0 - other_cost = train_cost - join_cost - common_cost - update_cost - log_str = "finished train day %s pass %s time cost:%s min job time cost" \ - ":[join:%s min][join_common:%s min][update:%s min][other:%s min]" \ - % (day, pass_index, train_cost, join_cost, common_cost, update_cost, other_cost) - fleet_util.rank0_print(log_str) - stdout_str += time_prefix_str() + log_str - - if pass_index % config.write_stdout_frequency == 0: - write_stdout(stdout_str) - stdout_str = "" - - xbox_base_key = int(time.time()) - if not start_train: - write_stdout(stdout_str) - stdout_str = "" - continue - - - fleet_util.rank0_print("going to save batch model/base xbox model") - last_base_day, last_base_path, _ = fleet_util.get_last_save_xbox_base(config.output_path, config.fs_name, config.fs_ugi) - nextday = int(days[day_index + 1]) - if nextday <= last_base_day: - log_str = "batch model/base xbox model exists" - fleet_util.rank0_print(log_str) - stdout_str += time_prefix_str() + log_str - else: - stdout_str += save_delta(nextday, -1, xbox_base_key, cur_path, exe, scope2, scope2, scope3, - join_common_model, join_common_model, update_model, - join_save_params, common_save_params, update_save_params, monitor_data) - - fleet_util.rank0_print("shrink table") - begin = time.time() - fleet.shrink_sparse_table() - fleet.shrink_dense_table(0.98, scope=scope2, table_id=1) - fleet.shrink_dense_table(0.98, scope=scope2, table_id=2) - fleet.shrink_dense_table(0.98, scope=scope3, table_id=3) - end = time.time() - log_str = "shrink table done, cost %s min" % ((end - begin) / 60.0) - fleet_util.rank0_print(log_str) - stdout_str += time_prefix_str() + log_str - - begin = time.time() - fleet_util.save_batch_model(config.output_path, nextday) - fleet_util.write_model_donefile(config.output_path, nextday, -1, xbox_base_key, config.fs_name, config.fs_ugi) - end = time.time() - log_str = "save batch model cost %s min" % ((end - begin) / 60.0) - fleet_util.rank0_print(log_str) - stdout_str += time_prefix_str() + log_str - write_stdout(stdout_str) - stdout_str = "" diff --git a/feed/feed_deploy/news_jingpai/package/my_nets/trainer_online_local.py b/feed/feed_deploy/news_jingpai/package/my_nets/trainer_online_local.py deleted file mode 100644 index c7e1811e7ad6133bfe2f4aed209064ee42103358..0000000000000000000000000000000000000000 --- a/feed/feed_deploy/news_jingpai/package/my_nets/trainer_online_local.py +++ /dev/null @@ -1,500 +0,0 @@ -import numpy as np -import os -import sys -import paddle -import paddle.fluid as fluid -import threading -import time -import config -from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet -from paddle.fluid.incubate.fleet.utils.fleet_util import FleetUtil -from paddle.fluid.incubate.fleet.utils.hdfs import HDFSClient -from model_new import Model -from model_new_jc import ModelJoinCommon - -fleet_util = FleetUtil() - -def create_model(slot_file, slot_common_file, all_slot_file): - join_common_model = ModelJoinCommon(slot_file, slot_common_file, all_slot_file, 20) - update_model = Model(slot_file, all_slot_file, False, 0, True) - with open("join_common_main_program.pbtxt", "w") as fout: - print >> fout, join_common_model._train_program - with open("join_common_startup_program.pbtxt", "w") as fout: - print >> fout, join_common_model._startup_program - with open("update_main_program.pbtxt", "w") as fout: - print >> fout, update_model._train_program - with open("update_startup_program.pbtxt", "w") as fout: - print >> fout, update_model._startup_program - return [join_common_model, update_model] - -def create_dataset(use_var_list, my_filelist): - dataset = fluid.DatasetFactory().create_dataset(config.dataset_type) - dataset.set_batch_size(config.batch_size) - dataset.set_thread(config.thread_num) - dataset.set_hdfs_config(config.fs_name, config.fs_ugi) - dataset.set_pipe_command(config.pipe_command) - dataset.set_filelist(my_filelist) - dataset.set_use_var(use_var_list) - return dataset - -def hdfs_ls(path): - configs = { - "fs.default.name": config.fs_name, - "hadoop.job.ugi": config.fs_ugi - } - hdfs_client = HDFSClient("$HADOOP_HOME", configs) - filelist = [] - for i in path: - cur_path = hdfs_client.ls(i) - if config.fs_name.startswith("hdfs:"): - cur_path = ["hdfs:" + j for j in cur_path] - elif config.fs_name.startswith("afs:"): - cur_path = ["afs:" + j for j in cur_path] - filelist += cur_path - return filelist - -def get_avg_cost_mins(value): - t1 = time.time() - local_cost = np.array([value]) - global_cost = np.copy(local_cost) * 0 - t2 = time.time() - fleet._role_maker._node_type_comm.Allreduce(local_cost, global_cost) - t3 = time.time() - avg_cost = float(global_cost[0]) / fleet.worker_num() - avg_cost /= 60.0 - t4 = time.time() - tc = (t2 - t1 + t4 - t3) / 60.0 - tb = (t3 - t2) / 60.0 - fleet_util.rank0_print("get_avg_cost_mins calc time %s barrier time %s" % (tc, tb)) - return avg_cost - -def get_max_cost_mins(value): - from mpi4py import MPI - local_cost = np.array([value]) - global_cost = np.copy(local_cost) * 0 - fleet._role_maker._node_type_comm.Allreduce(local_cost, global_cost, op=MPI.MAX) - fleet_util.rank0_print("max train time %s mins" % (float(global_cost[0]) / 60.0)) - -def get_min_cost_mins(value): - from mpi4py import MPI - local_cost = np.array([value]) - global_cost = np.copy(local_cost) * 0 - fleet._role_maker._node_type_comm.Allreduce(local_cost, global_cost, op=MPI.MIN) - fleet_util.rank0_print("min train time %s mins" % (float(global_cost[0]) / 60.0)) - -def get_data_max(value): - from mpi4py import MPI - local_cost = np.array([value]) - global_cost = np.copy(local_cost) * 0 - fleet._role_maker._node_type_comm.Allreduce(local_cost, global_cost, op=MPI.MAX) - fleet_util.rank0_print("data size max %s" % global_cost[0]) - -def get_data_min(value): - from mpi4py import MPI - local_cost = np.array([value]) - global_cost = np.copy(local_cost) * 0 - fleet._role_maker._node_type_comm.Allreduce(local_cost, global_cost, op=MPI.MIN) - fleet_util.rank0_print("data size min %s" % global_cost[0]) - -def clear_metrics(fleet_util, model, scope): - fleet_util.set_zero(model.stat_pos.name, scope) - fleet_util.set_zero(model.stat_neg.name, scope) - fleet_util.set_zero(model.batch_stat_pos.name, scope) - fleet_util.set_zero(model.batch_stat_neg.name, scope) - fleet_util.set_zero(model.abserr.name, scope, param_type="float32") - fleet_util.set_zero(model.sqrerr.name, scope, param_type="float32") - fleet_util.set_zero(model.prob.name, scope, param_type="float32") - fleet_util.set_zero(model.q.name, scope, param_type="float32") - fleet_util.set_zero(model.pos.name, scope, param_type="float32") - fleet_util.set_zero(model.total.name, scope, param_type="float32") - -def clear_metrics_2(fleet_util, model, scope): - fleet_util.set_zero(model.join_stat_pos.name, scope) - fleet_util.set_zero(model.join_stat_neg.name, scope) - fleet_util.set_zero(model.join_batch_stat_pos.name, scope) - fleet_util.set_zero(model.join_batch_stat_neg.name, scope) - fleet_util.set_zero(model.join_abserr.name, scope, param_type="float32") - fleet_util.set_zero(model.join_sqrerr.name, scope, param_type="float32") - fleet_util.set_zero(model.join_prob.name, scope, param_type="float32") - fleet_util.set_zero(model.join_q.name, scope, param_type="float32") - fleet_util.set_zero(model.join_pos.name, scope, param_type="float32") - fleet_util.set_zero(model.join_total.name, scope, param_type="float32") - - fleet_util.set_zero(model.common_stat_pos.name, scope) - fleet_util.set_zero(model.common_stat_neg.name, scope) - fleet_util.set_zero(model.common_batch_stat_pos.name, scope) - fleet_util.set_zero(model.common_batch_stat_neg.name, scope) - fleet_util.set_zero(model.common_abserr.name, scope, param_type="float32") - fleet_util.set_zero(model.common_sqrerr.name, scope, param_type="float32") - fleet_util.set_zero(model.common_prob.name, scope, param_type="float32") - fleet_util.set_zero(model.common_q.name, scope, param_type="float32") - fleet_util.set_zero(model.common_pos.name, scope, param_type="float32") - fleet_util.set_zero(model.common_total.name, scope, param_type="float32") - -def save_delta(day, pass_index, xbox_base_key, cur_path, exe, scope_join, scope_common, scope_update, join_model, - join_common_model, update_model, join_save_params, common_save_params, update_save_params): - fleet_util.rank0_print("begin save delta model") - begin = time.time() - if pass_index == -1: - fleet_util.save_xbox_base_model(config.output_path, day) - else: - fleet_util.save_delta_model(config.output_path, day, pass_index) - end = time.time() - fleet_util.save_paddle_params(exe, scope_join, join_model._train_program, "paddle_dense.model.0", - config.output_path, day, pass_index, config.fs_name, config.fs_ugi, - var_names=join_save_params) - fleet_util.save_paddle_params(exe, scope_common, join_common_model._train_program, "paddle_dense.model.1", - config.output_path, day, pass_index, config.fs_name, config.fs_ugi, - var_names=common_save_params) - fleet_util.save_paddle_params(exe, scope_update, update_model._train_program, "paddle_dense.model.2", - config.output_path, day, pass_index, config.fs_name, config.fs_ugi, - var_names=update_save_params) - fleet_util.rank0_print("end save delta cost %s min" % ((end - begin) / 60.0)) - fleet_util.rank0_print("begin save cache") - begin = time.time() - if pass_index == -1: - key_num = fleet_util.save_cache_base_model(config.output_path, day) - else: - key_num = fleet_util.save_cache_model(config.output_path, day, pass_index) - fleet_util.write_cache_donefile(config.output_path, day, pass_index, key_num, config.fs_name, config.fs_ugi) - end = time.time() - fleet_util.rank0_print("end save cache cost %s min, key_num=%s" % ((end - begin) / 60.0, key_num)) - fleet_util.write_xbox_donefile(config.output_path, day, pass_index, xbox_base_key, ",".join(cur_path), - config.fs_name, config.fs_ugi) - -if __name__ == "__main__": - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - fleet.init(exe) - - slot_file = "slot/slot" - slot_common_file = "slot/slot_common" - all_slot_file = "all_slot.dict" - - join_common_model, update_model = create_model(slot_file, slot_common_file, all_slot_file) - - scope2 = fluid.Scope() - scope3 = fluid.Scope() - - adjust_ins_weight = { "need_adjust" : True, "nid_slot" : "6002", "nid_adjw_threshold" : 1000, "nid_adjw_ratio": 20, - "ins_weight_slot": update_model.ins_weight.name } - - thread_stat_var_names = [] - thread_stat_var_names.append(join_common_model.join_stat_pos.name) - thread_stat_var_names.append(join_common_model.join_stat_neg.name) - thread_stat_var_names.append(join_common_model.join_sqrerr.name) - thread_stat_var_names.append(join_common_model.join_abserr.name) - thread_stat_var_names.append(join_common_model.join_prob.name) - thread_stat_var_names.append(join_common_model.join_q.name) - thread_stat_var_names.append(join_common_model.join_pos.name) - thread_stat_var_names.append(join_common_model.join_total.name) - - thread_stat_var_names.append(join_common_model.common_stat_pos.name) - thread_stat_var_names.append(join_common_model.common_stat_neg.name) - thread_stat_var_names.append(join_common_model.common_sqrerr.name) - thread_stat_var_names.append(join_common_model.common_abserr.name) - thread_stat_var_names.append(join_common_model.common_prob.name) - thread_stat_var_names.append(join_common_model.common_q.name) - thread_stat_var_names.append(join_common_model.common_pos.name) - thread_stat_var_names.append(join_common_model.common_total.name) - - thread_stat_var_names.append(update_model.stat_pos.name) - thread_stat_var_names.append(update_model.stat_neg.name) - thread_stat_var_names.append(update_model.sqrerr.name) - thread_stat_var_names.append(update_model.abserr.name) - thread_stat_var_names.append(update_model.prob.name) - thread_stat_var_names.append(update_model.q.name) - thread_stat_var_names.append(update_model.pos.name) - thread_stat_var_names.append(update_model.total.name) - - thread_stat_var_names = list(set(thread_stat_var_names)) - - - adam = fluid.optimizer.Adam(learning_rate=0.000005) - adam = fleet.distributed_optimizer(adam, strategy={"use_cvm" : True, "adjust_ins_weight" : adjust_ins_weight, "scale_datanorm" : 1e-4, "dump_slot": True, "stat_var_names": thread_stat_var_names, "fleet_desc_file": "fleet_desc_combinejoincommon.prototxt"}) - adam.minimize([join_common_model.joint_cost, update_model.avg_cost], [scope2, scope3]) - - join_common_model._train_program._fleet_opt["program_configs"][str(id(join_common_model.joint_cost.block.program))]["push_sparse"] = [] - - join_save_params = ["join.batch_size", "join.batch_sum", "join.batch_square_sum", - "join_0.w_0", "join_0.b_0", "join_1.w_0", "join_1.b_0", "join_2.w_0", "join_2.b_0", - "join_3.w_0", "join_3.b_0", "join_4.w_0", "join_4.b_0", "join_5.w_0", "join_5.b_0", - "join_6.w_0", "join_6.b_0", "join_7.w_0", "join_7.b_0"] - common_save_params = ["common.batch_size", "common.batch_sum", "common.batch_square_sum", - "common_0.w_0", "common_0.b_0", "common_1.w_0", "common_1.b_0", "common_2.w_0", "common_2.b_0", - "common_3.w_0", "common_3.b_0", "common_4.w_0", "common_4.b_0", "common_5.w_0", "common_5.b_0", - "common_6.w_0", "common_6.b_0", "common_7.w_0", "common_7.b_0"] - update_save_params = ["fc_0.w_0", "fc_0.b_0", "fc_1.w_0", "fc_1.b_0", - "fc_2.w_0", "fc_2.b_0", "fc_3.w_0", "fc_3.b_0", - "fc_4.w_0", "fc_4.b_0", "fc_5.w_0", "fc_5.b_0"] - - if fleet.is_server(): - fleet.run_server() - elif fleet.is_worker(): - with fluid.scope_guard(scope3): - exe.run(update_model._startup_program) - with fluid.scope_guard(scope2): - exe.run(join_common_model._startup_program) - fleet.init_worker() - - configs = { - "fs.default.name": config.fs_name, - "hadoop.job.ugi": config.fs_ugi - } - hdfs_client = HDFSClient("$HADOOP_HOME", configs) - - save_first_base = config.save_first_base - path = config.train_data_path - online_pass_interval = fleet_util.get_online_pass_interval(config.days, config.hours, config.split_interval, config.split_per_pass, False) - pass_per_day = len(online_pass_interval) - last_day, last_pass, last_path, xbox_base_key = fleet_util.get_last_save_model(config.output_path, config.fs_name, config.fs_ugi) - reqi = True if last_day != -1 else False - - dataset = None - next_dataset = None - cur_path = None - next_path = None - start_train = False - days = os.popen("echo -n " + config.days).read().split(" ") - hours = os.popen("echo -n " + config.hours).read().split(" ") - for day_index in range(len(days)): - day = days[day_index] - if last_day != -1 and int(day) < last_day: - continue - for pass_index in range(1, pass_per_day + 1): - dataset = next_dataset - next_dataset = None - cur_path = next_path - next_path = None - if (last_day != -1 and int(day) == last_day) and (last_pass != -1 and int(pass_index) < last_pass): - continue - if reqi: - begin = time.time() - fleet_util.rank0_print("going to load model %s" % last_path) - # fleet_util.load_fleet_model(last_path) - # fleet.load_one_table(0, last_path) - # tmppath = "afs:/user/feed/mlarch/sequence_generator/wuzhihua02/xujiaqi/test_combinejoincommon_0921_72/new_model" - #"afs:/user/feed/mlarch/sequence_generator/wuzhihua02/xujiaqi/test_combinejoincommon_0920_108/new_model" - #"afs:/user/feed/mlarch/sequence_generator/wuzhihua02/xujiaqi/test_combinejoincommon_0915/new_model" - # fleet.load_one_table(1,tmppath) - # fleet.load_one_table(2,tmppath) - # fleet.load_one_table(3,tmppath) - - end = time.time() - fleet_util.rank0_print("load model cost %s min" % ((end - begin) / 60.0)) - reqi = False - if (last_day != -1 and int(day) == last_day) and (last_pass != -1 and int(pass_index) == last_pass): - continue - - fleet_util.rank0_print("===========going to train day/pass %s/%s===========" % (day, pass_index)) - - if save_first_base: - fleet_util.rank0_print("save_first_base=True") - save_first_base = False - last_base_day, last_base_path, tmp_xbox_base_key = \ - fleet_util.get_last_save_xbox_base(config.output_path, config.fs_name, config.fs_ugi) - if int(day) > last_base_day: - fleet_util.rank0_print("going to save xbox base model") - xbox_base_key = int(time.time()) - cur = [] - for interval in online_pass_interval[pass_index - 1]: - for p in path: - cur.append(p + "/" + day + "/" + interval) - save_delta(day, -1, xbox_base_key, cur, exe, scope2, scope2, scope3, - join_common_model, join_common_model, update_model, - join_save_params, common_save_params, update_save_params) - elif int(day) == last_base_day: - xbox_base_key = tmp_xbox_base_key - fleet_util.rank0_print("xbox base model exists") - else: - fleet_util.rank0_print("xbox base model exists") - - start_train = True - train_begin = time.time() - - if dataset is not None: - begin = time.time() - dataset.wait_preload_done() - end = time.time() - fleet_util.rank0_print("wait data preload done cost %s min" % ((end - begin) / 60.0)) - - if dataset is None: - cur_pass = online_pass_interval[pass_index - 1] - cur_path = [] - for interval in cur_pass: - for p in path: - cur_path.append(p + "/" + day + "/" + interval) - fleet_util.rank0_print("data path: " + ",".join(cur_path)) - #for i in cur_path: - # while not hdfs_client.is_exist(i + "/to.hadoop.done"): - # fleet_util.rank0_print("wait for data ready: %s" % i) - # time.sleep(config.check_exist_seconds) - my_filelist = ["part-00000_1"]#fleet.split_files(hdfs_ls(cur_path)) - - dataset = create_dataset(join_common_model._all_slots, my_filelist) - fleet_util.rank0_print("going to load into memory") - begin = time.time() - dataset.load_into_memory() - end = time.time() - fleet_util.rank0_print("load into memory done, cost %s min" % ((end - begin) / 60.0)) - - if config.prefetch and (pass_index < pass_per_day or pass_index == pass_per_day and day_index < len(days) - 1): - if pass_index < pass_per_day: - next_pass = online_pass_interval[pass_index] - next_day = day - else: - next_pass = online_pass_interval[0] - next_day = days[day_index + 1] - next_path = [] - for interval in next_pass: - for p in path: - next_path.append(p + "/" + next_day + "/" + interval) - next_data_ready = True - #for i in next_path: - # if not hdfs_client.is_exist(i + "/to.hadoop.done"): - # next_data_ready = False - # fleet_util.rank0_print("next data not ready: %s" % i) - if not next_data_ready: - next_dataset = None - else: - my_filelist = ["part-00000_1"]#fleet.split_files(hdfs_ls(next_path)) - next_dataset = create_dataset(join_common_model._all_slots, my_filelist) - fleet_util.rank0_print("next pass data preload %s " % ",".join(next_path)) - next_dataset.preload_into_memory(config.preload_thread) - - fleet_util.rank0_print("going to global shuffle") - begin = time.time() - dataset.global_shuffle(fleet, config.shuffle_thread) - end = time.time() - fleet_util.rank0_print("global shuffle done, cost %s min, data size %s" % ((end - begin) / 60.0, dataset.get_shuffle_data_size(fleet))) - - get_data_max(dataset.get_shuffle_data_size()) - get_data_min(dataset.get_shuffle_data_size()) - - join_cost = 0 - common_cost = 0 - update_cost = 0 - - with fluid.scope_guard(scope2): - fleet_util.rank0_print("Begin join + common pass") - begin = time.time() - exe.train_from_dataset(join_common_model._train_program, - dataset, - scope2, - thread=config.join_common_thread, - debug=False) - end = time.time() - avg_cost = get_avg_cost_mins(end - begin) - - fleet_util.rank0_print("avg train time %s mins" % avg_cost) - - get_max_cost_mins(end - begin) - get_min_cost_mins(end - begin) - - common_cost = avg_cost - - fleet_util.print_global_metrics(scope2, join_common_model.join_stat_pos.name, join_common_model.join_stat_neg.name, - join_common_model.join_sqrerr.name, join_common_model.join_abserr.name, - join_common_model.join_prob.name, - join_common_model.join_q.name, join_common_model.join_pos.name, - join_common_model.join_total.name, - "join pass:") - - fleet_util.print_global_metrics(scope2, join_common_model.common_stat_pos.name, join_common_model.common_stat_neg.name, - join_common_model.common_sqrerr.name, join_common_model.common_abserr.name, - join_common_model.common_prob.name, - join_common_model.common_q.name, join_common_model.common_pos.name, - join_common_model.common_total.name, - "common pass:") - fleet_util.rank0_print("End join+common pass") - clear_metrics_2(fleet_util, join_common_model, scope2) - - if config.save_xbox_before_update and pass_index % config.save_delta_frequency == 0: - fleet_util.rank0_print("going to save delta model") - last_xbox_day, last_xbox_pass, last_xbox_path, _ = fleet_util.get_last_save_xbox(config.output_path, config.fs_name, config.fs_ugi) - if int(day) < last_xbox_day or int(day) == last_xbox_day and int(pass_index) <= last_xbox_pass: - fleet_util.rank0_print("delta model exists") - else: - save_delta(day, pass_index, xbox_base_key, cur_path, exe, scope2, scope2, scope3, - join_common_model, join_common_model, update_model, - join_save_params, common_save_params, update_save_params) - - with fluid.scope_guard(scope3): - fleet_util.rank0_print("Begin update pass") - begin = time.time() - exe.train_from_dataset(update_model._train_program, - dataset, - scope3, - thread=config.update_thread, - debug=False) - end = time.time() - avg_cost = get_avg_cost_mins(end - begin) - update_cost = avg_cost - - fleet_util.print_global_metrics(scope3, update_model.stat_pos.name, update_model.stat_neg.name, - update_model.sqrerr.name, update_model.abserr.name, update_model.prob.name, - update_model.q.name, update_model.pos.name, update_model.total.name, - "update pass:") - fleet_util.rank0_print("End update pass") - clear_metrics(fleet_util, update_model, scope3) - - begin = time.time() - dataset.release_memory() - end = time.time() - - print pass_index - print config.checkpoint_per_pass - - if (pass_index % config.checkpoint_per_pass) == 0 and pass_index != pass_per_day: - print "save" - begin = time.time() - fleet_util.save_model(config.output_path, day, pass_index) - fleet_util.write_model_donefile(config.output_path, day, pass_index, xbox_base_key, config.fs_name, config.fs_ugi) - end = time.time() - fleet_util.rank0_print("save model cost %s min" % ((end - begin) / 60.0)) - if not config.save_xbox_before_update and pass_index % config.save_delta_frequency == 0: - fleet_util.rank0_print("going to save delta model") - last_xbox_day, last_xbox_pass, last_xbox_path, _ = fleet_util.get_last_save_xbox(config.output_path, config.fs_name, config.fs_ugi) - if int(day) < last_xbox_day or int(day) == last_xbox_day and int(pass_index) <= last_xbox_pass: - fleet_util.rank0_print("delta model exists") - else: - save_delta(day, pass_index, xbox_base_key, cur_path, exe, scope2, scope2, scope3, - join_common_model, join_common_model, update_model, - join_save_params, common_save_params, update_save_params) - - train_end = time.time() - train_cost = (train_end - train_begin) / 60.0 - other_cost = train_cost - join_cost - common_cost - update_cost - fleet_util.rank0_print(\ - "finished train day %s pass %s time cost:%s min job time cost" - ":[join:%s min][join_common:%s min][update:%s min][other:%s min]" \ - % (day, pass_index, train_cost, join_cost, common_cost, update_cost, other_cost)) - - xbox_base_key = int(time.time()) - if not start_train: - continue - - fleet_util.rank0_print("shrink table") - begin = time.time() - fleet.shrink_sparse_table() - fleet.shrink_dense_table(0.98, scope=scope2, table_id=1) - fleet.shrink_dense_table(0.98, scope=scope2, table_id=2) - fleet.shrink_dense_table(0.98, scope=scope3, table_id=3) - end = time.time() - fleet_util.rank0_print("shrink table done, cost %s min" % ((end - begin) / 60.0)) - - fleet_util.rank0_print("going to save batch model/base xbox model") - last_base_day, last_base_path, _ = fleet_util.get_last_save_xbox_base(config.output_path, config.fs_name, config.fs_ugi) - nextday = int(days[day_index + 1]) - if nextday <= last_base_day: - fleet_util.rank0_print("batch model/base xbox model exists") - else: - save_delta(nextday, -1, xbox_base_key, cur_path, exe, scope2, scope2, scope3, - join_common_model, join_common_model, update_model, - join_save_params, common_save_params, update_save_params) - begin = time.time() - fleet_util.save_batch_model(config.output_path, nextday) - fleet_util.write_model_donefile(config.output_path, nextday, -1, xbox_base_key, config.fs_name, config.fs_ugi) - end = time.time() - fleet_util.rank0_print("save batch model cost %s min" % ((end - begin) / 60.0)) diff --git a/feed/feed_deploy/news_jingpai/package/my_nets/util.bak.py b/feed/feed_deploy/news_jingpai/package/my_nets/util.bak.py deleted file mode 100644 index 15e96c9e63bdee985be5bea396195d174c2cdf27..0000000000000000000000000000000000000000 --- a/feed/feed_deploy/news_jingpai/package/my_nets/util.bak.py +++ /dev/null @@ -1,135 +0,0 @@ -import paddle -import paddle.fluid as fluid -from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet -import os -import numpy as np -import config - -def jingpai_load_paddle_model(old_startup_program_bin, - old_train_program_bin, - old_model_path, - old_slot_list, - new_slot_list, - model_all_vars, - new_scope, - modify_layer_names): - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - old_scope = fluid.Scope() - old_program = fluid.Program() - old_program = old_program.parse_from_string(open(old_train_program_bin, "rb").read()) - old_startup_program = fluid.Program() - old_startup_program = old_startup_program.parse_from_string(open(old_startup_program_bin, "rb").read()) - with fluid.scope_guard(old_scope): - exe.run(old_startup_program) - variables = [old_program.global_block().var(i) for i in model_all_vars] - if os.path.isfile(old_model_path): - path = os.path.dirname(old_model_path) - path = "./" if path == "" else path - filename = os.path.basename(old_model_path) - fluid.io.load_vars(exe, path, old_program, vars=variables, filename=filename) - else: - fluid.io.load_vars(exe, old_model_path, old_program, vars=variables) - - old_pos = {} - idx = 0 - for i in old_slot_list: - old_pos[i] = idx - idx += 1 - - for i in modify_layer_names: - if old_scope.find_var(i) is None: - print("%s not found in old scope, skip" % i) - continue - elif new_scope.find_var(i) is None: - print("%s not found in new scope, skip" % i) - continue - old_param = old_scope.var(i).get_tensor() - old_param_array = np.array(old_param).astype("float32") - old_shape = old_param_array.shape - #print i," old_shape ", old_shape - - new_param = new_scope.var(i).get_tensor() - new_param_array = np.array(new_param).astype("float32") - new_shape = new_param_array.shape - #print i," new_shape ", new_shape - - per_dim = len(new_param_array) / len(new_slot_list) - #print "len(new_param_array) ",len(new_param_array),\ - # "len(new_slot_list) ", len(new_slot_list)," per_dim ", per_dim - - idx = -per_dim - for s in new_slot_list: - idx += per_dim - if old_pos.get(s) is None: - continue - for j in range(0, per_dim): - #print i," row/value ", idx + j, " copy from ", old_pos[s] * per_dim + j - # a row or a value - new_param_array[idx + j] = old_param_array[old_pos[s] * per_dim + j] - - new_param.set(new_param_array, place) - - for i in model_all_vars: - if i in modify_layer_names: - continue - old_param = old_scope.find_var(i).get_tensor() - old_param_array = np.array(old_param).astype("float32") - new_param = new_scope.find_var(i).get_tensor() - new_param.set(old_param_array, place) - - -def reqi_changeslot(hdfs_dnn_plugin_path, join_save_params, common_save_params, update_save_params, scope2, scope3): - if fleet.worker_index() != 0: - return - - print("load paddle model %s" % hdfs_dnn_plugin_path) - - os.system("rm -rf dnn_plugin/ ; hadoop fs -D hadoop.job.ugi=%s -D fs.default.name=%s -get %s ." % (config.fs_ugi, config.fs_name, hdfs_dnn_plugin_path)) - - new_join_slot = [] - for line in open("slot/slot", 'r'): - slot = line.strip() - new_join_slot.append(slot) - old_join_slot = [] - for line in open("old_slot/slot", 'r'): - slot = line.strip() - old_join_slot.append(slot) - - new_common_slot = [] - for line in open("slot/slot_common", 'r'): - slot = line.strip() - new_common_slot.append(slot) - old_common_slot = [] - for line in open("old_slot/slot_common", 'r'): - slot = line.strip() - old_common_slot.append(slot) - - - jingpai_load_paddle_model("old_program/old_join_common_startup_program.bin", - "old_program/old_join_common_train_program.bin", - "dnn_plugin/paddle_dense.model.0", - old_join_slot, - new_join_slot, - join_save_params, - scope2, - ["join.batch_size","join.batch_sum","join.batch_square_sum","join_0.w_0"]) - - jingpai_load_paddle_model("old_program/old_join_common_startup_program.bin", - "old_program/old_join_common_train_program.bin", - "dnn_plugin/paddle_dense.model.1", - old_common_slot, - new_common_slot, - common_save_params, - scope2, - ["common.batch_size","common.batch_sum","common.batch_square_sum","common_0.w_0"]) - - jingpai_load_paddle_model("old_program/old_update_startup_program.bin", - "old_program/old_update_main_program.bin", - "dnn_plugin/paddle_dense.model.2", - old_join_slot, - new_join_slot, - update_save_params, - scope3, - ["fc_0.w_0"]) diff --git a/feed/feed_deploy/news_jingpai/package/my_nets/util.py b/feed/feed_deploy/news_jingpai/package/my_nets/util.py deleted file mode 100644 index 46de454f3e7ec05c8ddc07494cc4c255d28b1ec8..0000000000000000000000000000000000000000 --- a/feed/feed_deploy/news_jingpai/package/my_nets/util.py +++ /dev/null @@ -1,286 +0,0 @@ -import paddle -import paddle.fluid as fluid -from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet -import os -import numpy as np -import config -from paddle.fluid.incubate.fleet.utils.fleet_util import FleetUtil -from paddle.fluid.incubate.fleet.utils.hdfs import HDFSClient -import collections -import json -import time - -fleet_util = FleetUtil() - -def print_global_metrics(scope, stat_pos_name, stat_neg_name, sqrerr_name, - abserr_name, prob_name, q_name, pos_ins_num_name, - total_ins_num_name, print_prefix): - auc, bucket_error, mae, rmse, actual_ctr, predicted_ctr, copc,\ - mean_predict_qvalue, total_ins_num = fleet_util.get_global_metrics(\ - scope, stat_pos_name, stat_neg_name, sqrerr_name, abserr_name,\ - prob_name, q_name, pos_ins_num_name, total_ins_num_name) - log_str = "AUC=%.6f BUCKET_ERROR=%.6f MAE=%.6f " \ - "RMSE=%.6f Actural_CTR=%.6f Predicted_CTR=%.6f " \ - "COPC=%.6f MEAN Q_VALUE=%.6f Ins number=%s" % \ - (auc, bucket_error, mae, rmse, \ - actual_ctr, predicted_ctr, copc, mean_predict_qvalue, \ - total_ins_num) - fleet_util.rank0_print(print_prefix + " " + log_str) - return print_prefix + " " + log_str #print_prefix + "\n " + log_str - -def write_stdout(stdout_str): - if fleet.worker_index() != 0: - fleet._role_maker._barrier_worker() - return - hadoop_home="$HADOOP_HOME" - configs = {"fs.default.name": config.fs_name, "hadoop.job.ugi": config.fs_ugi} - client = HDFSClient(hadoop_home, configs) - out_dir = config.output_path + "/stdout/" - if not client.is_exist(out_dir): - client.makedirs(out_dir) - job_id_with_host = os.popen("echo -n ${JOB_ID}").read().strip() - instance_id = os.popen("echo -n ${INSTANCE_ID}").read().strip() - start_pos = instance_id.find(job_id_with_host) - end_pos = instance_id.find("--") - if start_pos != -1 and end_pos != -1: - job_id_with_host = instance_id[start_pos:end_pos] - file_path = out_dir + job_id_with_host - if client.is_file(file_path): - pre_content = client.cat(file_path) - with open(job_id_with_host, "w") as f: - f.write(pre_content + "\n") - f.write(stdout_str + "\n") - client.delete(file_path) - client.upload(out_dir, job_id_with_host, multi_processes=1, overwrite=False) - else: - with open(job_id_with_host, "w") as f: - f.write(stdout_str + "\n") - client.upload(out_dir, job_id_with_host, multi_processes=1, overwrite=False) - fleet_util.rank0_info("write %s succeed" % file_path) - fleet._role_maker._barrier_worker() - -def _get_xbox_str(day, model_path, xbox_base_key, data_path, monitor_data, mode="patch"): - xbox_dict = collections.OrderedDict() - if mode == "base": - xbox_dict["id"] = str(xbox_base_key) - elif mode == "patch": - xbox_dict["id"] = str(int(time.time())) - else: - print("warning: unknown mode %s, set it to patch" % mode) - mode = "patch" - xbox_dict["id"] = str(int(time.time())) - xbox_dict["key"] = str(xbox_base_key) - if model_path.startswith("hdfs:") or model_path.startswith("afs:"): - model_path = model_path[model_path.find(":") + 1:] - xbox_dict["input"] = config.fs_name + model_path.rstrip("/") + "/000" - xbox_dict["record_count"] = "111111" - xbox_dict["partition_type"] = "2" - xbox_dict["job_name"] = "default_job_name" - xbox_dict["ins_tag"] = "feasign" - xbox_dict["ins_path"] = data_path - job_id_with_host = os.popen("echo -n ${JOB_ID}").read().strip() - instance_id = os.popen("echo -n ${INSTANCE_ID}").read().strip() - start_pos = instance_id.find(job_id_with_host) - end_pos = instance_id.find("--") - if start_pos != -1 and end_pos != -1: - job_id_with_host = instance_id[start_pos:end_pos] - xbox_dict["job_id"] = job_id_with_host - xbox_dict["monitor_data"] = monitor_data - xbox_dict["monitor_path"] = config.output_path.rstrip("/") + "/monitor/" \ - + day + ".txt" - xbox_dict["mpi_size"] = str(fleet.worker_num()) - return json.dumps(xbox_dict) - -def write_xbox_donefile(day, pass_id, xbox_base_key, data_path, donefile_name=None, monitor_data=""): - if fleet.worker_index() != 0: - fleet._role_maker._barrier_worker() - return - day = str(day) - pass_id = str(pass_id) - xbox_base_key = int(xbox_base_key) - mode = None - if pass_id != "-1": - mode = "patch" - suffix_name = "/%s/delta-%s/" % (day, pass_id) - model_path = config.output_path.rstrip("/") + suffix_name - if donefile_name is None: - donefile_name = "xbox_patch_done.txt" - else: - mode = "base" - suffix_name = "/%s/base/" % day - model_path = config.output_path.rstrip("/") + suffix_name - if donefile_name is None: - donefile_name = "xbox_base_done.txt" - if isinstance(data_path, list): - data_path = ",".join(data_path) - - if fleet.worker_index() == 0: - donefile_path = config.output_path + "/" + donefile_name - xbox_str = _get_xbox_str(day, model_path, xbox_base_key, data_path, monitor_data, mode) - configs = {"fs.default.name": config.fs_name, "hadoop.job.ugi": config.fs_ugi} - client = HDFSClient("$HADOOP_HOME", configs) - if client.is_file(donefile_path): - pre_content = client.cat(donefile_path) - last_dict = json.loads(pre_content.split("\n")[-1]) - last_day = last_dict["input"].split("/")[-3] - last_pass = last_dict["input"].split("/")[-2].split("-")[-1] - exist = False - if int(day) < int(last_day) or \ - int(day) == int(last_day) and \ - int(pass_id) <= int(last_pass): - exist = True - if not exist: - with open(donefile_name, "w") as f: - f.write(pre_content + "\n") - f.write(xbox_str + "\n") - client.delete(donefile_path) - client.upload( - config.output_path, - donefile_name, - multi_processes=1, - overwrite=False) - fleet_util.rank0_info("write %s/%s %s succeed" % \ - (day, pass_id, donefile_name)) - else: - fleet_util.rank0_error("not write %s because %s/%s already " - "exists" % (donefile_name, day, pass_id)) - else: - with open(donefile_name, "w") as f: - f.write(xbox_str + "\n") - client.upload( - config.output_path, - donefile_name, - multi_processes=1, - overwrite=False) - fleet_util.rank0_error("write %s/%s %s succeed" % \ - (day, pass_id, donefile_name)) - fleet._role_maker._barrier_worker() - -def jingpai_load_paddle_model(old_startup_program_bin, - old_train_program_bin, - old_model_path, - old_slot_list, - new_slot_list, - model_all_vars, - new_scope, - modify_layer_names): - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - old_scope = fluid.Scope() - old_program = fluid.Program() - old_program = old_program.parse_from_string(open(old_train_program_bin, "rb").read()) - old_startup_program = fluid.Program() - old_startup_program = old_startup_program.parse_from_string(open(old_startup_program_bin, "rb").read()) - with fluid.scope_guard(old_scope): - exe.run(old_startup_program) - variables = [old_program.global_block().var(i) for i in model_all_vars] - if os.path.isfile(old_model_path): - path = os.path.dirname(old_model_path) - path = "./" if path == "" else path - filename = os.path.basename(old_model_path) - fluid.io.load_vars(exe, path, old_program, vars=variables, filename=filename) - else: - fluid.io.load_vars(exe, old_model_path, old_program, vars=variables) - - old_pos = {} - idx = 0 - for i in old_slot_list: - old_pos[i] = idx - idx += 1 - - for i in modify_layer_names: - if old_scope.find_var(i) is None: - print("%s not found in old scope, skip" % i) - continue - elif new_scope.find_var(i) is None: - print("%s not found in new scope, skip" % i) - continue - old_param = old_scope.var(i).get_tensor() - old_param_array = np.array(old_param).astype("float32") - old_shape = old_param_array.shape - #print i," old_shape ", old_shape - - new_param = new_scope.var(i).get_tensor() - new_param_array = np.array(new_param).astype("float32") - new_shape = new_param_array.shape - #print i," new_shape ", new_shape - - per_dim = len(new_param_array) / len(new_slot_list) - #print "len(new_param_array) ",len(new_param_array),\ - # "len(new_slot_list) ", len(new_slot_list)," per_dim ", per_dim - - idx = -per_dim - for s in new_slot_list: - idx += per_dim - if old_pos.get(s) is None: - continue - for j in range(0, per_dim): - #print i," row/value ", idx + j, " copy from ", old_pos[s] * per_dim + j - # a row or a value - new_param_array[idx + j] = old_param_array[old_pos[s] * per_dim + j] - - new_param.set(new_param_array, place) - - for i in model_all_vars: - if i in modify_layer_names: - continue - old_param = old_scope.find_var(i).get_tensor() - old_param_array = np.array(old_param).astype("float32") - new_param = new_scope.find_var(i).get_tensor() - new_param.set(old_param_array, place) - - -def reqi_changeslot(hdfs_dnn_plugin_path, join_save_params, common_save_params, update_save_params, scope2, scope3): - if fleet.worker_index() != 0: - return - - print("load paddle model %s" % hdfs_dnn_plugin_path) - - os.system("rm -rf dnn_plugin/ ; hadoop fs -D hadoop.job.ugi=%s -D fs.default.name=%s -get %s ." % (config.fs_ugi, config.fs_name, hdfs_dnn_plugin_path)) - - new_join_slot = [] - for line in open("slot/slot", 'r'): - slot = line.strip() - new_join_slot.append(slot) - old_join_slot = [] - for line in open("old_slot/slot", 'r'): - slot = line.strip() - old_join_slot.append(slot) - - new_common_slot = [] - for line in open("slot/slot_common", 'r'): - slot = line.strip() - new_common_slot.append(slot) - old_common_slot = [] - for line in open("old_slot/slot_common", 'r'): - slot = line.strip() - old_common_slot.append(slot) - - - jingpai_load_paddle_model("old_program/old_join_common_startup_program.bin", - "old_program/old_join_common_train_program.bin", - "dnn_plugin/paddle_dense.model.0", - old_join_slot, - new_join_slot, - join_save_params, - scope2, - ["join.batch_size","join.batch_sum","join.batch_square_sum","join_0.w_0"]) - - jingpai_load_paddle_model("old_program/old_join_common_startup_program.bin", - "old_program/old_join_common_train_program.bin", - "dnn_plugin/paddle_dense.model.1", - old_common_slot, - new_common_slot, - common_save_params, - scope2, - ["common.batch_size","common.batch_sum","common.batch_square_sum","common_0.w_0"]) - - jingpai_load_paddle_model("old_program/old_update_startup_program.bin", - "old_program/old_update_main_program.bin", - "dnn_plugin/paddle_dense.model.2", - old_join_slot, - new_join_slot, - update_save_params, - scope3, - ["fc_0.w_0"]) diff --git a/feed/feed_deploy/news_jingpai/qsub_f.conf b/feed/feed_deploy/news_jingpai/qsub_f.conf deleted file mode 100644 index 35f7d5aefc66a9cce85f57f42c0eb33ba388da86..0000000000000000000000000000000000000000 --- a/feed/feed_deploy/news_jingpai/qsub_f.conf +++ /dev/null @@ -1,4 +0,0 @@ -SERVER=yq01-hpc-lvliang01-smart-master.dmop.baidu.com -QUEUE=feed5 -PRIORITY=very_high -USE_FLAGS_ADVRES=yes diff --git a/feed/feed_deploy/news_jingpai/run.sh b/feed/feed_deploy/news_jingpai/run.sh deleted file mode 100755 index f3ee6d87eea33dc5cb2445d5ea1e775d7ebd700a..0000000000000000000000000000000000000000 --- a/feed/feed_deploy/news_jingpai/run.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/bin/sh -source ~/.bashrc - -# Author: Wu.ZG -# Created Time : 2017-08-14 21:31:56 -# File Name: guard.sh -# Version: -# Description: -# Last modified: 2018-01-29 11:00:42 - -set -x - -SLEEP=10 -HOST=`hostname` -WORKROOT=${PWD} -RUN_SCRIPT="${WORKROOT}/submit.sh" -#ALARM="./alarm.sh" -on_duty=( - # RD - # OP - # QA - 15101120768 -) - -function alarm() { - content=$1 - for phone_num in ${on_duty[@]};do - echo ${phone_num} ${content} - gsmsend -s emp01.baidu.com:15001 "${phone_num}"@"$1" - done - echo "$1" | mail -s "$1" $email -} - -pid=$$ -echo ${pid} > pid - -if [ ! -d "./log" ];then - mkdir log -fi - -while [ 1 ] -do - sh ${RUN_SCRIPT} > log/"`date +"%Y%m%d_%H%M%S"`".log - RET=$? - - #source ${ALARM} - if [ ${RET} -ne 0 ];then - content="`date +"%Y%m%d %H:%M:%S "` Job fail. Exit ${RET}. From ${HOST}:${WORKROOT}. Pid=${pid}" - echo "${content}" - alarm "${content}" - else - content="`date +"%Y%m%d %H:%M:%S "` Job finish. From ${HOST}:${WORKROOT}. Pid=${pid}" - echo "${content}" - alarm "${content}" - break - fi - - sleep ${SLEEP} - -done - -echo "`date +"%Y%m%d %H:%M:%S "` guard exit." diff --git a/feed/feed_deploy/news_jingpai/submit.sh b/feed/feed_deploy/news_jingpai/submit.sh deleted file mode 100644 index 26715ec67eae37eb2bcb4855c4cb38e87b7b38c9..0000000000000000000000000000000000000000 --- a/feed/feed_deploy/news_jingpai/submit.sh +++ /dev/null @@ -1,56 +0,0 @@ -#!/bin/bash - -source ./package/my_nets/config.py - -rm -r tmp/* -mkdir tmp -cd tmp - -mkdir ./package -cp -r ../package/python ./package -cp -r ../package/my_nets/* ./package -cp -r ../hadoop-client_mpi ./package/hadoop-client -cp ../qsub_f.conf ./ -cp ../job.sh ./ -cp ../job.sh ./package - -if [ "a${sparse_table_storage}" = "assd" ];then - sed -i 's/DownpourSparseTable/DownpourSparseSSDTable/g' ./package/reqi_fleet_desc - sed -i 's/sparse_table_cache_rate: 0.00055/sparse_table_cache_rate: 0.0025/g' ./package/reqi_fleet_desc -fi - -current=`date "+%Y-%m-%d %H:%M:%S"` -timeStamp=`date -d "$current" +%s` -output_path=${output_path#*:} -hdfs_output=${output_path}/$timeStamp - -export HADOOP_HOME="${local_hadoop_home}" - -MPI_NODE_MEM=${node_memory} -echo "SERVER=${mpi_server}" > qsub_f.conf -echo "QUEUE=${mpi_queue}" >> qsub_f.conf -echo "PRIORITY=${mpi_priority}" >> qsub_f.conf -echo "USE_FLAGS_ADVRES=yes" >> qsub_f.conf - -if [ "a${sparse_table_storage}" = "assd" ];then - ${smart_client_home}/bin/qsub_f \ - -N $task_name \ - --conf ./qsub_f.conf \ - --hdfs $fs_name \ - --ugi $fs_ugi \ - --hout $hdfs_output \ - --am-type smart_am \ - --files ./package \ - --workspace /home/ssd1/normandy/maybach \ - -l nodes=$nodes,walltime=1000:00:00,pmem-hard=$MPI_NODE_MEM,pcpu-soft=280,pnetin-soft=1000,pnetout-soft=1000 ./job.sh -else - ${smart_client_home}/bin/qsub_f \ - -N $task_name \ - --conf ./qsub_f.conf \ - --hdfs $fs_name \ - --ugi $fs_ugi \ - --hout $hdfs_output \ - --am-type smart_am \ - --files ./package \ - -l nodes=$nodes,walltime=1000:00:00,pmem-hard=$MPI_NODE_MEM,pcpu-soft=280,pnetin-soft=1000,pnetout-soft=1000 ./job.sh -fi diff --git a/feed/pybind/CMakeLists.txt b/feed/pybind/CMakeLists.txt deleted file mode 100755 index fc988818b7c4098e14b32534a4fb5df85c4a5fba..0000000000000000000000000000000000000000 --- a/feed/pybind/CMakeLists.txt +++ /dev/null @@ -1,23 +0,0 @@ -set(FEED_PYBIND_DEPS pybind python proto_desc memory executor fleet_wrapper box_wrapper - pass_builder parallel_executor profiler layer tracer engine scope_pool -dict_plugin fs shell) - -if(WITH_PYTHON) - list(APPEND FEED_PYBIND_DEPS py_func_op) -endif() - -set(FEED_PYBIND_SRCS - expand_api.cc - ) - -if(WITH_PYTHON) - if(WITH_AMD_GPU) - hip_library(feed_paddle_pybind SRCS ${FEED_PYBIND_SRCS} DEPS ARCHIVE_START ${FEED_PYBIND_DEPS} ARCHIVE_END) - else() - cc_library(feed_paddle_pybind SRCS ${FEED_PYBIND_SRCS} DEPS ${FEED_PYBIND_DEPS}) - endif(WITH_AMD_GPU) - - get_property (os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES) - target_link_libraries(feed_paddle_pybind ${os_dependency_modules}) - -endif(WITH_PYTHON) diff --git a/feed/pybind/expand_api.cc b/feed/pybind/expand_api.cc deleted file mode 100755 index 7ab1ece5066d94865a8c669b27226ddcadab25bf..0000000000000000000000000000000000000000 --- a/feed/pybind/expand_api.cc +++ /dev/null @@ -1,42 +0,0 @@ -#include "paddle/fluid/feed/pybind/expand_api.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "paddle/fluid/feed/src/common/dict_plugin.h" - -namespace py = pybind11; - -namespace paddle { -namespace pybind { -using paddle::framework::DictPluginManager; -using paddle::framework::FeasignCacheDict; - -void BindExpandDictPlugin(py::module *m); - -void BindExpandApi(py::module *m) { - BindExpandDictPlugin(m); -} - -void BindExpandDictPlugin(py::module *m) { - py::class_(*m, "FeasignCacheDict") - .def(py::init<>()) - .def(py::init()) - .def("load", &FeasignCacheDict::Load); - py::class_(*m, "DictPluginManager") - .def(py::init<>()) - .def_static("instance", &DictPluginManager::Instance) - .def("load_dict", &DictPluginManager::LoadDict) - .def("create_dict", &DictPluginManager::CreateDict); -} - - - -} // namespace pybind -} // namespace paddle diff --git a/feed/pybind/expand_api.h b/feed/pybind/expand_api.h deleted file mode 100755 index d1ca24ccd6253d3720d3176e2ccb516769ebbda8..0000000000000000000000000000000000000000 --- a/feed/pybind/expand_api.h +++ /dev/null @@ -1,9 +0,0 @@ -#pragma once - -#include - -namespace paddle { -namespace pybind { -void BindExpandApi(pybind11::module *m); -} // namespace pybind -} // namespace paddle diff --git a/feed/src/CMakeLists.txt b/feed/src/CMakeLists.txt deleted file mode 100755 index d8362ae245d9617e2e5376486e3988f63d468335..0000000000000000000000000000000000000000 --- a/feed/src/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ -add_subdirectory(common) -add_subdirectory(data_reader) diff --git a/feed/src/common/CMakeLists.txt b/feed/src/common/CMakeLists.txt deleted file mode 100755 index febff4385427993ab460a884ef2979115835b376..0000000000000000000000000000000000000000 --- a/feed/src/common/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -cc_library(dict_plugin SRCS dict_plugin.cc DEPS glog boost fs) diff --git a/feed/src/common/bhopscotch_map.h b/feed/src/common/bhopscotch_map.h deleted file mode 100755 index c165b7ba713421cc736e44a048bd6a0134e2f6df..0000000000000000000000000000000000000000 --- a/feed/src/common/bhopscotch_map.h +++ /dev/null @@ -1,675 +0,0 @@ -/** - * MIT License - * - * Copyright (c) 2017 Tessil - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef TSL_BHOPSCOTCH_MAP_H -#define TSL_BHOPSCOTCH_MAP_H - - -#include -#include -#include -#include -#include -#include -#include -#include -#include "paddle/fluid/feed/src/common/hopscotch_hash.h" - - -namespace tsl { - - -/** - * Similar to tsl::hopscotch_map but instead of using a list for overflowing elements it uses - * a binary search tree. It thus needs an additional template parameter Compare. Compare should - * be arithmetically coherent with KeyEqual. - * - * The binary search tree allows the map to have a worst-case scenario of O(log n) for search - * and delete, even if the hash function maps all the elements to the same bucket. - * For insert, the amortized worst case is O(log n), but the worst case is O(n) in case of rehash. - * - * This makes the map resistant to DoS attacks (but doesn't preclude you to have a good hash function, - * as an element in the bucket array is faster to retrieve than in the tree). - * - * @copydoc hopscotch_map - */ -template, - class KeyEqual = std::equal_to, - class Compare = std::less, - class Allocator = std::allocator>, - unsigned int NeighborhoodSize = 62, - bool StoreHash = false, - class GrowthPolicy = tsl::hh::power_of_two_growth_policy<2>> -class bhopscotch_map { -private: - template - using has_is_transparent = tsl::detail_hopscotch_hash::has_is_transparent; - - class KeySelect { - public: - using key_type = Key; - - const key_type& operator()(const std::pair& key_value) const { - return key_value.first; - } - - const key_type& operator()(std::pair& key_value) { - return key_value.first; - } - }; - - class ValueSelect { - public: - using value_type = T; - - const value_type& operator()(const std::pair& key_value) const { - return key_value.second; - } - - value_type& operator()(std::pair& key_value) { - return key_value.second; - } - }; - - - // TODO Not optimal as we have to use std::pair as ValueType which forbid - // us to move the key in the bucket array, we have to use copy. Optimize. - using overflow_container_type = std::map; - using ht = detail_hopscotch_hash::hopscotch_hash, KeySelect, ValueSelect, - Hash, KeyEqual, - Allocator, NeighborhoodSize, - StoreHash, GrowthPolicy, - overflow_container_type>; - -public: - using key_type = typename ht::key_type; - using mapped_type = T; - using value_type = typename ht::value_type; - using size_type = typename ht::size_type; - using difference_type = typename ht::difference_type; - using hasher = typename ht::hasher; - using key_equal = typename ht::key_equal; - using key_compare = Compare; - using allocator_type = typename ht::allocator_type; - using reference = typename ht::reference; - using const_reference = typename ht::const_reference; - using pointer = typename ht::pointer; - using const_pointer = typename ht::const_pointer; - using iterator = typename ht::iterator; - using const_iterator = typename ht::const_iterator; - - - /* - * Constructors - */ - bhopscotch_map() : bhopscotch_map(ht::DEFAULT_INIT_BUCKETS_SIZE) { - } - - explicit bhopscotch_map(size_type bucket_count, - const Hash& hash = Hash(), - const KeyEqual& equal = KeyEqual(), - const Allocator& alloc = Allocator(), - const Compare& comp = Compare()) : - m_ht(bucket_count, hash, equal, alloc, ht::DEFAULT_MAX_LOAD_FACTOR, comp) - { - } - - bhopscotch_map(size_type bucket_count, - const Allocator& alloc) : bhopscotch_map(bucket_count, Hash(), KeyEqual(), alloc) - { - } - - bhopscotch_map(size_type bucket_count, - const Hash& hash, - const Allocator& alloc) : bhopscotch_map(bucket_count, hash, KeyEqual(), alloc) - { - } - - explicit bhopscotch_map(const Allocator& alloc) : bhopscotch_map(ht::DEFAULT_INIT_BUCKETS_SIZE, alloc) { - } - - template - bhopscotch_map(InputIt first, InputIt last, - size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, - const Hash& hash = Hash(), - const KeyEqual& equal = KeyEqual(), - const Allocator& alloc = Allocator()) : bhopscotch_map(bucket_count, hash, equal, alloc) - { - insert(first, last); - } - - template - bhopscotch_map(InputIt first, InputIt last, - size_type bucket_count, - const Allocator& alloc) : bhopscotch_map(first, last, bucket_count, Hash(), KeyEqual(), alloc) - { - } - - template - bhopscotch_map(InputIt first, InputIt last, - size_type bucket_count, - const Hash& hash, - const Allocator& alloc) : bhopscotch_map(first, last, bucket_count, hash, KeyEqual(), alloc) - { - } - - bhopscotch_map(std::initializer_list init, - size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, - const Hash& hash = Hash(), - const KeyEqual& equal = KeyEqual(), - const Allocator& alloc = Allocator()) : - bhopscotch_map(init.begin(), init.end(), bucket_count, hash, equal, alloc) - { - } - - bhopscotch_map(std::initializer_list init, - size_type bucket_count, - const Allocator& alloc) : - bhopscotch_map(init.begin(), init.end(), bucket_count, Hash(), KeyEqual(), alloc) - { - } - - bhopscotch_map(std::initializer_list init, - size_type bucket_count, - const Hash& hash, - const Allocator& alloc) : - bhopscotch_map(init.begin(), init.end(), bucket_count, hash, KeyEqual(), alloc) - { - } - - - bhopscotch_map& operator=(std::initializer_list ilist) { - m_ht.clear(); - - m_ht.reserve(ilist.size()); - m_ht.insert(ilist.begin(), ilist.end()); - - return *this; - } - - allocator_type get_allocator() const { return m_ht.get_allocator(); } - - - /* - * Iterators - */ - iterator begin() noexcept { return m_ht.begin(); } - const_iterator begin() const noexcept { return m_ht.begin(); } - const_iterator cbegin() const noexcept { return m_ht.cbegin(); } - - iterator end() noexcept { return m_ht.end(); } - const_iterator end() const noexcept { return m_ht.end(); } - const_iterator cend() const noexcept { return m_ht.cend(); } - - - /* - * Capacity - */ - bool empty() const noexcept { return m_ht.empty(); } - size_type size() const noexcept { return m_ht.size(); } - size_type max_size() const noexcept { return m_ht.max_size(); } - - /* - * Modifiers - */ - void clear() noexcept { m_ht.clear(); } - - - - - std::pair insert(const value_type& value) { - return m_ht.insert(value); - } - - template::value>::type* = nullptr> - std::pair insert(P&& value) { - return m_ht.insert(std::forward

(value)); - } - - std::pair insert(value_type&& value) { - return m_ht.insert(std::move(value)); - } - - - iterator insert(const_iterator hint, const value_type& value) { - return m_ht.insert(hint, value); - } - - template::value>::type* = nullptr> - iterator insert(const_iterator hint, P&& value) { - return m_ht.insert(hint, std::forward

(value)); - } - - iterator insert(const_iterator hint, value_type&& value) { - return m_ht.insert(hint, std::move(value)); - } - - - template - void insert(InputIt first, InputIt last) { - m_ht.insert(first, last); - } - - void insert(std::initializer_list ilist) { - m_ht.insert(ilist.begin(), ilist.end()); - } - - - - - template - std::pair insert_or_assign(const key_type& k, M&& obj) { - return m_ht.insert_or_assign(k, std::forward(obj)); - } - - template - std::pair insert_or_assign(key_type&& k, M&& obj) { - return m_ht.insert_or_assign(std::move(k), std::forward(obj)); - } - - template - iterator insert_or_assign(const_iterator hint, const key_type& k, M&& obj) { - return m_ht.insert_or_assign(hint, k, std::forward(obj)); - } - - template - iterator insert_or_assign(const_iterator hint, key_type&& k, M&& obj) { - return m_ht.insert_or_assign(hint, std::move(k), std::forward(obj)); - } - - - - /** - * Due to the way elements are stored, emplace will need to move or copy the key-value once. - * The method is equivalent to insert(value_type(std::forward(args)...)); - * - * Mainly here for compatibility with the std::unordered_map interface. - */ - template - std::pair emplace(Args&&... args) { - return m_ht.emplace(std::forward(args)...); - } - - - - - /** - * Due to the way elements are stored, emplace_hint will need to move or copy the key-value once. - * The method is equivalent to insert(hint, value_type(std::forward(args)...)); - * - * Mainly here for compatibility with the std::unordered_map interface. - */ - template - iterator emplace_hint(const_iterator hint, Args&&... args) { - return m_ht.emplace_hint(hint, std::forward(args)...); - } - - - - - template - std::pair try_emplace(const key_type& k, Args&&... args) { - return m_ht.try_emplace(k, std::forward(args)...); - } - - template - std::pair try_emplace(key_type&& k, Args&&... args) { - return m_ht.try_emplace(std::move(k), std::forward(args)...); - } - - template - iterator try_emplace(const_iterator hint, const key_type& k, Args&&... args) { - return m_ht.try_emplace(hint, k, std::forward(args)...); - } - - template - iterator try_emplace(const_iterator hint, key_type&& k, Args&&... args) { - return m_ht.try_emplace(hint, std::move(k), std::forward(args)...); - } - - - - - iterator erase(iterator pos) { return m_ht.erase(pos); } - iterator erase(const_iterator pos) { return m_ht.erase(pos); } - iterator erase(const_iterator first, const_iterator last) { return m_ht.erase(first, last); } - size_type erase(const key_type& key) { return m_ht.erase(key); } - - /** - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup to the value if you already have the hash. - */ - size_type erase(const key_type& key, std::size_t precalculated_hash) { - return m_ht.erase(key, precalculated_hash); - } - - /** - * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent - * and Compare::is_transparent exist. - * If so, K must be hashable and comparable to Key. - */ - template::value && has_is_transparent::value>::type* = nullptr> - size_type erase(const K& key) { return m_ht.erase(key); } - - /** - * @copydoc erase(const K& key) - * - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup to the value if you already have the hash. - */ - template::value && has_is_transparent::value>::type* = nullptr> - size_type erase(const K& key, std::size_t precalculated_hash) { return m_ht.erase(key, precalculated_hash); } - - - - - void swap(bhopscotch_map& other) { other.m_ht.swap(m_ht); } - - /* - * Lookup - */ - T& at(const Key& key) { return m_ht.at(key); } - - /** - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - T& at(const Key& key, std::size_t precalculated_hash) { return m_ht.at(key, precalculated_hash); } - - const T& at(const Key& key) const { return m_ht.at(key); } - - /** - * @copydoc at(const Key& key, std::size_t precalculated_hash) - */ - const T& at(const Key& key, std::size_t precalculated_hash) const { return m_ht.at(key, precalculated_hash); } - - /** - * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent - * and Compare::is_transparent exist. - * If so, K must be hashable and comparable to Key. - */ - template::value && has_is_transparent::value>::type* = nullptr> - T& at(const K& key) { return m_ht.at(key); } - - /** - * @copydoc at(const K& key) - * - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - template::value && has_is_transparent::value>::type* = nullptr> - T& at(const K& key, std::size_t precalculated_hash) { return m_ht.at(key, precalculated_hash); } - - /** - * @copydoc at(const K& key) - */ - template::value && has_is_transparent::value>::type* = nullptr> - const T& at(const K& key) const { return m_ht.at(key); } - - /** - * @copydoc at(const K& key, std::size_t precalculated_hash) - */ - template::value && has_is_transparent::value>::type* = nullptr> - const T& at(const K& key, std::size_t precalculated_hash) const { return m_ht.at(key, precalculated_hash); } - - - - - T& operator[](const Key& key) { return m_ht[key]; } - T& operator[](Key&& key) { return m_ht[std::move(key)]; } - - - - - size_type count(const Key& key) const { return m_ht.count(key); } - - /** - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - size_type count(const Key& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); } - - /** - * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent - * and Compare::is_transparent exist. - * If so, K must be hashable and comparable to Key. - */ - template::value && has_is_transparent::value>::type* = nullptr> - size_type count(const K& key) const { return m_ht.count(key); } - - /** - * @copydoc count(const K& key) const - * - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - template::value && has_is_transparent::value>::type* = nullptr> - size_type count(const K& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); } - - - - - iterator find(const Key& key) { return m_ht.find(key); } - - /** - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - iterator find(const Key& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); } - - const_iterator find(const Key& key) const { return m_ht.find(key); } - - /** - * @copydoc find(const Key& key, std::size_t precalculated_hash) - */ - const_iterator find(const Key& key, std::size_t precalculated_hash) const { return m_ht.find(key, precalculated_hash); } - - /** - * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent - * and Compare::is_transparent exist. - * If so, K must be hashable and comparable to Key. - */ - template::value && has_is_transparent::value>::type* = nullptr> - iterator find(const K& key) { return m_ht.find(key); } - - /** - * @copydoc find(const K& key) - * - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - template::value && has_is_transparent::value>::type* = nullptr> - iterator find(const K& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); } - - /** - * @copydoc find(const K& key) - */ - template::value && has_is_transparent::value>::type* = nullptr> - const_iterator find(const K& key) const { return m_ht.find(key); } - - /** - * @copydoc find(const K& key, std::size_t precalculated_hash) - */ - template::value && has_is_transparent::value>::type* = nullptr> - const_iterator find(const K& key, std::size_t precalculated_hash) const { return m_ht.find(key, precalculated_hash); } - - - - - std::pair equal_range(const Key& key) { return m_ht.equal_range(key); } - - /** - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - std::pair equal_range(const Key& key, std::size_t precalculated_hash) { - return m_ht.equal_range(key, precalculated_hash); - } - - std::pair equal_range(const Key& key) const { return m_ht.equal_range(key); } - - /** - * @copydoc equal_range(const Key& key, std::size_t precalculated_hash) - */ - std::pair equal_range(const Key& key, std::size_t precalculated_hash) const { - return m_ht.equal_range(key, precalculated_hash); - } - - /** - * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent - * and Compare::is_transparent exist. - * If so, K must be hashable and comparable to Key. - */ - template::value && has_is_transparent::value>::type* = nullptr> - std::pair equal_range(const K& key) { return m_ht.equal_range(key); } - - /** - * @copydoc equal_range(const K& key) - * - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - template::value && has_is_transparent::value>::type* = nullptr> - std::pair equal_range(const K& key, std::size_t precalculated_hash) { - return m_ht.equal_range(key, precalculated_hash); - } - - /** - * @copydoc equal_range(const K& key) - */ - template::value && has_is_transparent::value>::type* = nullptr> - std::pair equal_range(const K& key) const { return m_ht.equal_range(key); } - - /** - * @copydoc equal_range(const K& key, std::size_t precalculated_hash) - */ - template::value && has_is_transparent::value>::type* = nullptr> - std::pair equal_range(const K& key, std::size_t precalculated_hash) const { - return m_ht.equal_range(key, precalculated_hash); - } - - - - - /* - * Bucket interface - */ - size_type bucket_count() const { return m_ht.bucket_count(); } - size_type max_bucket_count() const { return m_ht.max_bucket_count(); } - - - /* - * Hash policy - */ - float load_factor() const { return m_ht.load_factor(); } - float max_load_factor() const { return m_ht.max_load_factor(); } - void max_load_factor(float ml) { m_ht.max_load_factor(ml); } - - void rehash(size_type count_) { m_ht.rehash(count_); } - void reserve(size_type count_) { m_ht.reserve(count_); } - - - /* - * Observers - */ - hasher hash_function() const { return m_ht.hash_function(); } - key_equal key_eq() const { return m_ht.key_eq(); } - key_compare key_comp() const { return m_ht.key_comp(); } - - /* - * Other - */ - - /** - * Convert a const_iterator to an iterator. - */ - iterator mutable_iterator(const_iterator pos) { - return m_ht.mutable_iterator(pos); - } - - size_type overflow_size() const noexcept { return m_ht.overflow_size(); } - - friend bool operator==(const bhopscotch_map& lhs, const bhopscotch_map& rhs) { - if(lhs.size() != rhs.size()) { - return false; - } - - for(const auto& element_lhs : lhs) { - const auto it_element_rhs = rhs.find(element_lhs.first); - if(it_element_rhs == rhs.cend() || element_lhs.second != it_element_rhs->second) { - return false; - } - } - - return true; - } - - friend bool operator!=(const bhopscotch_map& lhs, const bhopscotch_map& rhs) { - return !operator==(lhs, rhs); - } - - friend void swap(bhopscotch_map& lhs, bhopscotch_map& rhs) { - lhs.swap(rhs); - } - - - -private: - ht m_ht; -}; - - -/** - * Same as `tsl::bhopscotch_map`. - */ -template, - class KeyEqual = std::equal_to, - class Compare = std::less, - class Allocator = std::allocator>, - unsigned int NeighborhoodSize = 62, - bool StoreHash = false> -using bhopscotch_pg_map = bhopscotch_map; - -} // end namespace tsl - -#endif diff --git a/feed/src/common/bhopscotch_set.h b/feed/src/common/bhopscotch_set.h deleted file mode 100755 index d9fc0037595f655ac6d7f465de2138ec9bcf83bf..0000000000000000000000000000000000000000 --- a/feed/src/common/bhopscotch_set.h +++ /dev/null @@ -1,529 +0,0 @@ -/** - * MIT License - * - * Copyright (c) 2017 Tessil - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef TSL_BHOPSCOTCH_SET_H -#define TSL_BHOPSCOTCH_SET_H - - -#include -#include -#include -#include -#include -#include -#include -#include -#include "paddle/fluid/feed/src/common/hopscotch_hash.h" - - -namespace tsl { - - -/** - * Similar to tsl::hopscotch_set but instead of using a list for overflowing elements it uses - * a binary search tree. It thus needs an additional template parameter Compare. Compare should - * be arithmetically coherent with KeyEqual. - * - * The binary search tree allows the set to have a worst-case scenario of O(log n) for search - * and delete, even if the hash function maps all the elements to the same bucket. - * For insert, the amortized worst case is O(log n), but the worst case is O(n) in case of rehash. - * - * This makes the set resistant to DoS attacks (but doesn't preclude you to have a good hash function, - * as an element in the bucket array is faster to retrieve than in the tree). - * - * @copydoc hopscotch_set - */ -template, - class KeyEqual = std::equal_to, - class Compare = std::less, - class Allocator = std::allocator, - unsigned int NeighborhoodSize = 62, - bool StoreHash = false, - class GrowthPolicy = tsl::hh::power_of_two_growth_policy<2>> -class bhopscotch_set { -private: - template - using has_is_transparent = tsl::detail_hopscotch_hash::has_is_transparent; - - class KeySelect { - public: - using key_type = Key; - - const key_type& operator()(const Key& key) const { - return key; - } - - key_type& operator()(Key& key) { - return key; - } - }; - - - using overflow_container_type = std::set; - using ht = tsl::detail_hopscotch_hash::hopscotch_hash; - -public: - using key_type = typename ht::key_type; - using value_type = typename ht::value_type; - using size_type = typename ht::size_type; - using difference_type = typename ht::difference_type; - using hasher = typename ht::hasher; - using key_equal = typename ht::key_equal; - using key_compare = Compare; - using allocator_type = typename ht::allocator_type; - using reference = typename ht::reference; - using const_reference = typename ht::const_reference; - using pointer = typename ht::pointer; - using const_pointer = typename ht::const_pointer; - using iterator = typename ht::iterator; - using const_iterator = typename ht::const_iterator; - - - /* - * Constructors - */ - bhopscotch_set() : bhopscotch_set(ht::DEFAULT_INIT_BUCKETS_SIZE) { - } - - explicit bhopscotch_set(size_type bucket_count, - const Hash& hash = Hash(), - const KeyEqual& equal = KeyEqual(), - const Allocator& alloc = Allocator(), - const Compare& comp = Compare()) : - m_ht(bucket_count, hash, equal, alloc, ht::DEFAULT_MAX_LOAD_FACTOR, comp) - { - } - - bhopscotch_set(size_type bucket_count, - const Allocator& alloc) : bhopscotch_set(bucket_count, Hash(), KeyEqual(), alloc) - { - } - - bhopscotch_set(size_type bucket_count, - const Hash& hash, - const Allocator& alloc) : bhopscotch_set(bucket_count, hash, KeyEqual(), alloc) - { - } - - explicit bhopscotch_set(const Allocator& alloc) : bhopscotch_set(ht::DEFAULT_INIT_BUCKETS_SIZE, alloc) { - } - - template - bhopscotch_set(InputIt first, InputIt last, - size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, - const Hash& hash = Hash(), - const KeyEqual& equal = KeyEqual(), - const Allocator& alloc = Allocator()) : bhopscotch_set(bucket_count, hash, equal, alloc) - { - insert(first, last); - } - - template - bhopscotch_set(InputIt first, InputIt last, - size_type bucket_count, - const Allocator& alloc) : bhopscotch_set(first, last, bucket_count, Hash(), KeyEqual(), alloc) - { - } - - template - bhopscotch_set(InputIt first, InputIt last, - size_type bucket_count, - const Hash& hash, - const Allocator& alloc) : bhopscotch_set(first, last, bucket_count, hash, KeyEqual(), alloc) - { - } - - bhopscotch_set(std::initializer_list init, - size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, - const Hash& hash = Hash(), - const KeyEqual& equal = KeyEqual(), - const Allocator& alloc = Allocator()) : - bhopscotch_set(init.begin(), init.end(), bucket_count, hash, equal, alloc) - { - } - - bhopscotch_set(std::initializer_list init, - size_type bucket_count, - const Allocator& alloc) : - bhopscotch_set(init.begin(), init.end(), bucket_count, Hash(), KeyEqual(), alloc) - { - } - - bhopscotch_set(std::initializer_list init, - size_type bucket_count, - const Hash& hash, - const Allocator& alloc) : - bhopscotch_set(init.begin(), init.end(), bucket_count, hash, KeyEqual(), alloc) - { - } - - - bhopscotch_set& operator=(std::initializer_list ilist) { - m_ht.clear(); - - m_ht.reserve(ilist.size()); - m_ht.insert(ilist.begin(), ilist.end()); - - return *this; - } - - allocator_type get_allocator() const { return m_ht.get_allocator(); } - - - /* - * Iterators - */ - iterator begin() noexcept { return m_ht.begin(); } - const_iterator begin() const noexcept { return m_ht.begin(); } - const_iterator cbegin() const noexcept { return m_ht.cbegin(); } - - iterator end() noexcept { return m_ht.end(); } - const_iterator end() const noexcept { return m_ht.end(); } - const_iterator cend() const noexcept { return m_ht.cend(); } - - - /* - * Capacity - */ - bool empty() const noexcept { return m_ht.empty(); } - size_type size() const noexcept { return m_ht.size(); } - size_type max_size() const noexcept { return m_ht.max_size(); } - - /* - * Modifiers - */ - void clear() noexcept { m_ht.clear(); } - - - - - std::pair insert(const value_type& value) { return m_ht.insert(value); } - std::pair insert(value_type&& value) { return m_ht.insert(std::move(value)); } - - iterator insert(const_iterator hint, const value_type& value) { return m_ht.insert(hint, value); } - iterator insert(const_iterator hint, value_type&& value) { return m_ht.insert(hint, std::move(value)); } - - template - void insert(InputIt first, InputIt last) { m_ht.insert(first, last); } - void insert(std::initializer_list ilist) { m_ht.insert(ilist.begin(), ilist.end()); } - - - - - /** - * Due to the way elements are stored, emplace will need to move or copy the key-value once. - * The method is equivalent to insert(value_type(std::forward(args)...)); - * - * Mainly here for compatibility with the std::unordered_map interface. - */ - template - std::pair emplace(Args&&... args) { return m_ht.emplace(std::forward(args)...); } - - - - - /** - * Due to the way elements are stored, emplace_hint will need to move or copy the key-value once. - * The method is equivalent to insert(hint, value_type(std::forward(args)...)); - * - * Mainly here for compatibility with the std::unordered_map interface. - */ - template - iterator emplace_hint(const_iterator hint, Args&&... args) { - return m_ht.emplace_hint(hint, std::forward(args)...); - } - - - - - iterator erase(iterator pos) { return m_ht.erase(pos); } - iterator erase(const_iterator pos) { return m_ht.erase(pos); } - iterator erase(const_iterator first, const_iterator last) { return m_ht.erase(first, last); } - size_type erase(const key_type& key) { return m_ht.erase(key); } - - /** - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup to the value if you already have the hash. - */ - size_type erase(const key_type& key, std::size_t precalculated_hash) { - return m_ht.erase(key, precalculated_hash); - } - - /** - * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent - * and Compare::is_transparent exist. - * If so, K must be hashable and comparable to Key. - */ - template::value && has_is_transparent::value>::type* = nullptr> - size_type erase(const K& key) { return m_ht.erase(key); } - - /** - * @copydoc erase(const K& key) - * - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup to the value if you already have the hash. - */ - template::value && has_is_transparent::value>::type* = nullptr> - size_type erase(const K& key, std::size_t precalculated_hash) { return m_ht.erase(key, precalculated_hash); } - - - - - void swap(bhopscotch_set& other) { other.m_ht.swap(m_ht); } - - - /* - * Lookup - */ - size_type count(const Key& key) const { return m_ht.count(key); } - - /** - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - size_type count(const Key& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); } - - /** - * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent - * and Compare::is_transparent exist. - * If so, K must be hashable and comparable to Key. - */ - template::value && has_is_transparent::value>::type* = nullptr> - size_type count(const K& key) const { return m_ht.count(key); } - - /** - * @copydoc count(const K& key) const - * - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - template::value && has_is_transparent::value>::type* = nullptr> - size_type count(const K& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); } - - - - - iterator find(const Key& key) { return m_ht.find(key); } - - /** - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - iterator find(const Key& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); } - - const_iterator find(const Key& key) const { return m_ht.find(key); } - - /** - * @copydoc find(const Key& key, std::size_t precalculated_hash) - */ - const_iterator find(const Key& key, std::size_t precalculated_hash) const { return m_ht.find(key, precalculated_hash); } - - /** - * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent - * and Compare::is_transparent exist. - * If so, K must be hashable and comparable to Key. - */ - template::value && has_is_transparent::value>::type* = nullptr> - iterator find(const K& key) { return m_ht.find(key); } - - /** - * @copydoc find(const K& key) - * - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - template::value && has_is_transparent::value>::type* = nullptr> - iterator find(const K& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); } - - /** - * @copydoc find(const K& key) - */ - template::value && has_is_transparent::value>::type* = nullptr> - const_iterator find(const K& key) const { return m_ht.find(key); } - - /** - * @copydoc find(const K& key) - * - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - template::value && has_is_transparent::value>::type* = nullptr> - const_iterator find(const K& key, std::size_t precalculated_hash) const { return m_ht.find(key, precalculated_hash); } - - - - - std::pair equal_range(const Key& key) { return m_ht.equal_range(key); } - - /** - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - std::pair equal_range(const Key& key, std::size_t precalculated_hash) { - return m_ht.equal_range(key, precalculated_hash); - } - - std::pair equal_range(const Key& key) const { return m_ht.equal_range(key); } - - /** - * @copydoc equal_range(const Key& key, std::size_t precalculated_hash) - */ - std::pair equal_range(const Key& key, std::size_t precalculated_hash) const { - return m_ht.equal_range(key, precalculated_hash); - } - - /** - * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent - * and Compare::is_transparent exist. - * If so, K must be hashable and comparable to Key. - */ - template::value && has_is_transparent::value>::type* = nullptr> - std::pair equal_range(const K& key) { return m_ht.equal_range(key); } - - /** - * @copydoc equal_range(const K& key) - * - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - template::value && has_is_transparent::value>::type* = nullptr> - std::pair equal_range(const K& key, std::size_t precalculated_hash) { - return m_ht.equal_range(key, precalculated_hash); - } - - /** - * @copydoc equal_range(const K& key) - */ - template::value && has_is_transparent::value>::type* = nullptr> - std::pair equal_range(const K& key) const { return m_ht.equal_range(key); } - - /** - * @copydoc equal_range(const K& key, std::size_t precalculated_hash) - */ - template::value && has_is_transparent::value>::type* = nullptr> - std::pair equal_range(const K& key, std::size_t precalculated_hash) const { - return m_ht.equal_range(key, precalculated_hash); - } - - - - - /* - * Bucket interface - */ - size_type bucket_count() const { return m_ht.bucket_count(); } - size_type max_bucket_count() const { return m_ht.max_bucket_count(); } - - - /* - * Hash policy - */ - float load_factor() const { return m_ht.load_factor(); } - float max_load_factor() const { return m_ht.max_load_factor(); } - void max_load_factor(float ml) { m_ht.max_load_factor(ml); } - - void rehash(size_type count_) { m_ht.rehash(count_); } - void reserve(size_type count_) { m_ht.reserve(count_); } - - - /* - * Observers - */ - hasher hash_function() const { return m_ht.hash_function(); } - key_equal key_eq() const { return m_ht.key_eq(); } - key_compare key_comp() const { return m_ht.key_comp(); } - - - /* - * Other - */ - - /** - * Convert a const_iterator to an iterator. - */ - iterator mutable_iterator(const_iterator pos) { - return m_ht.mutable_iterator(pos); - } - - size_type overflow_size() const noexcept { return m_ht.overflow_size(); } - - friend bool operator==(const bhopscotch_set& lhs, const bhopscotch_set& rhs) { - if(lhs.size() != rhs.size()) { - return false; - } - - for(const auto& element_lhs : lhs) { - const auto it_element_rhs = rhs.find(element_lhs); - if(it_element_rhs == rhs.cend()) { - return false; - } - } - - return true; - } - - friend bool operator!=(const bhopscotch_set& lhs, const bhopscotch_set& rhs) { - return !operator==(lhs, rhs); - } - - friend void swap(bhopscotch_set& lhs, bhopscotch_set& rhs) { - lhs.swap(rhs); - } - -private: - ht m_ht; -}; - - -/** - * Same as `tsl::bhopscotch_set`. - */ -template, - class KeyEqual = std::equal_to, - class Compare = std::less, - class Allocator = std::allocator, - unsigned int NeighborhoodSize = 62, - bool StoreHash = false> -using bhopscotch_pg_set = bhopscotch_set; - -} // end namespace tsl - -#endif diff --git a/feed/src/common/dict_plugin.cc b/feed/src/common/dict_plugin.cc deleted file mode 100755 index 2adcce63697d21d720a7794873d693df2bf142f5..0000000000000000000000000000000000000000 --- a/feed/src/common/dict_plugin.cc +++ /dev/null @@ -1,42 +0,0 @@ -#include -#include "paddle/fluid/feed/src/common/dict_plugin.h" -#include "paddle/fluid/framework/io/fs.h" - -namespace paddle { -namespace framework { - -int FeasignCacheDict::Load( - const std::string& path, const std::string& converter) { - auto version = version_ + 1; - if (version >= versioned_entity_.size()) { - version = 0; - } - auto& entity = versioned_entity_[version]; - uint64_t data_count = 0; - auto file_list = fs_list(path); - for (auto& file_path : file_list) { - int err_no = 0; - int line_len = 0; - size_t buffer_size = 0; - char *buffer = nullptr; - char* data_ptr = NULL; - auto file = fs_open_read(file_path, &err_no, converter); - CHECK(err_no == 0); - while ((line_len = getline(&buffer, &buffer_size, file.get())) > 0) { - if (line_len <= 1) { - continue; - } - ++data_count; - entity.Append(strtoul(buffer, &data_ptr, 10), entity.Size()); - } - if (buffer != nullptr) { - free(buffer); - } - } - version_ = version; - std::cerr << "Load success data_count" << data_count << " to version:" << version_ << std::endl; - return 0; -} - -} // namespace framework -} // namespace paddle diff --git a/feed/src/common/dict_plugin.h b/feed/src/common/dict_plugin.h deleted file mode 100755 index f7c48b6304a223983b8afb8d6381e5a6a327c6e9..0000000000000000000000000000000000000000 --- a/feed/src/common/dict_plugin.h +++ /dev/null @@ -1,128 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include "paddle/fluid/feed/src/common/bhopscotch_map.h" - -namespace paddle { -namespace framework { -class DictPlugin { -public: - DictPlugin() {} - virtual ~DictPlugin() {} - virtual int Load(const std::string& path, const std::string& converter) = 0; -}; - -template -class KvEntity { -public: - KvEntity() {} - ~KvEntity() {} - uint32_t Size() { - return _key_list.size(); - } - void Append(const K& k, const V& v) { - if (_dict_data.find(k) != _dict_data.end()) { - return; - } - _key_list.push_back(k); - _dict_data.emplace(k, v); - } - std::vector _key_list; - tsl::bhopscotch_pg_map _dict_data; -}; - -template -class KvDictPlugin : public DictPlugin { -public: - KvDictPlugin() { - versioned_entity_.resize(2); - } - virtual ~KvDictPlugin() {} - - // GetValue with version, Return: value - virtual int GetValueWithVersion(uint32_t version, const K& key, V& v) { - CHECK(version < versioned_entity_.size()); - auto& entity = versioned_entity_[version]; - auto itr = entity._dict_data.find(key); - if (itr == entity._dict_data.end()) { - return -1; // miss - } - v = itr->second; - return 0; - } - - // GetValue without version, Return: value version - virtual int GetValue(const K& key, V& v, uint32_t& version) { - version = version_; - auto& entity = versioned_entity_[version]; - auto itr = entity._dict_data.find(key); - if (itr == entity._dict_data.end()) { - return -1; // miss - } - v = itr->second; - return 0; - } - - virtual int GetVersion() { - return version_; - } -protected: - uint32_t version_ = 0; - // double-buffer support version:0 1 - std::vector> versioned_entity_; -}; - -class FeasignCacheDict : public KvDictPlugin { -public: - FeasignCacheDict(){} - virtual ~FeasignCacheDict(){} - virtual int Load(const std::string& path, const std::string& converter); -}; - -class DictPluginManager { -public: - DictPluginManager() {} - virtual ~DictPluginManager(){} - - static DictPluginManager& Instance() { - static DictPluginManager manager; - return manager; - } - inline int CreateDict(const std::string& dict_name) { - #define PADDLE_DICT_PLUGIN_REGIST(dict) \ - if (dict_name == #dict) { \ - dicts_map_[dict_name].reset(new dict()); \ - return 0; \ - } - - PADDLE_DICT_PLUGIN_REGIST(FeasignCacheDict) - #undef PADDLE_DICT_PLUGIN_REGIST - return -1; - } - inline DictPlugin* GetDict(const std::string& dict_name) { - if (dicts_map_.count(dict_name)) { - return dicts_map_[dict_name].get(); - } - return nullptr; - } - inline int LoadDict(const std::string& dict_name, - const std::string& path, const std::string converter) { - auto dict = GetDict(dict_name); - if (!dict) { - return -1; - } - return dict->Load(path, converter); - } -private: - std::unordered_map> dicts_map_; -}; - -} // namespace framework -} // namespace paddle diff --git a/feed/src/common/hopscotch_growth_policy.h b/feed/src/common/hopscotch_growth_policy.h deleted file mode 100755 index bb7a257468f16a00859a4fdc615df52a46996709..0000000000000000000000000000000000000000 --- a/feed/src/common/hopscotch_growth_policy.h +++ /dev/null @@ -1,348 +0,0 @@ -/** - * MIT License - * - * Copyright (c) 2018 Tessil - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef TSL_HOPSCOTCH_GROWTH_POLICY_H -#define TSL_HOPSCOTCH_GROWTH_POLICY_H - - -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace tsl { -namespace hh { - -/** - * Grow the hash table by a factor of GrowthFactor keeping the bucket count to a power of two. It allows - * the table to use a mask operation instead of a modulo operation to map a hash to a bucket. - * - * GrowthFactor must be a power of two >= 2. - */ -template -class power_of_two_growth_policy { -public: - /** - * Called on the hash table creation and on rehash. The number of buckets for the table is passed in parameter. - * This number is a minimum, the policy may update this value with a higher value if needed (but not lower). - * - * If 0 is given, min_bucket_count_in_out must still be 0 after the policy creation and - * bucket_for_hash must always return 0 in this case. - */ - explicit power_of_two_growth_policy(std::size_t& min_bucket_count_in_out) { - if(min_bucket_count_in_out > max_bucket_count()) { - throw std::length_error("The hash table exceeds its maxmimum size."); - } - - if(min_bucket_count_in_out > 0) { - min_bucket_count_in_out = round_up_to_power_of_two(min_bucket_count_in_out); - m_mask = min_bucket_count_in_out - 1; - } - else { - m_mask = 0; - } - } - - /** - * Return the bucket [0, bucket_count()) to which the hash belongs. - * If bucket_count() is 0, it must always return 0. - */ - std::size_t bucket_for_hash(std::size_t hash) const noexcept { - return hash & m_mask; - } - - /** - * Return the bucket count to use when the bucket array grows on rehash. - */ - std::size_t next_bucket_count() const { - if((m_mask + 1) > max_bucket_count() / GrowthFactor) { - throw std::length_error("The hash table exceeds its maxmimum size."); - } - - return (m_mask + 1) * GrowthFactor; - } - - /** - * Return the maximum number of buckets supported by the policy. - */ - std::size_t max_bucket_count() const { - // Largest power of two. - return (std::numeric_limits::max() / 2) + 1; - } - - /** - * Reset the growth policy as if it was created with a bucket count of 0. - * After a clear, the policy must always return 0 when bucket_for_hash is called. - */ - void clear() noexcept { - m_mask = 0; - } - -private: - static std::size_t round_up_to_power_of_two(std::size_t value) { - if(is_power_of_two(value)) { - return value; - } - - if(value == 0) { - return 1; - } - - --value; - for(std::size_t i = 1; i < sizeof(std::size_t) * CHAR_BIT; i *= 2) { - value |= value >> i; - } - - return value + 1; - } - - static constexpr bool is_power_of_two(std::size_t value) { - return value != 0 && (value & (value - 1)) == 0; - } - -private: - static_assert(is_power_of_two(GrowthFactor) && GrowthFactor >= 2, "GrowthFactor must be a power of two >= 2."); - - std::size_t m_mask; -}; - - -/** - * Grow the hash table by GrowthFactor::num / GrowthFactor::den and use a modulo to map a hash - * to a bucket. Slower but it can be useful if you want a slower growth. - */ -template> -class mod_growth_policy { -public: - explicit mod_growth_policy(std::size_t& min_bucket_count_in_out) { - if(min_bucket_count_in_out > max_bucket_count()) { - throw std::length_error("The hash table exceeds its maxmimum size."); - } - - if(min_bucket_count_in_out > 0) { - m_mod = min_bucket_count_in_out; - } - else { - m_mod = 1; - } - } - - std::size_t bucket_for_hash(std::size_t hash) const noexcept { - return hash % m_mod; - } - - std::size_t next_bucket_count() const { - if(m_mod == max_bucket_count()) { - throw std::length_error("The hash table exceeds its maxmimum size."); - } - - const double next_bucket_count = std::ceil(double(m_mod) * REHASH_SIZE_MULTIPLICATION_FACTOR); - if(!std::isnormal(next_bucket_count)) { - throw std::length_error("The hash table exceeds its maxmimum size."); - } - - if(next_bucket_count > double(max_bucket_count())) { - return max_bucket_count(); - } - else { - return std::size_t(next_bucket_count); - } - } - - std::size_t max_bucket_count() const { - return MAX_BUCKET_COUNT; - } - - void clear() noexcept { - m_mod = 1; - } - -private: - static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR = 1.0 * GrowthFactor::num / GrowthFactor::den; - static const std::size_t MAX_BUCKET_COUNT = - std::size_t(double( - std::numeric_limits::max() / REHASH_SIZE_MULTIPLICATION_FACTOR - )); - - static_assert(REHASH_SIZE_MULTIPLICATION_FACTOR >= 1.1, "Growth factor should be >= 1.1."); - - std::size_t m_mod; -}; - - - -namespace detail { - -static constexpr const std::array PRIMES = {{ - 1ull, 3ull, 5ull, 7ull, 11ull, 13ull, 17ull, 23ull, 29ull, 37ull, 47ull, - 59ull, 73ull, 97ull, 127ull, 151ull, 197ull, 251ull, 313ull, 397ull, - 499ull, 631ull, 797ull, 1009ull, 1259ull, 1597ull, 2011ull, 2539ull, - 3203ull, 4027ull, 5087ull, 6421ull, 8089ull, 10193ull, 12853ull, 16193ull, - 20399ull, 25717ull, 32401ull, 40823ull, 51437ull, 64811ull, 81649ull, - 102877ull, 129607ull, 163307ull, 205759ull, 259229ull, 326617ull, - 411527ull, 518509ull, 653267ull, 823117ull, 1037059ull, 1306601ull, - 1646237ull, 2074129ull, 2613229ull, 3292489ull, 4148279ull, 5226491ull, - 6584983ull, 8296553ull, 10453007ull, 13169977ull, 16593127ull, 20906033ull, - 26339969ull, 33186281ull, 41812097ull, 52679969ull, 66372617ull, - 83624237ull, 105359939ull, 132745199ull, 167248483ull, 210719881ull, - 265490441ull, 334496971ull, 421439783ull, 530980861ull, 668993977ull, - 842879579ull, 1061961721ull, 1337987929ull, 1685759167ull, 2123923447ull, - 2675975881ull, 3371518343ull, 4247846927ull, 5351951779ull, 6743036717ull, - 8495693897ull, 10703903591ull, 13486073473ull, 16991387857ull, - 21407807219ull, 26972146961ull, 33982775741ull, 42815614441ull, - 53944293929ull, 67965551447ull, 85631228929ull, 107888587883ull, - 135931102921ull, 171262457903ull, 215777175787ull, 271862205833ull, - 342524915839ull, 431554351609ull, 543724411781ull, 685049831731ull, - 863108703229ull, 1087448823553ull, 1370099663459ull, 1726217406467ull, - 2174897647073ull, 2740199326961ull, 3452434812973ull, 4349795294267ull, - 5480398654009ull, 6904869625999ull, 8699590588571ull, 10960797308051ull, - 13809739252051ull, 17399181177241ull, 21921594616111ull, 27619478504183ull, - 34798362354533ull, 43843189232363ull, 55238957008387ull, 69596724709081ull, - 87686378464759ull, 110477914016779ull, 139193449418173ull, - 175372756929481ull, 220955828033581ull, 278386898836457ull, - 350745513859007ull, 441911656067171ull, 556773797672909ull, - 701491027718027ull, 883823312134381ull, 1113547595345903ull, - 1402982055436147ull, 1767646624268779ull, 2227095190691797ull, - 2805964110872297ull, 3535293248537579ull, 4454190381383713ull, - 5611928221744609ull, 7070586497075177ull, 8908380762767489ull, - 11223856443489329ull, 14141172994150357ull, 17816761525534927ull, - 22447712886978529ull, 28282345988300791ull, 35633523051069991ull, - 44895425773957261ull, 56564691976601587ull, 71267046102139967ull, - 89790851547914507ull, 113129383953203213ull, 142534092204280003ull, - 179581703095829107ull, 226258767906406483ull, 285068184408560057ull, - 359163406191658253ull, 452517535812813007ull, 570136368817120201ull, - 718326812383316683ull, 905035071625626043ull, 1140272737634240411ull, - 1436653624766633509ull, 1810070143251252131ull, 2280545475268481167ull, - 2873307249533267101ull, 3620140286502504283ull, 4561090950536962147ull, - 5746614499066534157ull, 7240280573005008577ull, 9122181901073924329ull, - 11493228998133068689ull, 14480561146010017169ull, 18446744073709551557ull -}}; - -template -static constexpr std::size_t mod(std::size_t hash) { return hash % PRIMES[IPrime]; } - -// MOD_PRIME[iprime](hash) returns hash % PRIMES[iprime]. This table allows for faster modulo as the -// compiler can optimize the modulo code better with a constant known at the compilation. -static constexpr const std::array MOD_PRIME = {{ - &mod<0>, &mod<1>, &mod<2>, &mod<3>, &mod<4>, &mod<5>, &mod<6>, &mod<7>, &mod<8>, &mod<9>, &mod<10>, - &mod<11>, &mod<12>, &mod<13>, &mod<14>, &mod<15>, &mod<16>, &mod<17>, &mod<18>, &mod<19>, &mod<20>, - &mod<21>, &mod<22>, &mod<23>, &mod<24>, &mod<25>, &mod<26>, &mod<27>, &mod<28>, &mod<29>, &mod<30>, - &mod<31>, &mod<32>, &mod<33>, &mod<34>, &mod<35>, &mod<36>, &mod<37>, &mod<38>, &mod<39>, &mod<40>, - &mod<41>, &mod<42>, &mod<43>, &mod<44>, &mod<45>, &mod<46>, &mod<47>, &mod<48>, &mod<49>, &mod<50>, - &mod<51>, &mod<52>, &mod<53>, &mod<54>, &mod<55>, &mod<56>, &mod<57>, &mod<58>, &mod<59>, &mod<60>, - &mod<61>, &mod<62>, &mod<63>, &mod<64>, &mod<65>, &mod<66>, &mod<67>, &mod<68>, &mod<69>, &mod<70>, - &mod<71>, &mod<72>, &mod<73>, &mod<74>, &mod<75>, &mod<76>, &mod<77>, &mod<78>, &mod<79>, &mod<80>, - &mod<81>, &mod<82>, &mod<83>, &mod<84>, &mod<85>, &mod<86>, &mod<87>, &mod<88>, &mod<89>, &mod<90>, - &mod<91>, &mod<92>, &mod<93>, &mod<94>, &mod<95>, &mod<96>, &mod<97>, &mod<98>, &mod<99>, &mod<100>, - &mod<101>, &mod<102>, &mod<103>, &mod<104>, &mod<105>, &mod<106>, &mod<107>, &mod<108>, &mod<109>, &mod<110>, - &mod<111>, &mod<112>, &mod<113>, &mod<114>, &mod<115>, &mod<116>, &mod<117>, &mod<118>, &mod<119>, &mod<120>, - &mod<121>, &mod<122>, &mod<123>, &mod<124>, &mod<125>, &mod<126>, &mod<127>, &mod<128>, &mod<129>, &mod<130>, - &mod<131>, &mod<132>, &mod<133>, &mod<134>, &mod<135>, &mod<136>, &mod<137>, &mod<138>, &mod<139>, &mod<140>, - &mod<141>, &mod<142>, &mod<143>, &mod<144>, &mod<145>, &mod<146>, &mod<147>, &mod<148>, &mod<149>, &mod<150>, - &mod<151>, &mod<152>, &mod<153>, &mod<154>, &mod<155>, &mod<156>, &mod<157>, &mod<158>, &mod<159>, &mod<160>, - &mod<161>, &mod<162>, &mod<163>, &mod<164>, &mod<165>, &mod<166>, &mod<167>, &mod<168>, &mod<169>, &mod<170>, - &mod<171>, &mod<172>, &mod<173>, &mod<174>, &mod<175>, &mod<176>, &mod<177>, &mod<178>, &mod<179>, &mod<180>, - &mod<181>, &mod<182>, &mod<183>, &mod<184>, &mod<185> -}}; - -} - -/** - * Grow the hash table by using prime numbers as bucket count. Slower than tsl::hh::power_of_two_growth_policy in - * general but will probably distribute the values around better in the buckets with a poor hash function. - * - * To allow the compiler to optimize the modulo operation, a lookup table is used with constant primes numbers. - * - * With a switch the code would look like: - * \code - * switch(iprime) { // iprime is the current prime of the hash table - * case 0: hash % 5ul; - * break; - * case 1: hash % 17ul; - * break; - * case 2: hash % 29ul; - * break; - * ... - * } - * \endcode - * - * Due to the constant variable in the modulo the compiler is able to optimize the operation - * by a series of multiplications, substractions and shifts. - * - * The 'hash % 5' could become something like 'hash - (hash * 0xCCCCCCCD) >> 34) * 5' in a 64 bits environement. - */ -class prime_growth_policy { -public: - explicit prime_growth_policy(std::size_t& min_bucket_count_in_out) { - auto it_prime = std::lower_bound(detail::PRIMES.begin(), - detail::PRIMES.end(), min_bucket_count_in_out); - if(it_prime == detail::PRIMES.end()) { - throw std::length_error("The hash table exceeds its maxmimum size."); - } - - m_iprime = static_cast(std::distance(detail::PRIMES.begin(), it_prime)); - if(min_bucket_count_in_out > 0) { - min_bucket_count_in_out = *it_prime; - } - else { - min_bucket_count_in_out = 0; - } - } - - std::size_t bucket_for_hash(std::size_t hash) const noexcept { - return detail::MOD_PRIME[m_iprime](hash); - } - - std::size_t next_bucket_count() const { - if(m_iprime + 1 >= detail::PRIMES.size()) { - throw std::length_error("The hash table exceeds its maxmimum size."); - } - - return detail::PRIMES[m_iprime + 1]; - } - - std::size_t max_bucket_count() const { - return detail::PRIMES.back(); - } - - void clear() noexcept { - m_iprime = 0; - } - -private: - unsigned int m_iprime; - - static_assert(std::numeric_limits::max() >= detail::PRIMES.size(), - "The type of m_iprime is not big enough."); -}; - -} -} - -#endif diff --git a/feed/src/common/hopscotch_hash.h b/feed/src/common/hopscotch_hash.h deleted file mode 100755 index 69f8f4d376b0525caee632c4b380eb0a0003cecb..0000000000000000000000000000000000000000 --- a/feed/src/common/hopscotch_hash.h +++ /dev/null @@ -1,1817 +0,0 @@ -/** - * MIT License - * - * Copyright (c) 2017 Tessil - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef TSL_HOPSCOTCH_HASH_H -#define TSL_HOPSCOTCH_HASH_H - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "paddle/fluid/feed/src/common/hopscotch_growth_policy.h" - - - -#if (defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9)) -# define TSL_HH_NO_RANGE_ERASE_WITH_CONST_ITERATOR -#endif - - -/* - * Only activate tsl_hh_assert if TSL_DEBUG is defined. - * This way we avoid the performance hit when NDEBUG is not defined with assert as tsl_hh_assert is used a lot - * (people usually compile with "-O3" and not "-O3 -DNDEBUG"). - */ -#ifdef TSL_DEBUG -# define tsl_hh_assert(expr) assert(expr) -#else -# define tsl_hh_assert(expr) (static_cast(0)) -#endif - - -namespace tsl { - -namespace detail_hopscotch_hash { - - -template -struct make_void { - using type = void; -}; - - -template -struct has_is_transparent : std::false_type { -}; - -template -struct has_is_transparent::type> : std::true_type { -}; - - -template -struct has_key_compare : std::false_type { -}; - -template -struct has_key_compare::type> : std::true_type { -}; - - -template -struct is_power_of_two_policy: std::false_type { -}; - -template -struct is_power_of_two_policy>: std::true_type { -}; - - - - - -/* - * smallest_type_for_min_bits::type returns the smallest type that can fit MinBits. - */ -static const std::size_t SMALLEST_TYPE_MAX_BITS_SUPPORTED = 64; -template -class smallest_type_for_min_bits { -}; - -template -class smallest_type_for_min_bits 0) && (MinBits <= 8)>::type> { -public: - using type = std::uint_least8_t; -}; - -template -class smallest_type_for_min_bits 8) && (MinBits <= 16)>::type> { -public: - using type = std::uint_least16_t; -}; - -template -class smallest_type_for_min_bits 16) && (MinBits <= 32)>::type> { -public: - using type = std::uint_least32_t; -}; - -template -class smallest_type_for_min_bits 32) && (MinBits <= 64)>::type> { -public: - using type = std::uint_least64_t; -}; - - - -/* - * Each bucket may store up to three elements: - * - An aligned storage to store a value_type object with placement-new. - * - An (optional) hash of the value in the bucket. - * - An unsigned integer of type neighborhood_bitmap used to tell us which buckets in the neighborhood of the - * current bucket contain a value with a hash belonging to the current bucket. - * - * For a bucket 'bct', a bit 'i' (counting from 0 and from the least significant bit to the most significant) - * set to 1 means that the bucket 'bct + i' contains a value with a hash belonging to bucket 'bct'. - * The bits used for that, start from the third least significant bit. - * The two least significant bits are reserved: - * - The least significant bit is set to 1 if there is a value in the bucket storage. - * - The second least significant bit is set to 1 if there is an overflow. More than NeighborhoodSize values - * give the same hash, all overflow values are stored in the m_overflow_elements list of the map. - * - * Details regarding hopscotch hashing an its implementation can be found here: - * https://tessil.github.io/2016/08/29/hopscotch-hashing.html - */ -static const std::size_t NB_RESERVED_BITS_IN_NEIGHBORHOOD = 2; - - -using truncated_hash_type = std::uint_least32_t; - -/** - * Helper class that stores a truncated hash if StoreHash is true and nothing otherwise. - */ -template -class hopscotch_bucket_hash { -public: - bool bucket_hash_equal(std::size_t /*hash*/) const noexcept { - return true; - } - - truncated_hash_type truncated_bucket_hash() const noexcept { - return 0; - } - -protected: - void copy_hash(const hopscotch_bucket_hash& ) noexcept { - } - - void set_hash(truncated_hash_type /*hash*/) noexcept { - } -}; - -template<> -class hopscotch_bucket_hash { -public: - bool bucket_hash_equal(std::size_t hash) const noexcept { - return m_hash == truncated_hash_type(hash); - } - - truncated_hash_type truncated_bucket_hash() const noexcept { - return m_hash; - } - -protected: - void copy_hash(const hopscotch_bucket_hash& bucket) noexcept { - m_hash = bucket.m_hash; - } - - void set_hash(truncated_hash_type hash) noexcept { - m_hash = hash; - } - -private: - truncated_hash_type m_hash; -}; - - -template -class hopscotch_bucket: public hopscotch_bucket_hash { -private: - static const std::size_t MIN_NEIGHBORHOOD_SIZE = 4; - static const std::size_t MAX_NEIGHBORHOOD_SIZE = SMALLEST_TYPE_MAX_BITS_SUPPORTED - NB_RESERVED_BITS_IN_NEIGHBORHOOD; - - - static_assert(NeighborhoodSize >= 4, "NeighborhoodSize should be >= 4."); - // We can't put a variable in the message, ensure coherence - static_assert(MIN_NEIGHBORHOOD_SIZE == 4, ""); - - static_assert(NeighborhoodSize <= 62, "NeighborhoodSize should be <= 62."); - // We can't put a variable in the message, ensure coherence - static_assert(MAX_NEIGHBORHOOD_SIZE == 62, ""); - - - static_assert(!StoreHash || NeighborhoodSize <= 30, - "NeighborhoodSize should be <= 30 if StoreHash is true."); - // We can't put a variable in the message, ensure coherence - static_assert(MAX_NEIGHBORHOOD_SIZE - 32 == 30, ""); - - using bucket_hash = hopscotch_bucket_hash; - -public: - using value_type = ValueType; - using neighborhood_bitmap = - typename smallest_type_for_min_bits::type; - - - hopscotch_bucket() noexcept: bucket_hash(), m_neighborhood_infos(0) { - tsl_hh_assert(empty()); - } - - - hopscotch_bucket(const hopscotch_bucket& bucket) - noexcept(std::is_nothrow_copy_constructible::value): bucket_hash(bucket), - m_neighborhood_infos(0) - { - if(!bucket.empty()) { - ::new (static_cast(std::addressof(m_value))) value_type(bucket.value()); - } - - m_neighborhood_infos = bucket.m_neighborhood_infos; - } - - hopscotch_bucket(hopscotch_bucket&& bucket) - noexcept(std::is_nothrow_move_constructible::value) : bucket_hash(std::move(bucket)), - m_neighborhood_infos(0) - { - if(!bucket.empty()) { - ::new (static_cast(std::addressof(m_value))) value_type(std::move(bucket.value())); - } - - m_neighborhood_infos = bucket.m_neighborhood_infos; - } - - hopscotch_bucket& operator=(const hopscotch_bucket& bucket) - noexcept(std::is_nothrow_copy_constructible::value) - { - if(this != &bucket) { - remove_value(); - - bucket_hash::operator=(bucket); - if(!bucket.empty()) { - ::new (static_cast(std::addressof(m_value))) value_type(bucket.value()); - } - - m_neighborhood_infos = bucket.m_neighborhood_infos; - } - - return *this; - } - - hopscotch_bucket& operator=(hopscotch_bucket&& ) = delete; - - ~hopscotch_bucket() noexcept { - if(!empty()) { - destroy_value(); - } - } - - neighborhood_bitmap neighborhood_infos() const noexcept { - return neighborhood_bitmap(m_neighborhood_infos >> NB_RESERVED_BITS_IN_NEIGHBORHOOD); - } - - void set_overflow(bool has_overflow) noexcept { - if(has_overflow) { - m_neighborhood_infos = neighborhood_bitmap(m_neighborhood_infos | 2); - } - else { - m_neighborhood_infos = neighborhood_bitmap(m_neighborhood_infos & ~2); - } - } - - bool has_overflow() const noexcept { - return (m_neighborhood_infos & 2) != 0; - } - - bool empty() const noexcept { - return (m_neighborhood_infos & 1) == 0; - } - - void toggle_neighbor_presence(std::size_t ineighbor) noexcept { - tsl_hh_assert(ineighbor <= NeighborhoodSize); - m_neighborhood_infos = neighborhood_bitmap( - m_neighborhood_infos ^ (1ull << (ineighbor + NB_RESERVED_BITS_IN_NEIGHBORHOOD))); - } - - bool check_neighbor_presence(std::size_t ineighbor) const noexcept { - tsl_hh_assert(ineighbor <= NeighborhoodSize); - if(((m_neighborhood_infos >> (ineighbor + NB_RESERVED_BITS_IN_NEIGHBORHOOD)) & 1) == 1) { - return true; - } - - return false; - } - - value_type& value() noexcept { - tsl_hh_assert(!empty()); - return *reinterpret_cast(std::addressof(m_value)); - } - - const value_type& value() const noexcept { - tsl_hh_assert(!empty()); - return *reinterpret_cast(std::addressof(m_value)); - } - - template - void set_value_of_empty_bucket(truncated_hash_type hash, Args&&... value_type_args) { - tsl_hh_assert(empty()); - - ::new (static_cast(std::addressof(m_value))) value_type(std::forward(value_type_args)...); - set_empty(false); - this->set_hash(hash); - } - - void swap_value_into_empty_bucket(hopscotch_bucket& empty_bucket) { - tsl_hh_assert(empty_bucket.empty()); - if(!empty()) { - ::new (static_cast(std::addressof(empty_bucket.m_value))) value_type(std::move(value())); - empty_bucket.copy_hash(*this); - empty_bucket.set_empty(false); - - destroy_value(); - set_empty(true); - } - } - - void remove_value() noexcept { - if(!empty()) { - destroy_value(); - set_empty(true); - } - } - - void clear() noexcept { - if(!empty()) { - destroy_value(); - } - - m_neighborhood_infos = 0; - tsl_hh_assert(empty()); - } - - static truncated_hash_type truncate_hash(std::size_t hash) noexcept { - return truncated_hash_type(hash); - } - -private: - void set_empty(bool is_empty) noexcept { - if(is_empty) { - m_neighborhood_infos = neighborhood_bitmap(m_neighborhood_infos & ~1); - } - else { - m_neighborhood_infos = neighborhood_bitmap(m_neighborhood_infos | 1); - } - } - - void destroy_value() noexcept { - tsl_hh_assert(!empty()); - value().~value_type(); - } - -private: - using storage = typename std::aligned_storage::type; - - neighborhood_bitmap m_neighborhood_infos; - storage m_value; -}; - - -/** - * Internal common class used by (b)hopscotch_map and (b)hopscotch_set. - * - * ValueType is what will be stored by hopscotch_hash (usually std::pair for a map and Key for a set). - * - * KeySelect should be a FunctionObject which takes a ValueType in parameter and returns a reference to the key. - * - * ValueSelect should be a FunctionObject which takes a ValueType in parameter and returns a reference to the value. - * ValueSelect should be void if there is no value (in a set for example). - * - * OverflowContainer will be used as containers for overflown elements. Usually it should be a list - * or a set/map. - */ -template -class hopscotch_hash: private Hash, private KeyEqual, private GrowthPolicy { -private: - template - using has_mapped_type = typename std::integral_constant::value>; - - static_assert(noexcept(std::declval().bucket_for_hash(std::size_t(0))), "GrowthPolicy::bucket_for_hash must be noexcept."); - static_assert(noexcept(std::declval().clear()), "GrowthPolicy::clear must be noexcept."); - -public: - template - class hopscotch_iterator; - - using key_type = typename KeySelect::key_type; - using value_type = ValueType; - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - using hasher = Hash; - using key_equal = KeyEqual; - using allocator_type = Allocator; - using reference = value_type&; - using const_reference = const value_type&; - using pointer = value_type*; - using const_pointer = const value_type*; - using iterator = hopscotch_iterator; - using const_iterator = hopscotch_iterator; - -private: - using hopscotch_bucket = tsl::detail_hopscotch_hash::hopscotch_bucket; - using neighborhood_bitmap = typename hopscotch_bucket::neighborhood_bitmap; - - using buckets_allocator = typename std::allocator_traits::template rebind_alloc; - using buckets_container_type = std::vector; - - using overflow_container_type = OverflowContainer; - - static_assert(std::is_same::value, - "OverflowContainer should have ValueType as type."); - - static_assert(std::is_same::value, - "Invalid allocator, not the same type as the value_type."); - - - using iterator_buckets = typename buckets_container_type::iterator; - using const_iterator_buckets = typename buckets_container_type::const_iterator; - - using iterator_overflow = typename overflow_container_type::iterator; - using const_iterator_overflow = typename overflow_container_type::const_iterator; - -public: - /** - * The `operator*()` and `operator->()` methods return a const reference and const pointer respectively to the - * stored value type. - * - * In case of a map, to get a modifiable reference to the value associated to a key (the `.second` in the - * stored pair), you have to call `value()`. - */ - template - class hopscotch_iterator { - friend class hopscotch_hash; - private: - using iterator_bucket = typename std::conditional::type; - using iterator_overflow = typename std::conditional::type; - - - hopscotch_iterator(iterator_bucket buckets_iterator, iterator_bucket buckets_end_iterator, - iterator_overflow overflow_iterator) noexcept : - m_buckets_iterator(buckets_iterator), m_buckets_end_iterator(buckets_end_iterator), - m_overflow_iterator(overflow_iterator) - { - } - - public: - using iterator_category = std::forward_iterator_tag; - using value_type = const typename hopscotch_hash::value_type; - using difference_type = std::ptrdiff_t; - using reference = value_type&; - using pointer = value_type*; - - - hopscotch_iterator() noexcept { - } - - // Copy constructor from iterator to const_iterator. - template::type* = nullptr> - hopscotch_iterator(const hopscotch_iterator& other) noexcept : - m_buckets_iterator(other.m_buckets_iterator), m_buckets_end_iterator(other.m_buckets_end_iterator), - m_overflow_iterator(other.m_overflow_iterator) - { - } - - hopscotch_iterator(const hopscotch_iterator& other) = default; - hopscotch_iterator(hopscotch_iterator&& other) = default; - hopscotch_iterator& operator=(const hopscotch_iterator& other) = default; - hopscotch_iterator& operator=(hopscotch_iterator&& other) = default; - - const typename hopscotch_hash::key_type& key() const { - if(m_buckets_iterator != m_buckets_end_iterator) { - return KeySelect()(m_buckets_iterator->value()); - } - - return KeySelect()(*m_overflow_iterator); - } - - template::value>::type* = nullptr> - typename std::conditional< - IsConst, - const typename U::value_type&, - typename U::value_type&>::type value() const - { - if(m_buckets_iterator != m_buckets_end_iterator) { - return U()(m_buckets_iterator->value()); - } - - return U()(*m_overflow_iterator); - } - - reference operator*() const { - if(m_buckets_iterator != m_buckets_end_iterator) { - return m_buckets_iterator->value(); - } - - return *m_overflow_iterator; - } - - pointer operator->() const { - if(m_buckets_iterator != m_buckets_end_iterator) { - return std::addressof(m_buckets_iterator->value()); - } - - return std::addressof(*m_overflow_iterator); - } - - hopscotch_iterator& operator++() { - if(m_buckets_iterator == m_buckets_end_iterator) { - ++m_overflow_iterator; - return *this; - } - - do { - ++m_buckets_iterator; - } while(m_buckets_iterator != m_buckets_end_iterator && m_buckets_iterator->empty()); - - return *this; - } - - hopscotch_iterator operator++(int) { - hopscotch_iterator tmp(*this); - ++*this; - - return tmp; - } - - friend bool operator==(const hopscotch_iterator& lhs, const hopscotch_iterator& rhs) { - return lhs.m_buckets_iterator == rhs.m_buckets_iterator && - lhs.m_overflow_iterator == rhs.m_overflow_iterator; - } - - friend bool operator!=(const hopscotch_iterator& lhs, const hopscotch_iterator& rhs) { - return !(lhs == rhs); - } - - private: - iterator_bucket m_buckets_iterator; - iterator_bucket m_buckets_end_iterator; - iterator_overflow m_overflow_iterator; - }; - -public: - template::value>::type* = nullptr> - hopscotch_hash(size_type bucket_count, - const Hash& hash, - const KeyEqual& equal, - const Allocator& alloc, - float max_load_factor) : Hash(hash), - KeyEqual(equal), - GrowthPolicy(bucket_count), - m_buckets_data(alloc), - m_overflow_elements(alloc), - m_buckets(static_empty_bucket_ptr()), - m_nb_elements(0) - { - if(bucket_count > max_bucket_count()) { - throw std::length_error("The map exceeds its maxmimum size."); - } - - if(bucket_count > 0) { - static_assert(NeighborhoodSize - 1 > 0, ""); - - // Can't directly construct with the appropriate size in the initializer - // as m_buckets_data(bucket_count, alloc) is not supported by GCC 4.8 - m_buckets_data.resize(bucket_count + NeighborhoodSize - 1); - m_buckets = m_buckets_data.data(); - } - - - this->max_load_factor(max_load_factor); - - - // Check in the constructor instead of outside of a function to avoi compilation issues - // when value_type is not complete. - static_assert(std::is_nothrow_move_constructible::value || - std::is_copy_constructible::value, - "value_type must be either copy constructible or nothrow move constructible."); - } - - template::value>::type* = nullptr> - hopscotch_hash(size_type bucket_count, - const Hash& hash, - const KeyEqual& equal, - const Allocator& alloc, - float max_load_factor, - const typename OC::key_compare& comp) : Hash(hash), - KeyEqual(equal), - GrowthPolicy(bucket_count), - m_buckets_data(alloc), - m_overflow_elements(comp, alloc), - m_buckets(static_empty_bucket_ptr()), - m_nb_elements(0) - { - - if(bucket_count > max_bucket_count()) { - throw std::length_error("The map exceeds its maxmimum size."); - } - - if(bucket_count > 0) { - static_assert(NeighborhoodSize - 1 > 0, ""); - - // Can't directly construct with the appropriate size in the initializer - // as m_buckets_data(bucket_count, alloc) is not supported by GCC 4.8 - m_buckets_data.resize(bucket_count + NeighborhoodSize - 1); - m_buckets = m_buckets_data.data(); - } - - - this->max_load_factor(max_load_factor); - - - // Check in the constructor instead of outside of a function to avoi compilation issues - // when value_type is not complete. - static_assert(std::is_nothrow_move_constructible::value || - std::is_copy_constructible::value, - "value_type must be either copy constructible or nothrow move constructible."); - } - - hopscotch_hash(const hopscotch_hash& other): - Hash(other), - KeyEqual(other), - GrowthPolicy(other), - m_buckets_data(other.m_buckets_data), - m_overflow_elements(other.m_overflow_elements), - m_buckets(m_buckets_data.empty()?static_empty_bucket_ptr(): - m_buckets_data.data()), - m_nb_elements(other.m_nb_elements), - m_max_load_factor(other.m_max_load_factor), - m_max_load_threshold_rehash(other.m_max_load_threshold_rehash), - m_min_load_threshold_rehash(other.m_min_load_threshold_rehash) - { - } - - hopscotch_hash(hopscotch_hash&& other) - noexcept( - std::is_nothrow_move_constructible::value && - std::is_nothrow_move_constructible::value && - std::is_nothrow_move_constructible::value && - std::is_nothrow_move_constructible::value && - std::is_nothrow_move_constructible::value - ): - Hash(std::move(static_cast(other))), - KeyEqual(std::move(static_cast(other))), - GrowthPolicy(std::move(static_cast(other))), - m_buckets_data(std::move(other.m_buckets_data)), - m_overflow_elements(std::move(other.m_overflow_elements)), - m_buckets(m_buckets_data.empty()?static_empty_bucket_ptr(): - m_buckets_data.data()), - m_nb_elements(other.m_nb_elements), - m_max_load_factor(other.m_max_load_factor), - m_max_load_threshold_rehash(other.m_max_load_threshold_rehash), - m_min_load_threshold_rehash(other.m_min_load_threshold_rehash) - { - other.GrowthPolicy::clear(); - other.m_buckets_data.clear(); - other.m_overflow_elements.clear(); - other.m_buckets = static_empty_bucket_ptr(); - other.m_nb_elements = 0; - other.m_max_load_threshold_rehash = 0; - other.m_min_load_threshold_rehash = 0; - } - - hopscotch_hash& operator=(const hopscotch_hash& other) { - if(&other != this) { - Hash::operator=(other); - KeyEqual::operator=(other); - GrowthPolicy::operator=(other); - - m_buckets_data = other.m_buckets_data; - m_overflow_elements = other.m_overflow_elements; - m_buckets = m_buckets_data.empty()?static_empty_bucket_ptr(): - m_buckets_data.data(); - m_nb_elements = other.m_nb_elements; - m_max_load_factor = other.m_max_load_factor; - m_max_load_threshold_rehash = other.m_max_load_threshold_rehash; - m_min_load_threshold_rehash = other.m_min_load_threshold_rehash; - } - - return *this; - } - - hopscotch_hash& operator=(hopscotch_hash&& other) { - other.swap(*this); - other.clear(); - - return *this; - } - - allocator_type get_allocator() const { - return m_buckets_data.get_allocator(); - } - - - /* - * Iterators - */ - iterator begin() noexcept { - auto begin = m_buckets_data.begin(); - while(begin != m_buckets_data.end() && begin->empty()) { - ++begin; - } - - return iterator(begin, m_buckets_data.end(), m_overflow_elements.begin()); - } - - const_iterator begin() const noexcept { - return cbegin(); - } - - const_iterator cbegin() const noexcept { - auto begin = m_buckets_data.cbegin(); - while(begin != m_buckets_data.cend() && begin->empty()) { - ++begin; - } - - return const_iterator(begin, m_buckets_data.cend(), m_overflow_elements.cbegin()); - } - - iterator end() noexcept { - return iterator(m_buckets_data.end(), m_buckets_data.end(), m_overflow_elements.end()); - } - - const_iterator end() const noexcept { - return cend(); - } - - const_iterator cend() const noexcept { - return const_iterator(m_buckets_data.cend(), m_buckets_data.cend(), m_overflow_elements.cend()); - } - - - /* - * Capacity - */ - bool empty() const noexcept { - return m_nb_elements == 0; - } - - size_type size() const noexcept { - return m_nb_elements; - } - - size_type max_size() const noexcept { - return m_buckets_data.max_size(); - } - - /* - * Modifiers - */ - void clear() noexcept { - for(auto& bucket: m_buckets_data) { - bucket.clear(); - } - - m_overflow_elements.clear(); - m_nb_elements = 0; - } - - - std::pair insert(const value_type& value) { - return insert_impl(value); - } - - template::value>::type* = nullptr> - std::pair insert(P&& value) { - return insert_impl(value_type(std::forward

(value))); - } - - std::pair insert(value_type&& value) { - return insert_impl(std::move(value)); - } - - - iterator insert(const_iterator hint, const value_type& value) { - if(hint != cend() && compare_keys(KeySelect()(*hint), KeySelect()(value))) { - return mutable_iterator(hint); - } - - return insert(value).first; - } - - template::value>::type* = nullptr> - iterator insert(const_iterator hint, P&& value) { - return emplace_hint(hint, std::forward

(value)); - } - - iterator insert(const_iterator hint, value_type&& value) { - if(hint != cend() && compare_keys(KeySelect()(*hint), KeySelect()(value))) { - return mutable_iterator(hint); - } - - return insert(std::move(value)).first; - } - - - template - void insert(InputIt first, InputIt last) { - if(std::is_base_of::iterator_category>::value) - { - const auto nb_elements_insert = std::distance(first, last); - const std::size_t nb_elements_in_buckets = m_nb_elements - m_overflow_elements.size(); - const std::size_t nb_free_buckets = m_max_load_threshold_rehash - nb_elements_in_buckets; - tsl_hh_assert(m_nb_elements >= m_overflow_elements.size()); - tsl_hh_assert(m_max_load_threshold_rehash >= nb_elements_in_buckets); - - if(nb_elements_insert > 0 && nb_free_buckets < std::size_t(nb_elements_insert)) { - reserve(nb_elements_in_buckets + std::size_t(nb_elements_insert)); - } - } - - for(; first != last; ++first) { - insert(*first); - } - } - - - template - std::pair insert_or_assign(const key_type& k, M&& obj) { - return insert_or_assign_impl(k, std::forward(obj)); - } - - template - std::pair insert_or_assign(key_type&& k, M&& obj) { - return insert_or_assign_impl(std::move(k), std::forward(obj)); - } - - - template - iterator insert_or_assign(const_iterator hint, const key_type& k, M&& obj) { - if(hint != cend() && compare_keys(KeySelect()(*hint), k)) { - auto it = mutable_iterator(hint); - it.value() = std::forward(obj); - - return it; - } - - return insert_or_assign(k, std::forward(obj)).first; - } - - template - iterator insert_or_assign(const_iterator hint, key_type&& k, M&& obj) { - if(hint != cend() && compare_keys(KeySelect()(*hint), k)) { - auto it = mutable_iterator(hint); - it.value() = std::forward(obj); - - return it; - } - - return insert_or_assign(std::move(k), std::forward(obj)).first; - } - - - template - std::pair emplace(Args&&... args) { - return insert(value_type(std::forward(args)...)); - } - - template - iterator emplace_hint(const_iterator hint, Args&&... args) { - return insert(hint, value_type(std::forward(args)...)); - } - - template - std::pair try_emplace(const key_type& k, Args&&... args) { - return try_emplace_impl(k, std::forward(args)...); - } - - template - std::pair try_emplace(key_type&& k, Args&&... args) { - return try_emplace_impl(std::move(k), std::forward(args)...); - } - - template - iterator try_emplace(const_iterator hint, const key_type& k, Args&&... args) { - if(hint != cend() && compare_keys(KeySelect()(*hint), k)) { - return mutable_iterator(hint); - } - - return try_emplace(k, std::forward(args)...).first; - } - - template - iterator try_emplace(const_iterator hint, key_type&& k, Args&&... args) { - if(hint != cend() && compare_keys(KeySelect()(*hint), k)) { - return mutable_iterator(hint); - } - - return try_emplace(std::move(k), std::forward(args)...).first; - } - - - /** - * Here to avoid `template size_type erase(const K& key)` being used when - * we use an iterator instead of a const_iterator. - */ - iterator erase(iterator pos) { - return erase(const_iterator(pos)); - } - - iterator erase(const_iterator pos) { - const std::size_t ibucket_for_hash = bucket_for_hash(hash_key(pos.key())); - - if(pos.m_buckets_iterator != pos.m_buckets_end_iterator) { - auto it_bucket = m_buckets_data.begin() + std::distance(m_buckets_data.cbegin(), pos.m_buckets_iterator); - erase_from_bucket(*it_bucket, ibucket_for_hash); - - return ++iterator(it_bucket, m_buckets_data.end(), m_overflow_elements.begin()); - } - else { - auto it_next_overflow = erase_from_overflow(pos.m_overflow_iterator, ibucket_for_hash); - return iterator(m_buckets_data.end(), m_buckets_data.end(), it_next_overflow); - } - } - - iterator erase(const_iterator first, const_iterator last) { - if(first == last) { - return mutable_iterator(first); - } - - auto to_delete = erase(first); - while(to_delete != last) { - to_delete = erase(to_delete); - } - - return to_delete; - } - - template - size_type erase(const K& key) { - return erase(key, hash_key(key)); - } - - template - size_type erase(const K& key, std::size_t hash) { - const std::size_t ibucket_for_hash = bucket_for_hash(hash); - - hopscotch_bucket* bucket_found = find_in_buckets(key, hash, m_buckets + ibucket_for_hash); - if(bucket_found != nullptr) { - erase_from_bucket(*bucket_found, ibucket_for_hash); - - return 1; - } - - if(m_buckets[ibucket_for_hash].has_overflow()) { - auto it_overflow = find_in_overflow(key); - if(it_overflow != m_overflow_elements.end()) { - erase_from_overflow(it_overflow, ibucket_for_hash); - - return 1; - } - } - - return 0; - } - - void swap(hopscotch_hash& other) { - using std::swap; - - swap(static_cast(*this), static_cast(other)); - swap(static_cast(*this), static_cast(other)); - swap(static_cast(*this), static_cast(other)); - swap(m_buckets_data, other.m_buckets_data); - swap(m_overflow_elements, other.m_overflow_elements); - swap(m_buckets, other.m_buckets); - swap(m_nb_elements, other.m_nb_elements); - swap(m_max_load_factor, other.m_max_load_factor); - swap(m_max_load_threshold_rehash, other.m_max_load_threshold_rehash); - swap(m_min_load_threshold_rehash, other.m_min_load_threshold_rehash); - } - - - /* - * Lookup - */ - template::value>::type* = nullptr> - typename U::value_type& at(const K& key) { - return at(key, hash_key(key)); - } - - template::value>::type* = nullptr> - typename U::value_type& at(const K& key, std::size_t hash) { - return const_cast(static_cast(this)->at(key, hash)); - } - - - template::value>::type* = nullptr> - const typename U::value_type& at(const K& key) const { - return at(key, hash_key(key)); - } - - template::value>::type* = nullptr> - const typename U::value_type& at(const K& key, std::size_t hash) const { - using T = typename U::value_type; - - const T* value = find_value_impl(key, hash, m_buckets + bucket_for_hash(hash)); - if(value == nullptr) { - throw std::out_of_range("Couldn't find key."); - } - else { - return *value; - } - } - - - template::value>::type* = nullptr> - typename U::value_type& operator[](K&& key) { - using T = typename U::value_type; - - const std::size_t hash = hash_key(key); - const std::size_t ibucket_for_hash = bucket_for_hash(hash); - - T* value = find_value_impl(key, hash, m_buckets + ibucket_for_hash); - if(value != nullptr) { - return *value; - } - else { - return insert_value(ibucket_for_hash, hash, std::piecewise_construct, - std::forward_as_tuple(std::forward(key)), - std::forward_as_tuple()).first.value(); - } - } - - - template - size_type count(const K& key) const { - return count(key, hash_key(key)); - } - - template - size_type count(const K& key, std::size_t hash) const { - return count_impl(key, hash, m_buckets + bucket_for_hash(hash)); - } - - - template - iterator find(const K& key) { - return find(key, hash_key(key)); - } - - template - iterator find(const K& key, std::size_t hash) { - return find_impl(key, hash, m_buckets + bucket_for_hash(hash)); - } - - - template - const_iterator find(const K& key) const { - return find(key, hash_key(key)); - } - - template - const_iterator find(const K& key, std::size_t hash) const { - return find_impl(key, hash, m_buckets + bucket_for_hash(hash)); - } - - - template - std::pair equal_range(const K& key) { - return equal_range(key, hash_key(key)); - } - - template - std::pair equal_range(const K& key, std::size_t hash) { - iterator it = find(key, hash); - return std::make_pair(it, (it == end())?it:std::next(it)); - } - - - template - std::pair equal_range(const K& key) const { - return equal_range(key, hash_key(key)); - } - - template - std::pair equal_range(const K& key, std::size_t hash) const { - const_iterator it = find(key, hash); - return std::make_pair(it, (it == cend())?it:std::next(it)); - } - - /* - * Bucket interface - */ - size_type bucket_count() const { - /* - * So that the last bucket can have NeighborhoodSize neighbors, the size of the bucket array is a little - * bigger than the real number of buckets when not empty. - * We could use some of the buckets at the beginning, but it is faster this way as we avoid extra checks. - */ - if(m_buckets_data.empty()) { - return 0; - } - - return m_buckets_data.size() - NeighborhoodSize + 1; - } - - size_type max_bucket_count() const { - const std::size_t max_bucket_count = std::min(GrowthPolicy::max_bucket_count(), m_buckets_data.max_size()); - return max_bucket_count - NeighborhoodSize + 1; - } - - - /* - * Hash policy - */ - float load_factor() const { - if(bucket_count() == 0) { - return 0; - } - - return float(m_nb_elements)/float(bucket_count()); - } - - float max_load_factor() const { - return m_max_load_factor; - } - - void max_load_factor(float ml) { - m_max_load_factor = std::max(0.1f, std::min(ml, 0.95f)); - m_max_load_threshold_rehash = size_type(float(bucket_count())*m_max_load_factor); - m_min_load_threshold_rehash = size_type(float(bucket_count())*MIN_LOAD_FACTOR_FOR_REHASH); - } - - void rehash(size_type count_) { - count_ = std::max(count_, size_type(std::ceil(float(size())/max_load_factor()))); - rehash_impl(count_); - } - - void reserve(size_type count_) { - rehash(size_type(std::ceil(float(count_)/max_load_factor()))); - } - - - /* - * Observers - */ - hasher hash_function() const { - return static_cast(*this); - } - - key_equal key_eq() const { - return static_cast(*this); - } - - /* - * Other - */ - iterator mutable_iterator(const_iterator pos) { - if(pos.m_buckets_iterator != pos.m_buckets_end_iterator) { - // Get a non-const iterator - auto it = m_buckets_data.begin() + std::distance(m_buckets_data.cbegin(), pos.m_buckets_iterator); - return iterator(it, m_buckets_data.end(), m_overflow_elements.begin()); - } - else { - // Get a non-const iterator - auto it = mutable_overflow_iterator(pos.m_overflow_iterator); - return iterator(m_buckets_data.end(), m_buckets_data.end(), it); - } - } - - size_type overflow_size() const noexcept { - return m_overflow_elements.size(); - } - - template::value>::type* = nullptr> - typename U::key_compare key_comp() const { - return m_overflow_elements.key_comp(); - } - - -private: - template - std::size_t hash_key(const K& key) const { - return Hash::operator()(key); - } - - template - bool compare_keys(const K1& key1, const K2& key2) const { - return KeyEqual::operator()(key1, key2); - } - - std::size_t bucket_for_hash(std::size_t hash) const { - const std::size_t bucket = GrowthPolicy::bucket_for_hash(hash); - tsl_hh_assert(bucket < m_buckets_data.size() || (bucket == 0 && m_buckets_data.empty())); - - return bucket; - } - - template::value>::type* = nullptr> - void rehash_impl(size_type count_) { - hopscotch_hash new_map = new_hopscotch_hash(count_); - - if(!m_overflow_elements.empty()) { - new_map.m_overflow_elements.swap(m_overflow_elements); - new_map.m_nb_elements += new_map.m_overflow_elements.size(); - - for(const value_type& value : new_map.m_overflow_elements) { - const std::size_t ibucket_for_hash = new_map.bucket_for_hash(new_map.hash_key(KeySelect()(value))); - new_map.m_buckets[ibucket_for_hash].set_overflow(true); - } - } - - try { - const bool use_stored_hash = USE_STORED_HASH_ON_REHASH(new_map.bucket_count()); - for(auto it_bucket = m_buckets_data.begin(); it_bucket != m_buckets_data.end(); ++it_bucket) { - if(it_bucket->empty()) { - continue; - } - - const std::size_t hash = use_stored_hash? - it_bucket->truncated_bucket_hash(): - new_map.hash_key(KeySelect()(it_bucket->value())); - const std::size_t ibucket_for_hash = new_map.bucket_for_hash(hash); - - new_map.insert_value(ibucket_for_hash, hash, std::move(it_bucket->value())); - - - erase_from_bucket(*it_bucket, bucket_for_hash(hash)); - } - } - /* - * The call to insert_value may throw an exception if an element is added to the overflow - * list. Rollback the elements in this case. - */ - catch(...) { - m_overflow_elements.swap(new_map.m_overflow_elements); - - const bool use_stored_hash = USE_STORED_HASH_ON_REHASH(new_map.bucket_count()); - for(auto it_bucket = new_map.m_buckets_data.begin(); it_bucket != new_map.m_buckets_data.end(); ++it_bucket) { - if(it_bucket->empty()) { - continue; - } - - const std::size_t hash = use_stored_hash? - it_bucket->truncated_bucket_hash(): - hash_key(KeySelect()(it_bucket->value())); - const std::size_t ibucket_for_hash = bucket_for_hash(hash); - - // The elements we insert were not in the overflow list before the switch. - // They will not be go in the overflow list if we rollback the switch. - insert_value(ibucket_for_hash, hash, std::move(it_bucket->value())); - } - - throw; - } - - new_map.swap(*this); - } - - template::value && - !std::is_nothrow_move_constructible::value>::type* = nullptr> - void rehash_impl(size_type count_) { - hopscotch_hash new_map = new_hopscotch_hash(count_); - - const bool use_stored_hash = USE_STORED_HASH_ON_REHASH(new_map.bucket_count()); - for(const hopscotch_bucket& bucket: m_buckets_data) { - if(bucket.empty()) { - continue; - } - - const std::size_t hash = use_stored_hash? - bucket.truncated_bucket_hash(): - new_map.hash_key(KeySelect()(bucket.value())); - const std::size_t ibucket_for_hash = new_map.bucket_for_hash(hash); - - new_map.insert_value(ibucket_for_hash, hash, bucket.value()); - } - - for(const value_type& value: m_overflow_elements) { - const std::size_t hash = new_map.hash_key(KeySelect()(value)); - const std::size_t ibucket_for_hash = new_map.bucket_for_hash(hash); - - new_map.insert_value(ibucket_for_hash, hash, value); - } - - new_map.swap(*this); - } - -#ifdef TSL_HH_NO_RANGE_ERASE_WITH_CONST_ITERATOR - iterator_overflow mutable_overflow_iterator(const_iterator_overflow it) { - return std::next(m_overflow_elements.begin(), std::distance(m_overflow_elements.cbegin(), it)); - } -#else - iterator_overflow mutable_overflow_iterator(const_iterator_overflow it) { - return m_overflow_elements.erase(it, it); - } -#endif - - // iterator is in overflow list - iterator_overflow erase_from_overflow(const_iterator_overflow pos, std::size_t ibucket_for_hash) { -#ifdef TSL_HH_NO_RANGE_ERASE_WITH_CONST_ITERATOR - auto it_next = m_overflow_elements.erase(mutable_overflow_iterator(pos)); -#else - auto it_next = m_overflow_elements.erase(pos); -#endif - m_nb_elements--; - - - // Check if we can remove the overflow flag - tsl_hh_assert(m_buckets[ibucket_for_hash].has_overflow()); - for(const value_type& value: m_overflow_elements) { - const std::size_t bucket_for_value = bucket_for_hash(hash_key(KeySelect()(value))); - if(bucket_for_value == ibucket_for_hash) { - return it_next; - } - } - - m_buckets[ibucket_for_hash].set_overflow(false); - return it_next; - } - - - /** - * bucket_for_value is the bucket in which the value is. - * ibucket_for_hash is the bucket where the value belongs. - */ - void erase_from_bucket(hopscotch_bucket& bucket_for_value, std::size_t ibucket_for_hash) noexcept { - const std::size_t ibucket_for_value = std::distance(m_buckets_data.data(), &bucket_for_value); - tsl_hh_assert(ibucket_for_value >= ibucket_for_hash); - - bucket_for_value.remove_value(); - m_buckets[ibucket_for_hash].toggle_neighbor_presence(ibucket_for_value - ibucket_for_hash); - m_nb_elements--; - } - - - - template - std::pair insert_or_assign_impl(K&& key, M&& obj) { - auto it = try_emplace_impl(std::forward(key), std::forward(obj)); - if(!it.second) { - it.first.value() = std::forward(obj); - } - - return it; - } - - template - std::pair try_emplace_impl(P&& key, Args&&... args_value) { - const std::size_t hash = hash_key(key); - const std::size_t ibucket_for_hash = bucket_for_hash(hash); - - // Check if already presents - auto it_find = find_impl(key, hash, m_buckets + ibucket_for_hash); - if(it_find != end()) { - return std::make_pair(it_find, false); - } - - return insert_value(ibucket_for_hash, hash, std::piecewise_construct, - std::forward_as_tuple(std::forward

(key)), - std::forward_as_tuple(std::forward(args_value)...)); - } - - template - std::pair insert_impl(P&& value) { - const std::size_t hash = hash_key(KeySelect()(value)); - const std::size_t ibucket_for_hash = bucket_for_hash(hash); - - // Check if already presents - auto it_find = find_impl(KeySelect()(value), hash, m_buckets + ibucket_for_hash); - if(it_find != end()) { - return std::make_pair(it_find, false); - } - - - return insert_value(ibucket_for_hash, hash, std::forward

(value)); - } - - template - std::pair insert_value(std::size_t ibucket_for_hash, std::size_t hash, Args&&... value_type_args) { - if((m_nb_elements - m_overflow_elements.size()) >= m_max_load_threshold_rehash) { - rehash(GrowthPolicy::next_bucket_count()); - ibucket_for_hash = bucket_for_hash(hash); - } - - std::size_t ibucket_empty = find_empty_bucket(ibucket_for_hash); - if(ibucket_empty < m_buckets_data.size()) { - do { - tsl_hh_assert(ibucket_empty >= ibucket_for_hash); - - // Empty bucket is in range of NeighborhoodSize, use it - if(ibucket_empty - ibucket_for_hash < NeighborhoodSize) { - auto it = insert_in_bucket(ibucket_empty, ibucket_for_hash, - hash, std::forward(value_type_args)...); - return std::make_pair(iterator(it, m_buckets_data.end(), m_overflow_elements.begin()), true); - } - } - // else, try to swap values to get a closer empty bucket - while(swap_empty_bucket_closer(ibucket_empty)); - } - - auto it = insert_in_overflow(ibucket_for_hash, std::forward(value_type_args)...); - return std::make_pair(iterator(m_buckets_data.end(), m_buckets_data.end(), it), true); - - // Never rehash here for memory safety - ////////////////////////////////////////////////////////////////////////////////////////////////////// - // Load factor is too low or a rehash will not change the neighborhood, put the value in overflow list - // if(size() < m_min_load_threshold_rehash || !will_neighborhood_change_on_rehash(ibucket_for_hash)) { - // auto it = insert_in_overflow(ibucket_for_hash, std::forward(value_type_args)...); - // return std::make_pair(iterator(m_buckets_data.end(), m_buckets_data.end(), it), true); - // } - - // rehash(GrowthPolicy::next_bucket_count()); - // ibucket_for_hash = bucket_for_hash(hash); - - // return insert_value(ibucket_for_hash, hash, std::forward(value_type_args)...); - ////////////////////////////////////////////////////////////////////////////////////////////////////// - } - - /* - * Return true if a rehash will change the position of a key-value in the neighborhood of - * ibucket_neighborhood_check. In this case a rehash is needed instead of puting the value in overflow list. - */ - bool will_neighborhood_change_on_rehash(size_t ibucket_neighborhood_check) const { - std::size_t expand_bucket_count = GrowthPolicy::next_bucket_count(); - GrowthPolicy expand_growth_policy(expand_bucket_count); - - const bool use_stored_hash = USE_STORED_HASH_ON_REHASH(expand_bucket_count); - for(size_t ibucket = ibucket_neighborhood_check; - ibucket < m_buckets_data.size() && (ibucket - ibucket_neighborhood_check) < NeighborhoodSize; - ++ibucket) - { - tsl_hh_assert(!m_buckets[ibucket].empty()); - - const size_t hash = use_stored_hash? - m_buckets[ibucket].truncated_bucket_hash(): - hash_key(KeySelect()(m_buckets[ibucket].value())); - if(bucket_for_hash(hash) != expand_growth_policy.bucket_for_hash(hash)) { - return true; - } - } - - return false; - } - - /* - * Return the index of an empty bucket in m_buckets_data. - * If none, the returned index equals m_buckets_data.size() - */ - std::size_t find_empty_bucket(std::size_t ibucket_start) const { - const std::size_t limit = std::min(ibucket_start + MAX_PROBES_FOR_EMPTY_BUCKET, m_buckets_data.size()); - for(; ibucket_start < limit; ibucket_start++) { - if(m_buckets[ibucket_start].empty()) { - return ibucket_start; - } - } - - return m_buckets_data.size(); - } - - /* - * Insert value in ibucket_empty where value originally belongs to ibucket_for_hash - * - * Return bucket iterator to ibucket_empty - */ - template - iterator_buckets insert_in_bucket(std::size_t ibucket_empty, std::size_t ibucket_for_hash, - std::size_t hash, Args&&... value_type_args) - { - tsl_hh_assert(ibucket_empty >= ibucket_for_hash ); - tsl_hh_assert(m_buckets[ibucket_empty].empty()); - m_buckets[ibucket_empty].set_value_of_empty_bucket(hopscotch_bucket::truncate_hash(hash), std::forward(value_type_args)...); - - tsl_hh_assert(!m_buckets[ibucket_for_hash].empty()); - m_buckets[ibucket_for_hash].toggle_neighbor_presence(ibucket_empty - ibucket_for_hash); - m_nb_elements++; - - return m_buckets_data.begin() + ibucket_empty; - } - - template::value>::type* = nullptr> - iterator_overflow insert_in_overflow(std::size_t ibucket_for_hash, Args&&... value_type_args) { - auto it = m_overflow_elements.emplace(m_overflow_elements.end(), std::forward(value_type_args)...); - - m_buckets[ibucket_for_hash].set_overflow(true); - m_nb_elements++; - - return it; - } - - template::value>::type* = nullptr> - iterator_overflow insert_in_overflow(std::size_t ibucket_for_hash, Args&&... value_type_args) { - auto it = m_overflow_elements.emplace(std::forward(value_type_args)...).first; - - m_buckets[ibucket_for_hash].set_overflow(true); - m_nb_elements++; - - return it; - } - - /* - * Try to swap the bucket ibucket_empty_in_out with a bucket preceding it while keeping the neighborhood - * conditions correct. - * - * If a swap was possible, the position of ibucket_empty_in_out will be closer to 0 and true will re returned. - */ - bool swap_empty_bucket_closer(std::size_t& ibucket_empty_in_out) { - tsl_hh_assert(ibucket_empty_in_out >= NeighborhoodSize); - const std::size_t neighborhood_start = ibucket_empty_in_out - NeighborhoodSize + 1; - - for(std::size_t to_check = neighborhood_start; to_check < ibucket_empty_in_out; to_check++) { - neighborhood_bitmap neighborhood_infos = m_buckets[to_check].neighborhood_infos(); - std::size_t to_swap = to_check; - - while(neighborhood_infos != 0 && to_swap < ibucket_empty_in_out) { - if((neighborhood_infos & 1) == 1) { - tsl_hh_assert(m_buckets[ibucket_empty_in_out].empty()); - tsl_hh_assert(!m_buckets[to_swap].empty()); - - m_buckets[to_swap].swap_value_into_empty_bucket(m_buckets[ibucket_empty_in_out]); - - tsl_hh_assert(!m_buckets[to_check].check_neighbor_presence(ibucket_empty_in_out - to_check)); - tsl_hh_assert(m_buckets[to_check].check_neighbor_presence(to_swap - to_check)); - - m_buckets[to_check].toggle_neighbor_presence(ibucket_empty_in_out - to_check); - m_buckets[to_check].toggle_neighbor_presence(to_swap - to_check); - - - ibucket_empty_in_out = to_swap; - - return true; - } - - to_swap++; - neighborhood_infos = neighborhood_bitmap(neighborhood_infos >> 1); - } - } - - return false; - } - - - - template::value>::type* = nullptr> - typename U::value_type* find_value_impl(const K& key, std::size_t hash, hopscotch_bucket* bucket_for_hash) { - return const_cast( - static_cast(this)->find_value_impl(key, hash, bucket_for_hash)); - } - - /* - * Avoid the creation of an iterator to just get the value for operator[] and at() in maps. Faster this way. - * - * Return null if no value for the key (TODO use std::optional when available). - */ - template::value>::type* = nullptr> - const typename U::value_type* find_value_impl(const K& key, std::size_t hash, - const hopscotch_bucket* bucket_for_hash) const - { - const hopscotch_bucket* bucket_found = find_in_buckets(key, hash, bucket_for_hash); - if(bucket_found != nullptr) { - return std::addressof(ValueSelect()(bucket_found->value())); - } - - if(bucket_for_hash->has_overflow()) { - auto it_overflow = find_in_overflow(key); - if(it_overflow != m_overflow_elements.end()) { - return std::addressof(ValueSelect()(*it_overflow)); - } - } - - return nullptr; - } - - template - size_type count_impl(const K& key, std::size_t hash, const hopscotch_bucket* bucket_for_hash) const { - if(find_in_buckets(key, hash, bucket_for_hash) != nullptr) { - return 1; - } - else if(bucket_for_hash->has_overflow() && find_in_overflow(key) != m_overflow_elements.cend()) { - return 1; - } - else { - return 0; - } - } - - template - iterator find_impl(const K& key, std::size_t hash, hopscotch_bucket* bucket_for_hash) { - hopscotch_bucket* bucket_found = find_in_buckets(key, hash, bucket_for_hash); - if(bucket_found != nullptr) { - return iterator(m_buckets_data.begin() + std::distance(m_buckets_data.data(), bucket_found), - m_buckets_data.end(), m_overflow_elements.begin()); - } - - if(!bucket_for_hash->has_overflow()) { - return end(); - } - - return iterator(m_buckets_data.end(), m_buckets_data.end(), find_in_overflow(key)); - } - - template - const_iterator find_impl(const K& key, std::size_t hash, const hopscotch_bucket* bucket_for_hash) const { - const hopscotch_bucket* bucket_found = find_in_buckets(key, hash, bucket_for_hash); - if(bucket_found != nullptr) { - return const_iterator(m_buckets_data.cbegin() + std::distance(m_buckets_data.data(), bucket_found), - m_buckets_data.cend(), m_overflow_elements.cbegin()); - } - - if(!bucket_for_hash->has_overflow()) { - return cend(); - } - - - return const_iterator(m_buckets_data.cend(), m_buckets_data.cend(), find_in_overflow(key)); - } - - template - hopscotch_bucket* find_in_buckets(const K& key, std::size_t hash, hopscotch_bucket* bucket_for_hash) { - const hopscotch_bucket* bucket_found = - static_cast(this)->find_in_buckets(key, hash, bucket_for_hash); - return const_cast(bucket_found); - } - - - /** - * Return a pointer to the bucket which has the value, nullptr otherwise. - */ - template - const hopscotch_bucket* find_in_buckets(const K& key, std::size_t hash, const hopscotch_bucket* bucket_for_hash) const { - (void) hash; // Avoid warning of unused variable when StoreHash is false; - - // TODO Try to optimize the function. - // I tried to use ffs and __builtin_ffs functions but I could not reduce the time the function - // takes with -march=native - - neighborhood_bitmap neighborhood_infos = bucket_for_hash->neighborhood_infos(); - while(neighborhood_infos != 0) { - if((neighborhood_infos & 1) == 1) { - // Check StoreHash before calling bucket_hash_equal. Functionally it doesn't change anythin. - // If StoreHash is false, bucket_hash_equal is a no-op. Avoiding the call is there to help - // GCC optimizes `hash` parameter away, it seems to not be able to do without this hint. - if((!StoreHash || bucket_for_hash->bucket_hash_equal(hash)) && - compare_keys(KeySelect()(bucket_for_hash->value()), key)) - { - return bucket_for_hash; - } - } - - ++bucket_for_hash; - neighborhood_infos = neighborhood_bitmap(neighborhood_infos >> 1); - } - - return nullptr; - } - - - - template::value>::type* = nullptr> - iterator_overflow find_in_overflow(const K& key) { - return std::find_if(m_overflow_elements.begin(), m_overflow_elements.end(), - [&](const value_type& value) { - return compare_keys(key, KeySelect()(value)); - }); - } - - template::value>::type* = nullptr> - const_iterator_overflow find_in_overflow(const K& key) const { - return std::find_if(m_overflow_elements.cbegin(), m_overflow_elements.cend(), - [&](const value_type& value) { - return compare_keys(key, KeySelect()(value)); - }); - } - - template::value>::type* = nullptr> - iterator_overflow find_in_overflow(const K& key) { - return m_overflow_elements.find(key); - } - - template::value>::type* = nullptr> - const_iterator_overflow find_in_overflow(const K& key) const { - return m_overflow_elements.find(key); - } - - - - template::value>::type* = nullptr> - hopscotch_hash new_hopscotch_hash(size_type bucket_count) { - return hopscotch_hash(bucket_count, static_cast(*this), static_cast(*this), - get_allocator(), m_max_load_factor); - } - - template::value>::type* = nullptr> - hopscotch_hash new_hopscotch_hash(size_type bucket_count) { - return hopscotch_hash(bucket_count, static_cast(*this), static_cast(*this), - get_allocator(), m_max_load_factor, m_overflow_elements.key_comp()); - } - -public: - static const size_type DEFAULT_INIT_BUCKETS_SIZE = 0; - static constexpr float DEFAULT_MAX_LOAD_FACTOR = (NeighborhoodSize <= 30)?0.8f:0.9f; - -private: - static const std::size_t MAX_PROBES_FOR_EMPTY_BUCKET = 12*NeighborhoodSize; - static constexpr float MIN_LOAD_FACTOR_FOR_REHASH = 0.2f; - - /** - * We can only use the hash on rehash if the size of the hash type is the same as the stored one or - * if we use a power of two modulo. In the case of the power of two modulo, we just mask - * the least significant bytes, we just have to check that the truncated_hash_type didn't truncated - * too much bytes. - */ - template::value>::type* = nullptr> - static bool USE_STORED_HASH_ON_REHASH(size_type /*bucket_count*/) { - return StoreHash; - } - - template::value>::type* = nullptr> - static bool USE_STORED_HASH_ON_REHASH(size_type bucket_count) { - (void) bucket_count; - if(StoreHash && is_power_of_two_policy::value) { - tsl_hh_assert(bucket_count > 0); - return (bucket_count - 1) <= std::numeric_limits::max(); - } - else { - return false; - } - } - - /** - * Return an always valid pointer to an static empty hopscotch_bucket. - */ - hopscotch_bucket* static_empty_bucket_ptr() { - static hopscotch_bucket empty_bucket; - return &empty_bucket; - } - -private: - buckets_container_type m_buckets_data; - overflow_container_type m_overflow_elements; - - /** - * Points to m_buckets_data.data() if !m_buckets_data.empty() otherwise points to static_empty_bucket_ptr. - * This variable is useful to avoid the cost of checking if m_buckets_data is empty when trying - * to find an element. - * - * TODO Remove m_buckets_data and only use a pointer+size instead of a pointer+vector to save some space in the hopscotch_hash object. - */ - hopscotch_bucket* m_buckets; - - size_type m_nb_elements; - - float m_max_load_factor; - - /** - * Max size of the hash table before a rehash occurs automatically to grow the table. - */ - size_type m_max_load_threshold_rehash; - - /** - * Min size of the hash table before a rehash can occurs automatically (except if m_max_load_threshold_rehash os reached). - * If the neighborhood of a bucket is full before the min is reacher, the elements are put into m_overflow_elements. - */ - size_type m_min_load_threshold_rehash; -}; - -} // end namespace detail_hopscotch_hash - - -} // end namespace tsl - -#endif diff --git a/feed/src/common/hopscotch_map.h b/feed/src/common/hopscotch_map.h deleted file mode 100755 index e1e5ddbbadebf9ed09c78dedf9750bdb4ed067e6..0000000000000000000000000000000000000000 --- a/feed/src/common/hopscotch_map.h +++ /dev/null @@ -1,679 +0,0 @@ -/** - * MIT License - * - * Copyright (c) 2017 Tessil - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef TSL_HOPSCOTCH_MAP_H -#define TSL_HOPSCOTCH_MAP_H - - -#include -#include -#include -#include -#include -#include -#include -#include -#include "paddle/fluid/feed/src/common/hopscotch_hash.h" - - -namespace tsl { - -/** - * Implementation of a hash map using the hopscotch hashing algorithm. - * - * The Key and the value T must be either nothrow move-constructible, copy-constuctible or both. - * - * The size of the neighborhood (NeighborhoodSize) must be > 0 and <= 62 if StoreHash is false. - * When StoreHash is true, 32-bits of the hash will be stored alongside the neighborhood limiting - * the NeighborhoodSize to <= 30. There is no memory usage difference between - * 'NeighborhoodSize 62; StoreHash false' and 'NeighborhoodSize 30; StoreHash true'. - * - * Storing the hash may improve performance on insert during the rehash process if the hash takes time - * to compute. It may also improve read performance if the KeyEqual function takes time (or incurs a cache-miss). - * If used with simple Hash and KeyEqual it may slow things down. - * - * StoreHash can only be set if the GrowthPolicy is set to tsl::power_of_two_growth_policy. - * - * GrowthPolicy defines how the map grows and consequently how a hash value is mapped to a bucket. - * By default the map uses tsl::power_of_two_growth_policy. This policy keeps the number of buckets - * to a power of two and uses a mask to map the hash to a bucket instead of the slow modulo. - * You may define your own growth policy, check tsl::power_of_two_growth_policy for the interface. - * - * If the destructors of Key or T throw an exception, behaviour of the class is undefined. - * - * Iterators invalidation: - * - clear, operator=, reserve, rehash: always invalidate the iterators. - * - insert, emplace, emplace_hint, operator[]: if there is an effective insert, invalidate the iterators - * if a displacement is needed to resolve a collision (which mean that most of the time, - * insert will invalidate the iterators). Or if there is a rehash. - * - erase: iterator on the erased element is the only one which become invalid. - */ -template, - class KeyEqual = std::equal_to, - class Allocator = std::allocator>, - unsigned int NeighborhoodSize = 62, - bool StoreHash = false, - class GrowthPolicy = tsl::hh::power_of_two_growth_policy<2>> -class hopscotch_map { -private: - template - using has_is_transparent = tsl::detail_hopscotch_hash::has_is_transparent; - - class KeySelect { - public: - using key_type = Key; - - const key_type& operator()(const std::pair& key_value) const { - return key_value.first; - } - - key_type& operator()(std::pair& key_value) { - return key_value.first; - } - }; - - class ValueSelect { - public: - using value_type = T; - - const value_type& operator()(const std::pair& key_value) const { - return key_value.second; - } - - value_type& operator()(std::pair& key_value) { - return key_value.second; - } - }; - - - using overflow_container_type = std::list, Allocator>; - using ht = detail_hopscotch_hash::hopscotch_hash, KeySelect, ValueSelect, - Hash, KeyEqual, - Allocator, NeighborhoodSize, - StoreHash, GrowthPolicy, - overflow_container_type>; - -public: - using key_type = typename ht::key_type; - using mapped_type = T; - using value_type = typename ht::value_type; - using size_type = typename ht::size_type; - using difference_type = typename ht::difference_type; - using hasher = typename ht::hasher; - using key_equal = typename ht::key_equal; - using allocator_type = typename ht::allocator_type; - using reference = typename ht::reference; - using const_reference = typename ht::const_reference; - using pointer = typename ht::pointer; - using const_pointer = typename ht::const_pointer; - using iterator = typename ht::iterator; - using const_iterator = typename ht::const_iterator; - - - - /* - * Constructors - */ - hopscotch_map() : hopscotch_map(ht::DEFAULT_INIT_BUCKETS_SIZE) { - } - - explicit hopscotch_map(size_type bucket_count, - const Hash& hash = Hash(), - const KeyEqual& equal = KeyEqual(), - const Allocator& alloc = Allocator()) : - m_ht(bucket_count, hash, equal, alloc, ht::DEFAULT_MAX_LOAD_FACTOR) - { - } - - hopscotch_map(size_type bucket_count, - const Allocator& alloc) : hopscotch_map(bucket_count, Hash(), KeyEqual(), alloc) - { - } - - hopscotch_map(size_type bucket_count, - const Hash& hash, - const Allocator& alloc) : hopscotch_map(bucket_count, hash, KeyEqual(), alloc) - { - } - - explicit hopscotch_map(const Allocator& alloc) : hopscotch_map(ht::DEFAULT_INIT_BUCKETS_SIZE, alloc) { - } - - template - hopscotch_map(InputIt first, InputIt last, - size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, - const Hash& hash = Hash(), - const KeyEqual& equal = KeyEqual(), - const Allocator& alloc = Allocator()) : hopscotch_map(bucket_count, hash, equal, alloc) - { - insert(first, last); - } - - template - hopscotch_map(InputIt first, InputIt last, - size_type bucket_count, - const Allocator& alloc) : hopscotch_map(first, last, bucket_count, Hash(), KeyEqual(), alloc) - { - } - - template - hopscotch_map(InputIt first, InputIt last, - size_type bucket_count, - const Hash& hash, - const Allocator& alloc) : hopscotch_map(first, last, bucket_count, hash, KeyEqual(), alloc) - { - } - - hopscotch_map(std::initializer_list init, - size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, - const Hash& hash = Hash(), - const KeyEqual& equal = KeyEqual(), - const Allocator& alloc = Allocator()) : - hopscotch_map(init.begin(), init.end(), bucket_count, hash, equal, alloc) - { - } - - hopscotch_map(std::initializer_list init, - size_type bucket_count, - const Allocator& alloc) : - hopscotch_map(init.begin(), init.end(), bucket_count, Hash(), KeyEqual(), alloc) - { - } - - hopscotch_map(std::initializer_list init, - size_type bucket_count, - const Hash& hash, - const Allocator& alloc) : - hopscotch_map(init.begin(), init.end(), bucket_count, hash, KeyEqual(), alloc) - { - } - - - hopscotch_map& operator=(std::initializer_list ilist) { - m_ht.clear(); - - m_ht.reserve(ilist.size()); - m_ht.insert(ilist.begin(), ilist.end()); - - return *this; - } - - allocator_type get_allocator() const { return m_ht.get_allocator(); } - - - /* - * Iterators - */ - iterator begin() noexcept { return m_ht.begin(); } - const_iterator begin() const noexcept { return m_ht.begin(); } - const_iterator cbegin() const noexcept { return m_ht.cbegin(); } - - iterator end() noexcept { return m_ht.end(); } - const_iterator end() const noexcept { return m_ht.end(); } - const_iterator cend() const noexcept { return m_ht.cend(); } - - - /* - * Capacity - */ - bool empty() const noexcept { return m_ht.empty(); } - size_type size() const noexcept { return m_ht.size(); } - size_type max_size() const noexcept { return m_ht.max_size(); } - - /* - * Modifiers - */ - void clear() noexcept { m_ht.clear(); } - - - - - std::pair insert(const value_type& value) { - return m_ht.insert(value); - } - - template::value>::type* = nullptr> - std::pair insert(P&& value) { - return m_ht.insert(std::forward

(value)); - } - - std::pair insert(value_type&& value) { - return m_ht.insert(std::move(value)); - } - - - iterator insert(const_iterator hint, const value_type& value) { - return m_ht.insert(hint, value); - } - - template::value>::type* = nullptr> - iterator insert(const_iterator hint, P&& value) { - return m_ht.insert(hint, std::forward

(value)); - } - - iterator insert(const_iterator hint, value_type&& value) { - return m_ht.insert(hint, std::move(value)); - } - - - template - void insert(InputIt first, InputIt last) { - m_ht.insert(first, last); - } - - void insert(std::initializer_list ilist) { - m_ht.insert(ilist.begin(), ilist.end()); - } - - - - - template - std::pair insert_or_assign(const key_type& k, M&& obj) { - return m_ht.insert_or_assign(k, std::forward(obj)); - } - - template - std::pair insert_or_assign(key_type&& k, M&& obj) { - return m_ht.insert_or_assign(std::move(k), std::forward(obj)); - } - - template - iterator insert_or_assign(const_iterator hint, const key_type& k, M&& obj) { - return m_ht.insert_or_assign(hint, k, std::forward(obj)); - } - - template - iterator insert_or_assign(const_iterator hint, key_type&& k, M&& obj) { - return m_ht.insert_or_assign(hint, std::move(k), std::forward(obj)); - } - - - - - /** - * Due to the way elements are stored, emplace will need to move or copy the key-value once. - * The method is equivalent to insert(value_type(std::forward(args)...)); - * - * Mainly here for compatibility with the std::unordered_map interface. - */ - template - std::pair emplace(Args&&... args) { - return m_ht.emplace(std::forward(args)...); - } - - - - - /** - * Due to the way elements are stored, emplace_hint will need to move or copy the key-value once. - * The method is equivalent to insert(hint, value_type(std::forward(args)...)); - * - * Mainly here for compatibility with the std::unordered_map interface. - */ - template - iterator emplace_hint(const_iterator hint, Args&&... args) { - return m_ht.emplace_hint(hint, std::forward(args)...); - } - - - - - template - std::pair try_emplace(const key_type& k, Args&&... args) { - return m_ht.try_emplace(k, std::forward(args)...); - } - - template - std::pair try_emplace(key_type&& k, Args&&... args) { - return m_ht.try_emplace(std::move(k), std::forward(args)...); - } - - template - iterator try_emplace(const_iterator hint, const key_type& k, Args&&... args) { - return m_ht.try_emplace(hint, k, std::forward(args)...); - } - - template - iterator try_emplace(const_iterator hint, key_type&& k, Args&&... args) { - return m_ht.try_emplace(hint, std::move(k), std::forward(args)...); - } - - - - - iterator erase(iterator pos) { return m_ht.erase(pos); } - iterator erase(const_iterator pos) { return m_ht.erase(pos); } - iterator erase(const_iterator first, const_iterator last) { return m_ht.erase(first, last); } - size_type erase(const key_type& key) { return m_ht.erase(key); } - - /** - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup to the value if you already have the hash. - */ - size_type erase(const key_type& key, std::size_t precalculated_hash) { - return m_ht.erase(key, precalculated_hash); - } - - /** - * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. - * If so, K must be hashable and comparable to Key. - */ - template::value>::type* = nullptr> - size_type erase(const K& key) { return m_ht.erase(key); } - - /** - * @copydoc erase(const K& key) - * - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup to the value if you already have the hash. - */ - template::value>::type* = nullptr> - size_type erase(const K& key, std::size_t precalculated_hash) { - return m_ht.erase(key, precalculated_hash); - } - - - - - void swap(hopscotch_map& other) { other.m_ht.swap(m_ht); } - - /* - * Lookup - */ - T& at(const Key& key) { return m_ht.at(key); } - - /** - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - T& at(const Key& key, std::size_t precalculated_hash) { return m_ht.at(key, precalculated_hash); } - - - const T& at(const Key& key) const { return m_ht.at(key); } - - /** - * @copydoc at(const Key& key, std::size_t precalculated_hash) - */ - const T& at(const Key& key, std::size_t precalculated_hash) const { return m_ht.at(key, precalculated_hash); } - - - /** - * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. - * If so, K must be hashable and comparable to Key. - */ - template::value>::type* = nullptr> - T& at(const K& key) { return m_ht.at(key); } - - /** - * @copydoc at(const K& key) - * - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - template::value>::type* = nullptr> - T& at(const K& key, std::size_t precalculated_hash) { return m_ht.at(key, precalculated_hash); } - - - /** - * @copydoc at(const K& key) - */ - template::value>::type* = nullptr> - const T& at(const K& key) const { return m_ht.at(key); } - - /** - * @copydoc at(const K& key, std::size_t precalculated_hash) - */ - template::value>::type* = nullptr> - const T& at(const K& key, std::size_t precalculated_hash) const { return m_ht.at(key, precalculated_hash); } - - - - - T& operator[](const Key& key) { return m_ht[key]; } - T& operator[](Key&& key) { return m_ht[std::move(key)]; } - - - - - size_type count(const Key& key) const { return m_ht.count(key); } - - /** - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - size_type count(const Key& key, std::size_t precalculated_hash) const { - return m_ht.count(key, precalculated_hash); - } - - /** - * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. - * If so, K must be hashable and comparable to Key. - */ - template::value>::type* = nullptr> - size_type count(const K& key) const { return m_ht.count(key); } - - /** - * @copydoc count(const K& key) const - * - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - template::value>::type* = nullptr> - size_type count(const K& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); } - - - - - iterator find(const Key& key) { return m_ht.find(key); } - - /** - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - iterator find(const Key& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); } - - const_iterator find(const Key& key) const { return m_ht.find(key); } - - /** - * @copydoc find(const Key& key, std::size_t precalculated_hash) - */ - const_iterator find(const Key& key, std::size_t precalculated_hash) const { - return m_ht.find(key, precalculated_hash); - } - - /** - * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. - * If so, K must be hashable and comparable to Key. - */ - template::value>::type* = nullptr> - iterator find(const K& key) { return m_ht.find(key); } - - /** - * @copydoc find(const K& key) - * - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - template::value>::type* = nullptr> - iterator find(const K& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); } - - /** - * @copydoc find(const K& key) - */ - template::value>::type* = nullptr> - const_iterator find(const K& key) const { return m_ht.find(key); } - - /** - * @copydoc find(const K& key) - * - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - template::value>::type* = nullptr> - const_iterator find(const K& key, std::size_t precalculated_hash) const { - return m_ht.find(key, precalculated_hash); - } - - - - - std::pair equal_range(const Key& key) { return m_ht.equal_range(key); } - - /** - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - std::pair equal_range(const Key& key, std::size_t precalculated_hash) { - return m_ht.equal_range(key, precalculated_hash); - } - - std::pair equal_range(const Key& key) const { return m_ht.equal_range(key); } - - /** - * @copydoc equal_range(const Key& key, std::size_t precalculated_hash) - */ - std::pair equal_range(const Key& key, std::size_t precalculated_hash) const { - return m_ht.equal_range(key, precalculated_hash); - } - - /** - * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. - * If so, K must be hashable and comparable to Key. - */ - template::value>::type* = nullptr> - std::pair equal_range(const K& key) { return m_ht.equal_range(key); } - - - /** - * @copydoc equal_range(const K& key) - * - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - template::value>::type* = nullptr> - std::pair equal_range(const K& key, std::size_t precalculated_hash) { - return m_ht.equal_range(key, precalculated_hash); - } - - /** - * @copydoc equal_range(const K& key) - */ - template::value>::type* = nullptr> - std::pair equal_range(const K& key) const { return m_ht.equal_range(key); } - - /** - * @copydoc equal_range(const K& key, std::size_t precalculated_hash) - */ - template::value>::type* = nullptr> - std::pair equal_range(const K& key, std::size_t precalculated_hash) const { - return m_ht.equal_range(key, precalculated_hash); - } - - - - - /* - * Bucket interface - */ - size_type bucket_count() const { return m_ht.bucket_count(); } - size_type max_bucket_count() const { return m_ht.max_bucket_count(); } - - - /* - * Hash policy - */ - float load_factor() const { return m_ht.load_factor(); } - float max_load_factor() const { return m_ht.max_load_factor(); } - void max_load_factor(float ml) { m_ht.max_load_factor(ml); } - - void rehash(size_type count_) { m_ht.rehash(count_); } - void reserve(size_type count_) { m_ht.reserve(count_); } - - - /* - * Observers - */ - hasher hash_function() const { return m_ht.hash_function(); } - key_equal key_eq() const { return m_ht.key_eq(); } - - /* - * Other - */ - - /** - * Convert a const_iterator to an iterator. - */ - iterator mutable_iterator(const_iterator pos) { - return m_ht.mutable_iterator(pos); - } - - size_type overflow_size() const noexcept { return m_ht.overflow_size(); } - - friend bool operator==(const hopscotch_map& lhs, const hopscotch_map& rhs) { - if(lhs.size() != rhs.size()) { - return false; - } - - for(const auto& element_lhs : lhs) { - const auto it_element_rhs = rhs.find(element_lhs.first); - if(it_element_rhs == rhs.cend() || element_lhs.second != it_element_rhs->second) { - return false; - } - } - - return true; - } - - friend bool operator!=(const hopscotch_map& lhs, const hopscotch_map& rhs) { - return !operator==(lhs, rhs); - } - - friend void swap(hopscotch_map& lhs, hopscotch_map& rhs) { - lhs.swap(rhs); - } - - - -private: - ht m_ht; -}; - - -/** - * Same as `tsl::hopscotch_map`. - */ -template, - class KeyEqual = std::equal_to, - class Allocator = std::allocator>, - unsigned int NeighborhoodSize = 62, - bool StoreHash = false> -using hopscotch_pg_map = hopscotch_map; - -} // end namespace tsl - -#endif diff --git a/feed/src/common/hopscotch_set.h b/feed/src/common/hopscotch_set.h deleted file mode 100755 index 2ec1035a2fdc1d25a1bf5b0d8dbc293111486c62..0000000000000000000000000000000000000000 --- a/feed/src/common/hopscotch_set.h +++ /dev/null @@ -1,525 +0,0 @@ -/** - * MIT License - * - * Copyright (c) 2017 Tessil - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef TSL_HOPSCOTCH_SET_H -#define TSL_HOPSCOTCH_SET_H - - -#include -#include -#include -#include -#include -#include -#include -#include -#include "paddle/fluid/feed/src/common/hopscotch_hash.h" - - -namespace tsl { - -/** - * Implementation of a hash set using the hopscotch hashing algorithm. - * - * The Key must be either nothrow move-constructible, copy-constuctible or both. - * - * The size of the neighborhood (NeighborhoodSize) must be > 0 and <= 62 if StoreHash is false. - * When StoreHash is true, 32-bits of the hash will be stored alongside the neighborhood limiting - * the NeighborhoodSize to <= 30. There is no memory usage difference between - * 'NeighborhoodSize 62; StoreHash false' and 'NeighborhoodSize 30; StoreHash true'. - * - * Storing the hash may improve performance on insert during the rehash process if the hash takes time - * to compute. It may also improve read performance if the KeyEqual function takes time (or incurs a cache-miss). - * If used with simple Hash and KeyEqual it may slow things down. - * - * StoreHash can only be set if the GrowthPolicy is set to tsl::power_of_two_growth_policy. - * - * GrowthPolicy defines how the set grows and consequently how a hash value is mapped to a bucket. - * By default the set uses tsl::power_of_two_growth_policy. This policy keeps the number of buckets - * to a power of two and uses a mask to set the hash to a bucket instead of the slow modulo. - * You may define your own growth policy, check tsl::power_of_two_growth_policy for the interface. - * - * If the destructor of Key throws an exception, behaviour of the class is undefined. - * - * Iterators invalidation: - * - clear, operator=, reserve, rehash: always invalidate the iterators. - * - insert, emplace, emplace_hint, operator[]: if there is an effective insert, invalidate the iterators - * if a displacement is needed to resolve a collision (which mean that most of the time, - * insert will invalidate the iterators). Or if there is a rehash. - * - erase: iterator on the erased element is the only one which become invalid. - */ -template, - class KeyEqual = std::equal_to, - class Allocator = std::allocator, - unsigned int NeighborhoodSize = 62, - bool StoreHash = false, - class GrowthPolicy = tsl::hh::power_of_two_growth_policy<2>> -class hopscotch_set { -private: - template - using has_is_transparent = tsl::detail_hopscotch_hash::has_is_transparent; - - class KeySelect { - public: - using key_type = Key; - - const key_type& operator()(const Key& key) const { - return key; - } - - key_type& operator()(Key& key) { - return key; - } - }; - - - using overflow_container_type = std::list; - using ht = detail_hopscotch_hash::hopscotch_hash; - -public: - using key_type = typename ht::key_type; - using value_type = typename ht::value_type; - using size_type = typename ht::size_type; - using difference_type = typename ht::difference_type; - using hasher = typename ht::hasher; - using key_equal = typename ht::key_equal; - using allocator_type = typename ht::allocator_type; - using reference = typename ht::reference; - using const_reference = typename ht::const_reference; - using pointer = typename ht::pointer; - using const_pointer = typename ht::const_pointer; - using iterator = typename ht::iterator; - using const_iterator = typename ht::const_iterator; - - - /* - * Constructors - */ - hopscotch_set() : hopscotch_set(ht::DEFAULT_INIT_BUCKETS_SIZE) { - } - - explicit hopscotch_set(size_type bucket_count, - const Hash& hash = Hash(), - const KeyEqual& equal = KeyEqual(), - const Allocator& alloc = Allocator()) : - m_ht(bucket_count, hash, equal, alloc, ht::DEFAULT_MAX_LOAD_FACTOR) - { - } - - hopscotch_set(size_type bucket_count, - const Allocator& alloc) : hopscotch_set(bucket_count, Hash(), KeyEqual(), alloc) - { - } - - hopscotch_set(size_type bucket_count, - const Hash& hash, - const Allocator& alloc) : hopscotch_set(bucket_count, hash, KeyEqual(), alloc) - { - } - - explicit hopscotch_set(const Allocator& alloc) : hopscotch_set(ht::DEFAULT_INIT_BUCKETS_SIZE, alloc) { - } - - template - hopscotch_set(InputIt first, InputIt last, - size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, - const Hash& hash = Hash(), - const KeyEqual& equal = KeyEqual(), - const Allocator& alloc = Allocator()) : hopscotch_set(bucket_count, hash, equal, alloc) - { - insert(first, last); - } - - template - hopscotch_set(InputIt first, InputIt last, - size_type bucket_count, - const Allocator& alloc) : hopscotch_set(first, last, bucket_count, Hash(), KeyEqual(), alloc) - { - } - - template - hopscotch_set(InputIt first, InputIt last, - size_type bucket_count, - const Hash& hash, - const Allocator& alloc) : hopscotch_set(first, last, bucket_count, hash, KeyEqual(), alloc) - { - } - - hopscotch_set(std::initializer_list init, - size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, - const Hash& hash = Hash(), - const KeyEqual& equal = KeyEqual(), - const Allocator& alloc = Allocator()) : - hopscotch_set(init.begin(), init.end(), bucket_count, hash, equal, alloc) - { - } - - hopscotch_set(std::initializer_list init, - size_type bucket_count, - const Allocator& alloc) : - hopscotch_set(init.begin(), init.end(), bucket_count, Hash(), KeyEqual(), alloc) - { - } - - hopscotch_set(std::initializer_list init, - size_type bucket_count, - const Hash& hash, - const Allocator& alloc) : - hopscotch_set(init.begin(), init.end(), bucket_count, hash, KeyEqual(), alloc) - { - } - - - hopscotch_set& operator=(std::initializer_list ilist) { - m_ht.clear(); - - m_ht.reserve(ilist.size()); - m_ht.insert(ilist.begin(), ilist.end()); - - return *this; - } - - allocator_type get_allocator() const { return m_ht.get_allocator(); } - - - /* - * Iterators - */ - iterator begin() noexcept { return m_ht.begin(); } - const_iterator begin() const noexcept { return m_ht.begin(); } - const_iterator cbegin() const noexcept { return m_ht.cbegin(); } - - iterator end() noexcept { return m_ht.end(); } - const_iterator end() const noexcept { return m_ht.end(); } - const_iterator cend() const noexcept { return m_ht.cend(); } - - - /* - * Capacity - */ - bool empty() const noexcept { return m_ht.empty(); } - size_type size() const noexcept { return m_ht.size(); } - size_type max_size() const noexcept { return m_ht.max_size(); } - - /* - * Modifiers - */ - void clear() noexcept { m_ht.clear(); } - - - - - std::pair insert(const value_type& value) { return m_ht.insert(value); } - std::pair insert(value_type&& value) { return m_ht.insert(std::move(value)); } - - iterator insert(const_iterator hint, const value_type& value) { return m_ht.insert(hint, value); } - iterator insert(const_iterator hint, value_type&& value) { return m_ht.insert(hint, std::move(value)); } - - template - void insert(InputIt first, InputIt last) { m_ht.insert(first, last); } - void insert(std::initializer_list ilist) { m_ht.insert(ilist.begin(), ilist.end()); } - - - - - /** - * Due to the way elements are stored, emplace will need to move or copy the key-value once. - * The method is equivalent to insert(value_type(std::forward(args)...)); - * - * Mainly here for compatibility with the std::unordered_map interface. - */ - template - std::pair emplace(Args&&... args) { return m_ht.emplace(std::forward(args)...); } - - - - - /** - * Due to the way elements are stored, emplace_hint will need to move or copy the key-value once. - * The method is equivalent to insert(hint, value_type(std::forward(args)...)); - * - * Mainly here for compatibility with the std::unordered_map interface. - */ - template - iterator emplace_hint(const_iterator hint, Args&&... args) { - return m_ht.emplace_hint(hint, std::forward(args)...); - } - - - - - iterator erase(iterator pos) { return m_ht.erase(pos); } - iterator erase(const_iterator pos) { return m_ht.erase(pos); } - iterator erase(const_iterator first, const_iterator last) { return m_ht.erase(first, last); } - size_type erase(const key_type& key) { return m_ht.erase(key); } - - /** - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup to the value if you already have the hash. - */ - size_type erase(const key_type& key, std::size_t precalculated_hash) { - return m_ht.erase(key, precalculated_hash); - } - - /** - * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. - * If so, K must be hashable and comparable to Key. - */ - template::value>::type* = nullptr> - size_type erase(const K& key) { return m_ht.erase(key); } - - /** - * @copydoc erase(const K& key) - * - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup to the value if you already have the hash. - */ - template::value>::type* = nullptr> - size_type erase(const K& key, std::size_t precalculated_hash) { - return m_ht.erase(key, precalculated_hash); - } - - - - - void swap(hopscotch_set& other) { other.m_ht.swap(m_ht); } - - - /* - * Lookup - */ - size_type count(const Key& key) const { return m_ht.count(key); } - - /** - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - size_type count(const Key& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); } - - /** - * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. - * If so, K must be hashable and comparable to Key. - */ - template::value>::type* = nullptr> - size_type count(const K& key) const { return m_ht.count(key); } - - /** - * @copydoc count(const K& key) const - * - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - template::value>::type* = nullptr> - size_type count(const K& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); } - - - - - iterator find(const Key& key) { return m_ht.find(key); } - - /** - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - iterator find(const Key& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); } - - const_iterator find(const Key& key) const { return m_ht.find(key); } - - /** - * @copydoc find(const Key& key, std::size_t precalculated_hash) - */ - const_iterator find(const Key& key, std::size_t precalculated_hash) const { return m_ht.find(key, precalculated_hash); } - - /** - * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. - * If so, K must be hashable and comparable to Key. - */ - template::value>::type* = nullptr> - iterator find(const K& key) { return m_ht.find(key); } - - /** - * @copydoc find(const K& key) - * - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - template::value>::type* = nullptr> - iterator find(const K& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); } - - /** - * @copydoc find(const K& key) - */ - template::value>::type* = nullptr> - const_iterator find(const K& key) const { return m_ht.find(key); } - - /** - * @copydoc find(const K& key) - * - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - template::value>::type* = nullptr> - const_iterator find(const K& key, std::size_t precalculated_hash) const { return m_ht.find(key, precalculated_hash); } - - - - - std::pair equal_range(const Key& key) { return m_ht.equal_range(key); } - - /** - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - std::pair equal_range(const Key& key, std::size_t precalculated_hash) { - return m_ht.equal_range(key, precalculated_hash); - } - - std::pair equal_range(const Key& key) const { return m_ht.equal_range(key); } - - /** - * @copydoc equal_range(const Key& key, std::size_t precalculated_hash) - */ - std::pair equal_range(const Key& key, std::size_t precalculated_hash) const { - return m_ht.equal_range(key, precalculated_hash); - } - - /** - * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. - * If so, K must be hashable and comparable to Key. - */ - template::value>::type* = nullptr> - std::pair equal_range(const K& key) { return m_ht.equal_range(key); } - - /** - * @copydoc equal_range(const K& key) - * - * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same - * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. - */ - template::value>::type* = nullptr> - std::pair equal_range(const K& key, std::size_t precalculated_hash) { - return m_ht.equal_range(key, precalculated_hash); - } - - /** - * @copydoc equal_range(const K& key) - */ - template::value>::type* = nullptr> - std::pair equal_range(const K& key) const { return m_ht.equal_range(key); } - - /** - * @copydoc equal_range(const K& key, std::size_t precalculated_hash) - */ - template::value>::type* = nullptr> - std::pair equal_range(const K& key, std::size_t precalculated_hash) const { - return m_ht.equal_range(key, precalculated_hash); - } - - - - - /* - * Bucket interface - */ - size_type bucket_count() const { return m_ht.bucket_count(); } - size_type max_bucket_count() const { return m_ht.max_bucket_count(); } - - - /* - * Hash policy - */ - float load_factor() const { return m_ht.load_factor(); } - float max_load_factor() const { return m_ht.max_load_factor(); } - void max_load_factor(float ml) { m_ht.max_load_factor(ml); } - - void rehash(size_type count_) { m_ht.rehash(count_); } - void reserve(size_type count_) { m_ht.reserve(count_); } - - - /* - * Observers - */ - hasher hash_function() const { return m_ht.hash_function(); } - key_equal key_eq() const { return m_ht.key_eq(); } - - - /* - * Other - */ - - /** - * Convert a const_iterator to an iterator. - */ - iterator mutable_iterator(const_iterator pos) { - return m_ht.mutable_iterator(pos); - } - - size_type overflow_size() const noexcept { return m_ht.overflow_size(); } - - friend bool operator==(const hopscotch_set& lhs, const hopscotch_set& rhs) { - if(lhs.size() != rhs.size()) { - return false; - } - - for(const auto& element_lhs : lhs) { - const auto it_element_rhs = rhs.find(element_lhs); - if(it_element_rhs == rhs.cend()) { - return false; - } - } - - return true; - } - - friend bool operator!=(const hopscotch_set& lhs, const hopscotch_set& rhs) { - return !operator==(lhs, rhs); - } - - friend void swap(hopscotch_set& lhs, hopscotch_set& rhs) { - lhs.swap(rhs); - } - -private: - ht m_ht; -}; - - -/** - * Same as `tsl::hopscotch_set`. - */ -template, - class KeyEqual = std::equal_to, - class Allocator = std::allocator, - unsigned int NeighborhoodSize = 62, - bool StoreHash = false> -using hopscotch_pg_set = hopscotch_set; - -} // end namespace tsl - -#endif diff --git a/feed/src/data_reader/CMakeLists.txt b/feed/src/data_reader/CMakeLists.txt deleted file mode 100644 index 8c933afc88303385e2a6e9f8601af74313bffc32..0000000000000000000000000000000000000000 --- a/feed/src/data_reader/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -cc_library(feed_data_set SRCS data_set.cc DEPS operator) diff --git a/feed/src/data_reader/data_set.cc b/feed/src/data_reader/data_set.cc deleted file mode 100644 index 2fe850572500dd4810acb39891a34caaf3226bbe..0000000000000000000000000000000000000000 --- a/feed/src/data_reader/data_set.cc +++ /dev/null @@ -1,173 +0,0 @@ -#include "paddle/fluid/feed/src/data_reader/data_set.h" -#include "paddle/fluid/framework/data_feed_factory.h" -#include "paddle/fluid/framework/fleet/fleet_wrapper.h" -#include "paddle/fluid/framework/io/fs.h" -#include "paddle/fluid/platform/timer.h" - -namespace paddle { -namespace framework { - -void FeedMultiSlotDataset::CreatePreLoadReaders() { - VLOG(3) << "Begin CreatePreLoadReaders"; - if (preload_thread_num_ == 0) { - preload_thread_num_ = thread_num_; - } - CHECK(preload_thread_num_ > 0) << "thread num should > 0"; - CHECK(input_channel_ != nullptr); - preload_readers_.clear(); - for (int i = 0; i < preload_thread_num_; ++i) { - preload_readers_.push_back( - DataFeedFactory::CreateDataFeed(data_feed_desc_.name())); - preload_readers_[i]->Init(data_feed_desc_); - preload_readers_[i]->SetThreadId(i); - preload_readers_[i]->SetThreadNum(preload_thread_num_); - preload_readers_[i]->SetFileListMutex(&mutex_for_pick_file_); - preload_readers_[i]->SetFileListIndex(&file_idx_); - preload_readers_[i]->SetFileList(filelist_); - preload_readers_[i]->SetParseInsId(parse_ins_id_); - preload_readers_[i]->SetParseContent(parse_content_); - preload_readers_[i]->SetInputChannel(input_channel_.get()); - preload_readers_[i]->SetOutputChannel(nullptr); - preload_readers_[i]->SetConsumeChannel(nullptr); - } - VLOG(3) << "End CreatePreLoadReaders"; -} - -void FeedMultiSlotDataset::MergeByInsId() { - VLOG(3) << "MultiSlotDataset::MergeByInsId begin"; - if (!merge_by_insid_) { - VLOG(3) << "merge_by_insid=false, will not MergeByInsId"; - return; - } - auto multi_slot_desc = data_feed_desc_.multi_slot_desc(); - std::vector use_slots; - for (size_t i = 0; i < multi_slot_desc.slots_size(); ++i) { - const auto& slot = multi_slot_desc.slots(i); - if (slot.is_used()) { - use_slots.push_back(slot.name()); - } - } - CHECK(multi_output_channel_.size() != 0); // NOLINT - auto channel_data = paddle::framework::MakeChannel(); - VLOG(3) << "multi_output_channel_.size() " << multi_output_channel_.size(); - for (size_t i = 0; i < multi_output_channel_.size(); ++i) { - std::vector vec_data; - multi_output_channel_[i]->Close(); - multi_output_channel_[i]->ReadAll(vec_data); - channel_data->Write(std::move(vec_data)); - vec_data.clear(); - vec_data.shrink_to_fit(); - multi_output_channel_[i]->Clear(); - } - channel_data->Close(); - std::vector recs; - recs.reserve(channel_data->Size()); - channel_data->ReadAll(recs); - channel_data->Clear(); - std::sort(recs.begin(), recs.end(), [](const Record& a, const Record& b) { - return a.ins_id_ < b.ins_id_; - }); - - std::vector results; - uint64_t drop_ins_num = 0; - std::unordered_set all_int64; - std::unordered_set all_float; - std::unordered_set local_uint64; - std::unordered_set local_float; - - VLOG(3) << "recs.size() " << recs.size(); - for (size_t i = 0; i < recs.size();) { - size_t j = i + 1; - while (j < recs.size() && recs[j].ins_id_ == recs[i].ins_id_) { - j++; - } - if (min_merge_size_ > 0 && j - i != min_merge_size_) { - drop_ins_num += j - i; - LOG(WARNING) << "drop ins " << recs[i].ins_id_ << " size=" << j - i - << ", because merge_size=" << min_merge_size_; - i = j; - continue; - } - - all_int64.clear(); - all_float.clear(); - bool has_conflict_slot = false; - uint16_t conflict_slot = 0; - - Record rec; - rec.ins_id_ = recs[i].ins_id_; - rec.content_ = recs[i].content_; - - for (size_t k = i; k < j; k++) { - local_uint64.clear(); - local_float.clear(); - for (auto& feature : recs[k].uint64_feasigns_) { - uint16_t slot = feature.slot(); - if (all_int64.find(slot) != all_int64.end()) { - has_conflict_slot = true; - conflict_slot = slot; - break; - } - local_uint64.insert(slot); - rec.uint64_feasigns_.push_back(std::move(feature)); - } - if (has_conflict_slot) { - break; - } - all_int64.insert(local_uint64.begin(), local_uint64.end()); - - for (auto& feature : recs[k].float_feasigns_) { - uint16_t slot = feature.slot(); - if (all_float.find(slot) != all_float.end()) { - has_conflict_slot = true; - conflict_slot = slot; - break; - } - local_float.insert(slot); - rec.float_feasigns_.push_back(std::move(feature)); - } - if (has_conflict_slot) { - break; - } - all_float.insert(local_float.begin(), local_float.end()); - } - - if (has_conflict_slot) { - LOG(WARNING) << "drop ins " << recs[i].ins_id_ << " size=" << j - i - << ", because conflict_slot=" << use_slots[conflict_slot]; - drop_ins_num += j - i; - } else { - results.push_back(std::move(rec)); - } - i = j; - } - std::vector().swap(recs); - VLOG(3) << "results size " << results.size(); - LOG(WARNING) << "total drop ins num: " << drop_ins_num; - results.shrink_to_fit(); - - auto fleet_ptr = FleetWrapper::GetInstance(); - std::shuffle(results.begin(), results.end(), fleet_ptr->LocalRandomEngine()); - channel_data->Open(); - channel_data->Write(std::move(results)); - channel_data->Close(); - results.clear(); - results.shrink_to_fit(); - VLOG(3) << "channel data size " << channel_data->Size(); - channel_data->SetBlockSize(channel_data->Size() / channel_num_ + 1); - VLOG(3) << "channel data block size " << channel_data->BlockSize(); - for (size_t i = 0; i < multi_output_channel_.size(); ++i) { - std::vector vec_data; - channel_data->Read(vec_data); - multi_output_channel_[i]->Open(); - multi_output_channel_[i]->Write(std::move(vec_data)); - vec_data.clear(); - vec_data.shrink_to_fit(); - } - CHECK(channel_data->Size() == 0); // NOLINT - channel_data->Clear(); - VLOG(3) << "MultiSlotDataset::MergeByInsId end"; -} - -} // end namespace framework -} // end namespace paddle diff --git a/feed/src/data_reader/data_set.h b/feed/src/data_reader/data_set.h deleted file mode 100644 index 9c22418e24c8d5b27f57cbc1c406682b9665a28f..0000000000000000000000000000000000000000 --- a/feed/src/data_reader/data_set.h +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once -#include "paddle/fluid/framework/data_set.h" - -namespace paddle { -namespace framework { - -class FeedMultiSlotDataset : public MultiSlotDataset { - public: - FeedMultiSlotDataset() {} - virtual void MergeByInsId(); - virtual void CreatePreLoadReaders(); - virtual ~FeedMultiSlotDataset() {} -}; - -} // end namespace framework -} // end namespace paddle diff --git a/feed/tool/CMakeLists.txt b/feed/tool/CMakeLists.txt deleted file mode 100755 index fe1b1b9c4060ffc30229007806b9f34a260ca7e7..0000000000000000000000000000000000000000 --- a/feed/tool/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -add_executable(parse_feasign parse_feasign.cpp) diff --git a/feed/tool/parse_feasign.cpp b/feed/tool/parse_feasign.cpp deleted file mode 100644 index 8c19e25016fc1910cde179aaf3cd88f08dc7dc91..0000000000000000000000000000000000000000 --- a/feed/tool/parse_feasign.cpp +++ /dev/null @@ -1,111 +0,0 @@ -#include -#include -#include -#include -#include -#include -using namespace std; - -// 将extractor产出的feasign 转 paddle instance -int main(int argc, char * argv[]) { - ifstream fin(argv[1]); - int slot_idx = 0; - unordered_map slot_map; - int slot = 0; - while (fin >> slot) { - slot_map[slot] = slot_idx++; - } - int slot_num = slot_map.size(); - int max_feasign_num = 10000; - vector > slots; - for (int i = 0; i < slot_num; ++i) { - vector tmp; - tmp.reserve(max_feasign_num); - slots.push_back(tmp); - } - - char * linebuf = (char *)calloc(1024*1024*40, sizeof(char)); - if (NULL == linebuf) { - fprintf(stderr, "memory not enough, exit\n"); - exit(-1); - } - - int click = 0; - int show = 0; - unsigned long feasign = 0; - int i = 0; - while (fgets(linebuf, 1024*1024*40, stdin)) { - char* head_ptr = linebuf; - for (i = 0; *(head_ptr + i) != ' '; ++i) ; - head_ptr += i + 1; - show = strtoul(head_ptr, &head_ptr, 10); - click = strtoul(head_ptr, &head_ptr, 10); - int feasign_num = 0; - while (head_ptr != NULL) { - feasign = strtoul(head_ptr, &head_ptr, 10); - if (head_ptr != NULL && *head_ptr == ':') { - head_ptr++; - slot = strtoul(head_ptr, &head_ptr, 10); - feasign_num++; - if (slot_map.find(slot) == slot_map.end()) { - continue; - } - slots[slot_map[slot]].push_back(feasign); - } else { - break; - } - } - - int tag = 0; - float weight = 1; - bool has_tag = false; - bool has_weight = false; - for (int j = 0; *(head_ptr + j) != '\0'; ++j) { - if (*(head_ptr + j) == '$') { - has_tag = true; - } else if (*(head_ptr + j) == '*') { - has_weight = true; - } - } - - if (has_tag) { - for (i = 0; *(head_ptr + i) != '\0' && *(head_ptr + i) != '$'; ++i) ; - if (head_ptr + i != '\0') { - head_ptr += i + 1; - if (*head_ptr == 'D') { - tag = 0; - head_ptr += 1; - } else { - tag = strtoul(head_ptr, &head_ptr, 10); - } - } - } - - if (has_weight) { - for (i = 0; *(head_ptr + i) != '\0' && *(head_ptr + i) != '*'; ++i) ; - if (head_ptr + i != '\0') { - head_ptr += i + 1; - weight = strtod(head_ptr, &head_ptr); - } - } - - fprintf(stdout, "1 %d 1 %d", show, click); - for (size_t i = 0; i < slots.size() - 2; ++i) { - if (slots[i].size() == 0) { - fprintf(stdout, " 1 0"); - } else { - fprintf(stdout, " %lu", slots[i].size()); - for (size_t j = 0; j < slots[i].size(); ++j) { - fprintf(stdout, " %lu", slots[i][j]); - } - } - slots[i].clear(); - slots[i].reserve(max_feasign_num); - } - if (weight == 1.0) { - fprintf(stdout, " 1 %d 1 %d\n", int(weight), tag); - } else { - fprintf(stdout, " 1 %f 1 %d\n", weight, tag); - } - } -} diff --git a/kagle/kagle_metric.py b/kagle/metrics/auc_metrics.py old mode 100755 new mode 100644 similarity index 74% rename from kagle/kagle_metric.py rename to kagle/metrics/auc_metrics.py index 3919c6bfb628ca797adadb82a45f35ebfe792e2c..788c3567ce5bcfbf5618a5349ae4c348b3ecb7bb --- a/kagle/kagle_metric.py +++ b/kagle/metrics/auc_metrics.py @@ -1,83 +1,35 @@ -""" -Do metric jobs. calculate AUC, MSE, COCP ... -""" -import abc + import math -import time import numpy as np import paddle.fluid as fluid -import kagle.kagle_util as kagle_util -from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet - -class Metric(object): - """R - """ - __metaclass__=abc.ABCMeta +from .base import Metric - def __init__(self, config): - """ """ - pass - - @abc.abstractmethod - def clear(self, scope, params): - """ - clear current value - Args: - scope: value container - params: extend varilable for clear - """ - pass - - @abc.abstractmethod - def calculate(self, scope, params): - """ - calculate result - Args: - scope: value container - params: extend varilable for clear - """ - pass - @abc.abstractmethod - def get_result(self): - """ - Return: - result(dict) : calculate result - """ - pass - - @abc.abstractmethod - def get_result_to_string(self): - """ - Return: - result(string) : calculate result with string format, for output - """ - pass - - -class PaddleAUCMetric(Metric): +class AUCMetric(Metric): """ Metric For Paddle Model """ - def __init__(self, config): + + def __init__(self, config, fleet): """ """ - pass - + self.config = config + self.fleet = fleet + def clear(self, scope, params): """ Clear current metric value, usually set to zero Args: scope : paddle runtime var container - params(dict) : + params(dict) : label : a group name for metric metric_dict : current metric_items in group Return: - None + None """ self._label = params['label'] self._metric_dict = params['metric_dict'] self._result = {} - place=fluid.CPUPlace() + place = fluid.CPUPlace() for metric_name in self._metric_dict: metric_config = self._metric_dict[metric_name] if scope.find_var(metric_config['var'].name) is None: @@ -85,10 +37,10 @@ class PaddleAUCMetric(Metric): metric_var = scope.var(metric_config['var'].name).get_tensor() data_type = 'float32' if 'data_type' in metric_config: - data_type = metric_config['data_type'] + data_type = metric_config['data_type'] data_array = np.zeros(metric_var._get_dims()).astype(data_type) metric_var.set(data_array, place) - + def get_metric(self, scope, metric_name): """ reduce metric named metric_name from all worker @@ -99,17 +51,17 @@ class PaddleAUCMetric(Metric): old_metric_shape = np.array(metric.shape) metric = metric.reshape(-1) global_metric = np.copy(metric) * 0 - fleet._role_maker._node_type_comm.Allreduce(metric, global_metric) + self.fleet._role_maker._node_type_comm.Allreduce(metric, global_metric) global_metric = global_metric.reshape(old_metric_shape) return global_metric[0] - + def get_global_metrics(self, scope, metric_dict): """ reduce all metric in metric_dict from all worker Return: dict : {matric_name : metric_result} """ - fleet._role_maker._barrier_worker() + self.fleet._role_maker._barrier_worker() result = {} for metric_name in metric_dict: metric_item = metric_dict[metric_name] @@ -120,7 +72,7 @@ class PaddleAUCMetric(Metric): return result def calculate_auc(self, global_pos, global_neg): - """R + """R """ num_bucket = len(global_pos) area = 0.0 @@ -129,7 +81,7 @@ class PaddleAUCMetric(Metric): new_pos = 0.0 new_neg = 0.0 total_ins_num = 0 - for i in xrange(num_bucket): + for i in range(num_bucket): index = num_bucket - 1 - i new_pos = pos + global_pos[index] total_ins_num += global_pos[index] @@ -146,7 +98,7 @@ class PaddleAUCMetric(Metric): return auc_value def calculate_bucket_error(self, global_pos, global_neg): - """R + """R """ num_bucket = len(global_pos) last_ctr = -1.0 @@ -164,7 +116,7 @@ class PaddleAUCMetric(Metric): relative_ctr_error = 0.0 k_max_span = 0.01 k_relative_error_bound = 0.05 - for i in xrange(num_bucket): + for i in range(num_bucket): click = global_pos[i] show = global_pos[i] + global_neg[i] ctr = float(i) / num_bucket @@ -182,7 +134,7 @@ class PaddleAUCMetric(Metric): if adjust_ctr == 0: continue relative_error = \ - math.sqrt((1 - adjust_ctr) / (adjust_ctr * impression_sum)) + math.sqrt((1 - adjust_ctr) / (adjust_ctr * impression_sum)) if relative_error < k_relative_error_bound: actual_ctr = click_sum / impression_sum relative_ctr_error = abs(actual_ctr / adjust_ctr - 1) @@ -192,12 +144,12 @@ class PaddleAUCMetric(Metric): bucket_error = error_sum / error_count if error_count > 0 else 0.0 return bucket_error - + def calculate(self, scope, params): """ """ self._label = params['label'] self._metric_dict = params['metric_dict'] - fleet._role_maker._barrier_worker() + self.fleet._role_maker._barrier_worker() result = self.get_global_metrics(scope, self._metric_dict) if result['total_ins_num'] == 0: self._result = result @@ -218,7 +170,7 @@ class PaddleAUCMetric(Metric): if 'abserr' in result: result['mae'] = result['abserr'] / result['total_ins_num'] if 'sqrerr' in result: - result['rmse'] = math.sqrt(result['sqrerr'] / result['total_ins_num']) + result['rmse'] = math.sqrt(result['sqrerr'] / result['total_ins_num']) if 'prob' in result: result['predict_ctr'] = result['prob'] / result['total_ins_num'] if abs(result['predict_ctr']) > 1e-6: @@ -236,8 +188,9 @@ class PaddleAUCMetric(Metric): def get_result_to_string(self): """ """ result = self.get_result() - result_str = "%s AUC=%.6f BUCKET_ERROR=%.6f MAE=%.6f RMSE=%.6f "\ - "Actural_CTR=%.6f Predicted_CTR=%.6f COPC=%.6f MEAN Q_VALUE=%.6f Ins number=%s" % \ - (self._label, result['auc'], result['bucket_error'], result['mae'], result['rmse'], result['actual_ctr'], - result['predict_ctr'], result['copc'], result['mean_q'], result['total_ins_num']) + result_str = "%s AUC=%.6f BUCKET_ERROR=%.6f MAE=%.6f RMSE=%.6f " \ + "Actural_CTR=%.6f Predicted_CTR=%.6f COPC=%.6f MEAN Q_VALUE=%.6f Ins number=%s" % \ + (self._label, result['auc'], result['bucket_error'], result['mae'], result['rmse'], + result['actual_ctr'], + result['predict_ctr'], result['copc'], result['mean_q'], result['total_ins_num']) return result_str diff --git a/kagle/metrics/base.py b/kagle/metrics/base.py new file mode 100644 index 0000000000000000000000000000000000000000..251422f7fd1ad91469de59bc54d8f56e9ed81029 --- /dev/null +++ b/kagle/metrics/base.py @@ -0,0 +1,50 @@ +""" +Do metric jobs. calculate AUC, MSE, COCP ... +""" +import abc + + +class Metric(object): + """R + """ + __metaclass__ = abc.ABCMeta + + def __init__(self, config): + """ """ + pass + + @abc.abstractmethod + def clear(self, scope, params): + """ + clear current value + Args: + scope: value container + params: extend varilable for clear + """ + pass + + @abc.abstractmethod + def calculate(self, scope, params): + """ + calculate result + Args: + scope: value container + params: extend varilable for clear + """ + pass + + @abc.abstractmethod + def get_result(self): + """ + Return: + result(dict) : calculate result + """ + pass + + @abc.abstractmethod + def get_result_to_string(self): + """ + Return: + result(string) : calculate result with string format, for output + """ + pass diff --git a/kagle/models/__init__.py b/kagle/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/kagle/kagle_model.py b/kagle/models/base.py old mode 100755 new mode 100644 similarity index 82% rename from kagle/kagle_model.py rename to kagle/models/base.py index 52a160ab6e4e9289cb9f2e3d739279e10e2c1896..2311f5e5dfb5601ce9d80a398c5ffcc05cb94288 --- a/kagle/kagle_model.py +++ b/kagle/models/base.py @@ -4,25 +4,57 @@ Model Net: analyse layer config, and parse to Paddle Pragram import abc import copy import yaml -import paddle.fluid as fluid -import kagle.kagle_layer as kagle_layer -import kagle.kagle_table as kagle_table +import paddle.fluid as fluid +import kagle.utils.kagle_table as kagle_table from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet + +class Layer(object): + """R + """ + __metaclass__ = abc.ABCMeta + + def __init__(self, config): + """R + """ + pass + + def generate(self, mode, param): + """R + """ + if mode == 'fluid': + return self.generate_fluid(param) + elif mode == 'tensorflow': + return self.generate_tensorflow(param) + print('unsupport this mode: ' + mode) + return None, None + + @abc.abstractmethod + def generate_fluid(self, param): + """R + """ + pass + + def generate_tensorflow(self, param): + """ Not implement currently + """ + pass + + def create(config): """ Create a model instance by config Args: - config(dict) : desc model type and net + config(dict) : desc model type and net Return: Model Instance """ model = None if config['mode'] == 'fluid': - model = FluidModel(config) + model = YamlModel(config) model.build_model() return model - + class Model(object): """R @@ -37,13 +69,13 @@ class Model(object): f = open(config['layer_file'], 'r') self._build_nodes = yaml.safe_load(f.read()) self._build_phase = ['input', 'param', 'summary', 'layer'] - self._build_param = {'layer': {}, 'inner_layer':{}, 'layer_extend': {}, 'model': {}} - self._inference_meta = {'dependency':{}, 'params': {}} + self._build_param = {'layer': {}, 'inner_layer': {}, 'layer_extend': {}, 'model': {}} + self._inference_meta = {'dependency': {}, 'params': {}} self._cost = None self._metrics = {} self._data_var = [] pass - + def get_cost_op(self): """R """ @@ -58,10 +90,10 @@ class Model(object): def shrink(self, params): """R """ - pass + pass @abc.abstractmethod - def build_model(self): + def build_model(self): """R """ pass @@ -83,10 +115,10 @@ class Model(object): """R """ pass - + def inference_params(self, inference_layer): """ - get params name for inference_layer + get params name for inference_layer Args: inference_layer(str): layer for inference Return: @@ -95,7 +127,7 @@ class Model(object): layer = inference_layer if layer in self._inference_meta['params']: return self._inference_meta['params'][layer] - + self._inference_meta['params'][layer] = [] self._inference_meta['dependency'][layer] = self.get_dependency(self._build_param['inner_layer'], layer) for node in self._build_nodes['layer']: @@ -103,16 +135,16 @@ class Model(object): continue if 'inference_param' in self._build_param['layer_extend'][node['name']]: self._inference_meta['params'][layer] += \ - self._build_param['layer_extend'][node['name']]['inference_param']['params'] + self._build_param['layer_extend'][node['name']]['inference_param']['params'] return self._inference_meta['params'][layer] def get_dependency(self, layer_graph, dest_layer): """ - get layers of dest_layer depends on + get model of dest_layer depends on Args: - layer_graph(dict) : all layers in graph + layer_graph(dict) : all model in graph Return: - depend_layers(list) : sub-graph layers for calculate dest_layer + depend_layers(list) : sub-graph model for calculate dest_layer """ dependency_list = [] if dest_layer in layer_graph: @@ -122,8 +154,8 @@ class Model(object): dependency_list = dependency_list + self.get_dependency(layer_graph, dependency) return list(set(dependency_list)) - -class FluidModel(Model): + +class YamlModel(Model): """R """ def __init__(self, config): @@ -131,8 +163,8 @@ class FluidModel(Model): """ Model.__init__(self, config) pass - - def build_model(self): + + def build_model(self): """R build a fluid model with config Return: @@ -144,13 +176,12 @@ class FluidModel(Model): """ for layer in self._build_nodes['layer']: self._build_param['inner_layer'][layer['name']] = layer - - + self._build_param['table'] = {} self._build_param['model']['train_program'] = fluid.Program() self._build_param['model']['startup_program'] = fluid.Program() with fluid.program_guard(self._build_param['model']['train_program'], \ - self._build_param['model']['startup_program']): + self._build_param['model']['startup_program']): with fluid.unique_name.guard(): for phase in self._build_phase: if self._build_nodes[phase] is None: @@ -176,12 +207,12 @@ class FluidModel(Model): inference_param = extend_output['inference_param'] param_name = inference_param['name'] if param_name not in self._build_param['table']: - self._build_param['table'][param_name] = {'params':[]} + self._build_param['table'][param_name] = {'params' :[]} table_meta = kagle_table.TableMeta.alloc_new_table(inference_param['table_id']) self._build_param['table'][param_name]['_meta'] = table_meta self._build_param['table'][param_name]['params'] += inference_param['params'] pass - + @classmethod def build_optimizer(self, params): """R @@ -197,8 +228,8 @@ class FluidModel(Model): stat_var_names += [model_metrics[metric]['var'].name for metric in model_metrics] strategy['stat_var_names'] = list(set(stat_var_names)) optimizer_generator = 'optimizer = fluid.optimizer.' + optimizer_conf['class'] + \ - '(learning_rate=' + str(optimizer_conf['learning_rate']) + ')' - exec(optimizer_generator) + '(learning_rate=' + str(optimizer_conf['learning_rate']) + ')' + exec(optimizer_generator) optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) return optimizer @@ -240,7 +271,6 @@ class FluidModel(Model): with fluid.scope_guard(scope): if params['save_combine']: fluid.io.save_vars(executor, "./", \ - program, vars=params_var_list, filename=params_file_name) + program, vars=params_var_list, filename=params_file_name) else: fluid.io.save_vars(executor, params_file_name, program, vars=params_var_list) - pass diff --git a/kagle/models/ctr_dnn/hyper_parameters.yaml b/kagle/models/ctr_dnn/hyper_parameters.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/kagle/models/ctr_dnn/model.py b/kagle/models/ctr_dnn/model.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/kagle/models/ctr_dnn/reader.py b/kagle/models/ctr_dnn/reader.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/kagle/models/ctr_dnn/sample_test.txt b/kagle/models/ctr_dnn/sample_test.txt new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/kagle/models/ctr_dnn/sample_train.txt b/kagle/models/ctr_dnn/sample_train.txt new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/kagle/kagle_layer.py b/kagle/models/layers.py old mode 100755 new mode 100644 similarity index 70% rename from kagle/kagle_layer.py rename to kagle/models/layers.py index acf49d91efe449229cd6338da2065034ac0c4118..b214cc647eda168ab77b7f4c2330b085b5bc8b35 --- a/kagle/kagle_layer.py +++ b/kagle/models/layers.py @@ -1,44 +1,11 @@ -""" -DnnLayer: analyse layer config, and parse to Paddle Operator, build net -""" -import abc import paddle.fluid as fluid - -class Layer(object): - """R - """ - __metaclass__ = abc.ABCMeta - - def __init__(self, config): - """R - """ - pass - - def generate(self, mode, param): - """R - """ - if mode == 'fluid': - return self.generate_fluid(param) - elif mode == 'tensorflow': - return self.generate_tensorflow(param) - print ('unsupport this mode: ' + mode) - return None, None - - @abc.abstractmethod - def generate_fluid(self, param): - """R - """ - pass - - def generate_tensorflow(self, param): - """ Not implement currently - """ - pass +from .base import Layer class EmbeddingInputLayer(Layer): """R """ + def __init__(self, config): """R """ @@ -47,10 +14,10 @@ class EmbeddingInputLayer(Layer): self._slots = [str(slot) for slot in config['slots']] self._mf_dim = config['mf_dim'] self._backward = config['backward'] - self._emb_dim = self._mf_dim + 3 #append show ctr lr + self._emb_dim = self._mf_dim + 3 # append show ctr lr self._emb_layers = [] - - def generate_fluid(self, param): + + def generate_fluid(self, param): """R """ show_clk = fluid.layers.concat( @@ -61,7 +28,8 @@ class EmbeddingInputLayer(Layer): l = fluid.layers.data(name=slot, shape=[1], dtype="int64", lod_level=1) data_var.append(l) emb = fluid.layers.embedding(input=l, size=[10, self._emb_dim], \ - is_sparse=True, is_distributed=True, param_attr=fluid.ParamAttr(name="embedding")) + is_sparse=True, is_distributed=True, + param_attr=fluid.ParamAttr(name="embedding")) emb = fluid.layers.sequence_pool(input=emb, pool_type='sum') emb = fluid.layers.continuous_value_model(emb, show_clk, self._cvm) self._emb_layers.append(emb) @@ -72,6 +40,7 @@ class EmbeddingInputLayer(Layer): class LabelInputLayer(Layer): """R """ + def __init__(self, config): """R """ @@ -80,19 +49,20 @@ class LabelInputLayer(Layer): self._data_type = config.get('data_type', "int64") self._label_idx = config['label_idx'] - def generate_fluid(self, param): + def generate_fluid(self, param): """R """ label = fluid.layers.data(name=self._name, shape=[-1, self._dim], \ - dtype=self._data_type, lod_level=0, append_batch_size=False) + dtype=self._data_type, lod_level=0, append_batch_size=False) cast_label = fluid.layers.cast(label, dtype='float32') cast_label.stop_gradient = True return cast_label, {'data_var': [label]} -class TagInputLayer(Layer): +class TagInputLayer(Layer): """R """ + def __init__(self, config): """R """ @@ -101,17 +71,18 @@ class TagInputLayer(Layer): self._dim = config.get('dim', 1) self._data_type = config['data_type'] - def generate_fluid(self, param): + def generate_fluid(self, param): """R """ output = fluid.layers.data(name=self._name, shape=[-1, self._dim], \ - dtype=self._data_type, lod_level=0, append_batch_size=False, stop_gradient=True) + dtype=self._data_type, lod_level=0, append_batch_size=False, stop_gradient=True) return output, {'data_var': [output]} - -class ParamLayer(Layer): + +class ParamLayer(Layer): """R """ + def __init__(self, config): """R """ @@ -122,15 +93,16 @@ class ParamLayer(Layer): self._data_type = config.get('data_type', 'float32') self._config = config - def generate_fluid(self, param): + def generate_fluid(self, param): """R """ - return self._config, {'inference_param': {'name':'param', 'params': [], 'table_id': self._table_id}} + return self._config, {'inference_param': {'name': 'param', 'params': [], 'table_id': self._table_id}} -class SummaryLayer(Layer): +class SummaryLayer(Layer): """R """ + def __init__(self, config): """R """ @@ -139,41 +111,43 @@ class SummaryLayer(Layer): self._data_type = config.get('data_type', 'float32') self._config = config - def generate_fluid(self, param): + def generate_fluid(self, param): """R """ - return self._config, {'inference_param': {'name': 'summary', 'params': [], 'table_id': self._table_id}} + return self._config, {'inference_param': {'name': 'summary', 'params': [], 'table_id': self._table_id}} -class NormalizetionLayer(Layer): +class NormalizetionLayer(Layer): """R """ + def __init__(self, config): """R """ self._name = config['name'] self._input = config['input'] - self._summary = config['summary'] + self._summary = config['summary'] self._table_id = config.get('table_id', -1) - def generate_fluid(self, param): + def generate_fluid(self, param): """R """ input_layer = param['layer'][self._input[0]] summary_layer = param['layer'][self._summary] if len(self._input) > 0: - input_list=[param['layer'][i] for i in self._input] + input_list = [param['layer'][i] for i in self._input] input_layer = fluid.layers.concat(input=input_list, axis=1) bn = fluid.layers.data_norm(input=input_layer, name=self._name, epsilon=1e-4, param_attr={ - "batch_size": 1e4, "batch_sum_default": 0.0, "batch_square": 1e4}) + "batch_size": 1e4, "batch_sum_default": 0.0, "batch_square": 1e4}) inference_param = [self._name + '.batch_size', self._name + '.batch_sum', self._name + '.batch_square_sum'] - return bn, {'inference_param': {'name':'summary', \ - 'params': inference_param, 'table_id': summary_layer.get('table_id', -1)}} + return bn, {'inference_param': {'name': 'summary', \ + 'params': inference_param, 'table_id': summary_layer.get('table_id', -1)}} -class NeuralLayer(Layer): +class NeuralLayer(Layer): """R """ + def __init__(self, config): """R """ @@ -183,37 +157,38 @@ class NeuralLayer(Layer): self._bias = config.get('bias', True) self._act_func = config.get('act_func', None) - def generate_fluid(self, param): + def generate_fluid(self, param): """R """ param_layer = param['layer'][self._param] input_layer = param['layer'][self._input[0]] if len(self._input) > 0: - input_list=[param['layer'][i] for i in self._input] + input_list = [param['layer'][i] for i in self._input] input_layer = fluid.layers.concat(input=input_list, axis=1) input_coln = input_layer.shape[1] scale = param_layer['init_range'] / (input_coln ** 0.5) bias = None if self._bias: - bias = fluid.ParamAttr(learning_rate=1.0, - initializer=fluid.initializer.NormalInitializer(loc=0.0, scale=scale)) + bias = fluid.ParamAttr(learning_rate=1.0, + initializer=fluid.initializer.NormalInitializer(loc=0.0, scale=scale)) fc = fluid.layers.fc( - name = self._name, - input = input_layer, - size = param_layer['coln'], - act = self._act_func, - param_attr = \ + name=self._name, + input=input_layer, + size=param_layer['coln'], + act=self._act_func, + param_attr= \ fluid.ParamAttr(learning_rate=1.0, \ - initializer=fluid.initializer.NormalInitializer(loc=0.0, scale=scale)), - bias_attr = bias) + initializer=fluid.initializer.NormalInitializer(loc=0.0, scale=scale)), + bias_attr=bias) inference_param = [self._name + '.w_0', self._name + '.b_0'] - return fc, {'inference_param': {'name':'param', 'params': inference_param, \ - 'table_id': param_layer.get('table_id', -1)}} + return fc, {'inference_param': {'name': 'param', 'params': inference_param, \ + 'table_id': param_layer.get('table_id', -1)}} class SigmoidLossLayer(Layer): """R """ + def __init__(self, config): """R """ @@ -240,8 +215,8 @@ class SigmoidLossLayer(Layer): 'q': {'var': None} } } - - def generate_fluid(self, param): + + def generate_fluid(self, param): """R """ input_layer = param['layer'][self._input[0]] @@ -254,15 +229,16 @@ class SigmoidLossLayer(Layer): output = fluid.layers.elementwise_mul(output, weight_layer) output = fluid.layers.mean(x=output) self._extend_output['loss'] = output - - #For AUC Metric + + # For AUC Metric metric = self._extend_output['metric_dict'] binary_predict = fluid.layers.concat( input=[fluid.layers.elementwise_sub(fluid.layers.ceil(norm), norm), norm], axis=1) metric['auc']['var'], metric['batch_auc']['var'], [metric['batch_stat_pos']['var'], \ - metric['batch_stat_neg']['var'], metric['stat_pos']['var'], metric['stat_neg']['var']] = \ + metric['batch_stat_neg']['var'], metric['stat_pos']['var'], + metric['stat_neg']['var']] = \ fluid.layers.auc(input=binary_predict, label=fluid.layers.cast(x=label_layer, dtype='int64'), \ - curve='ROC', num_thresholds=32) + curve='ROC', num_thresholds=32) metric['sqrerr']['var'], metric['abserr']['var'], metric['prob']['var'], metric['q']['var'], \ metric['pos_ins_num']['var'], metric['total_ins_num']['var'] = \ diff --git a/kagle/models/word2vec/hyper_parameters.yaml b/kagle/models/word2vec/hyper_parameters.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/kagle/models/word2vec/model.py b/kagle/models/word2vec/model.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/kagle/models/word2vec/reader.py b/kagle/models/word2vec/reader.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/kagle/models/word2vec/sample_test.txt b/kagle/models/word2vec/sample_test.txt new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/kagle/models/word2vec/sample_train.txt b/kagle/models/word2vec/sample_train.txt new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/kagle/reader/__init__.py b/kagle/reader/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/kagle/reader/data_loader.py b/kagle/reader/data_loader.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/kagle/kagle_dataset.py b/kagle/reader/dataset.py similarity index 98% rename from kagle/kagle_dataset.py rename to kagle/reader/dataset.py index c9800f2f651ce26af5d7b7979c91d071319b69b5..96c6fd50081a92cd6402361b2575cb42b2e033ab 100755 --- a/kagle/kagle_dataset.py +++ b/kagle/reader/dataset.py @@ -7,9 +7,9 @@ import yaml import time import datetime import paddle.fluid as fluid -import kagle.kagle_fs as kagle_fs -import kagle.kagle_util as kagle_util -import kagle.kagle_layer as kagle_layer +import kagle.utils.kagle_fs as kagle_fs +import kagle.utils.kagle_util as kagle_util + class Dataset(object): """ diff --git a/kagle/trainer/ctr_trainer.py b/kagle/trainer/ctr_trainer.py index 0477c0f3fc13b9e8487e452c886b3ccd652a8c57..93e282e462e3217e1a717008b6ff4a86dfae36ee 100755 --- a/kagle/trainer/ctr_trainer.py +++ b/kagle/trainer/ctr_trainer.py @@ -8,23 +8,68 @@ import yaml import time import json import datetime + +import numpy as np + import paddle.fluid as fluid -import kagle.kagle_fs as kagle_fs -import kagle.kagle_util as kagle_util +import kagle.utils.kagle_fs as kagle_fs +import kagle.utils.kagle_util as kagle_util import kagle.kagle_model as kagle_model import kagle.kagle_metric as kagle_metric -import kagle.kagle_dataset as kagle_dataset +import kagle.reader.dataset as kagle_dataset import kagle.trainer.kagle_trainer as kagle_trainer from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet from paddle.fluid.incubate.fleet.base.role_maker import GeneralRoleMaker + +def wroker_numric_opt(value, env, opt): + """ + numric count opt for workers + Args: + value: value for count + env: mpi/gloo + opt: count operator, SUM/MAX/MIN/AVG + Return: + count result + """ + local_value = np.array([value]) + global_value = np.copy(local_value) * 0 + fleet._role_maker.all_reduce_worker(local_value, global_value, opt) + return global_value[0] + + +def worker_numric_sum(value, env="mpi"): + """R + """ + return wroker_numric_opt(value, env, "sum") + + +def worker_numric_avg(value, env="mpi"): + """R + """ + return worker_numric_sum(value, env) / fleet.worker_num() + + +def worker_numric_min(value, env="mpi"): + """R + """ + return wroker_numric_opt(value, env, "min") + + +def worker_numric_max(value, env="mpi"): + """R + """ + return wroker_numric_opt(value, env, "max") + + class CtrPaddleTrainer(kagle_trainer.Trainer): """R """ + def __init__(self, config): """R """ - kagle_trainer.Trainer.__init__(self, config) + kagle_trainer.Trainer.__init__(self, config) config['output_path'] = kagle_util.get_absolute_path( config['output_path'], config['io']['afs']) self.global_config = config @@ -43,7 +88,7 @@ class CtrPaddleTrainer(kagle_trainer.Trainer): }) if 'path_generator' in config: self._path_generator.add_path_template(config['path_generator']) - + self.regist_context_processor('uninit', self.init) self.regist_context_processor('startup', self.startup) self.regist_context_processor('begin_day', self.begin_day) @@ -56,7 +101,7 @@ class CtrPaddleTrainer(kagle_trainer.Trainer): role_maker = None if self.global_config.get('process_mode', 'mpi') == 'brilliant_cpu': afs_config = self.global_config['io']['afs'] - role_maker = fluid.incubate.fleet.base.role_maker.GeneralRoleMaker( + role_maker = GeneralRoleMaker( hdfs_name=afs_config['fs_name'], hdfs_ugi=afs_config['fs_ugi'], path=self.global_config['output_path'] + "/gloo", init_timeout_seconds=1200, run_timeout_seconds=1200) @@ -72,7 +117,7 @@ class CtrPaddleTrainer(kagle_trainer.Trainer): self._exector_context[executor['name']] = {} self._exector_context[executor['name']]['scope'] = scope self._exector_context[executor['name']]['model'] = kagle_model.create(executor) - model = self._exector_context[executor['name']]['model'] + model = self._exector_context[executor['name']]['model'] self._metrics.update(model.get_metrics()) runnnable_scope.append(scope) runnnable_cost_op.append(model.get_cost_op()) @@ -80,16 +125,16 @@ class CtrPaddleTrainer(kagle_trainer.Trainer): if var.name in data_var_name_dict: continue data_var_list.append(var) - data_var_name_dict[var.name] = var + data_var_name_dict[var.name] = var optimizer = kagle_model.FluidModel.build_optimizer({ - 'metrics': self._metrics, + 'metrics': self._metrics, 'optimizer_conf': self.global_config['optimizer'] }) optimizer.minimize(runnnable_cost_op, runnnable_scope) for executor in self.global_config['executor']: scope = self._exector_context[executor['name']]['scope'] - model = self._exector_context[executor['name']]['model'] + model = self._exector_context[executor['name']]['model'] program = model._build_param['model']['train_program'] if not executor['is_update_sparse']: program._fleet_opt["program_configs"][str(id(model.get_cost_op().block.program))]["push_sparse"] = [] @@ -99,17 +144,17 @@ class CtrPaddleTrainer(kagle_trainer.Trainer): self._exe.run(model._build_param['model']['startup_program']) model.dump_model_program('./') - #server init done + # server init done if fleet.is_server(): return 0 - + self._dataset = {} for dataset_item in self.global_config['dataset']['data_list']: dataset_item['data_vars'] = data_var_list dataset_item.update(self.global_config['io']['afs']) dataset_item["batch_size"] = self.global_config['batch_size'] self._dataset[dataset_item['name']] = kagle_dataset.FluidTimeSplitDataset(dataset_item) - #if config.need_reqi_changeslot and config.reqi_dnn_plugin_day >= last_day and config.reqi_dnn_plugin_pass >= last_pass: + # if config.need_reqi_changeslot and config.reqi_dnn_plugin_day >= last_day and config.reqi_dnn_plugin_pass >= last_pass: # util.reqi_changeslot(config.hdfs_dnn_plugin_path, join_save_params, common_save_params, update_save_params, scope2, scope3) fleet.init_worker() pass @@ -118,48 +163,56 @@ class CtrPaddleTrainer(kagle_trainer.Trainer): """R """ params['index'] = fleet.worker_index() - return kagle_util.print_log(log_str, params) + if params['master']: + if fleet.worker_index() == 0: + print(log_str) + sys.stdout.flush() + else: + print(log_str) + if 'stdout' in params: + params['stdout'] += str(datetime.datetime.now()) + log_str def print_global_metrics(self, scope, model, monitor_data, stdout_str): """R """ metrics = model.get_metrics() - metric_calculator = kagle_metric.PaddleAUCMetric(None) + metric_calculator = kagle_metric.AUCMetric(None) for metric in metrics: - metric_param = {'label': metric, 'metric_dict': metrics[metric]} + metric_param = {'label': metric, 'metric_dict': metrics[metric]} metric_calculator.calculate(scope, metric_param) - metric_result = metric_calculator.get_result_to_string() + metric_result = metric_calculator.get_result_to_string() self.print_log(metric_result, {'master': True, 'stdout': stdout_str}) monitor_data += metric_result metric_calculator.clear(scope, metric_param) - + def save_model(self, day, pass_index, base_key): """R """ - cost_printer = kagle_util.CostPrinter(kagle_util.print_cost, - {'master': True, 'log_format': 'save model cost %s sec'}) + cost_printer = kagle_util.CostPrinter(kagle_util.print_cost, + {'master': True, 'log_format': 'save model cost %s sec'}) model_path = self._path_generator.generate_path('batch_model', {'day': day, 'pass_id': pass_index}) - save_mode = 0 # just save all - if pass_index < 1: #batch_model - save_mode = 3 # unseen_day++, save all + save_mode = 0 # just save all + if pass_index < 1: # batch_model + save_mode = 3 # unseen_day++, save all kagle_util.rank0_print("going to save_model %s" % model_path) fleet.save_persistables(None, model_path, mode=save_mode) if fleet._role_maker.is_first_worker(): self._train_pass.save_train_progress(day, pass_index, base_key, model_path, is_checkpoint=True) cost_printer.done() return model_path - + def save_xbox_model(self, day, pass_index, xbox_base_key, monitor_data): """R """ stdout_str = "" xbox_patch_id = str(int(time.time())) kagle_util.rank0_print("begin save delta model") - + model_path = "" xbox_model_donefile = "" cost_printer = kagle_util.CostPrinter(kagle_util.print_cost, {'master': True, \ - 'log_format': 'save xbox model cost %s sec', 'stdout': stdout_str}) + 'log_format': 'save xbox model cost %s sec', + 'stdout': stdout_str}) if pass_index < 1: save_mode = 2 xbox_patch_id = xbox_base_key @@ -172,14 +225,15 @@ class CtrPaddleTrainer(kagle_trainer.Trainer): total_save_num = fleet.save_persistables(None, model_path, mode=save_mode) cost_printer.done() - cost_printer = kagle_util.CostPrinter(kagle_util.print_cost, {'master': True, - 'log_format': 'save cache model cost %s sec', 'stdout': stdout_str}) + cost_printer = kagle_util.CostPrinter(kagle_util.print_cost, {'master': True, + 'log_format': 'save cache model cost %s sec', + 'stdout': stdout_str}) model_file_handler = kagle_fs.FileHandler(self.global_config['io']['afs']) if self.global_config['save_cache_model']: cache_save_num = fleet.save_cache_model(None, model_path, mode=save_mode) model_file_handler.write( - "file_prefix:part\npart_num:16\nkey_num:%d\n" % cache_save_num, - model_path + '/000_cache/sparse_cache.meta', 'w') + "file_prefix:part\npart_num:16\nkey_num:%d\n" % cache_save_num, + model_path + '/000_cache/sparse_cache.meta', 'w') cost_printer.done() kagle_util.rank0_print("save xbox cache model done, key_num=%s" % cache_save_num) @@ -187,8 +241,9 @@ class CtrPaddleTrainer(kagle_trainer.Trainer): 'executor': self._exe, 'save_combine': True } - cost_printer = kagle_util.CostPrinter(kagle_util.print_cost, {'master': True, - 'log_format': 'save dense model cost %s sec', 'stdout': stdout_str}) + cost_printer = kagle_util.CostPrinter(kagle_util.print_cost, {'master': True, + 'log_format': 'save dense model cost %s sec', + 'stdout': stdout_str}) if fleet._role_maker.is_first_worker(): for executor in self.global_config['executor']: if 'layer_for_inference' not in executor: @@ -196,11 +251,11 @@ class CtrPaddleTrainer(kagle_trainer.Trainer): executor_name = executor['name'] model = self._exector_context[executor_name]['model'] save_env_param['inference_list'] = executor['layer_for_inference'] - save_env_param['scope'] = self._exector_context[executor_name]['scope'] + save_env_param['scope'] = self._exector_context[executor_name]['scope'] model.dump_inference_param(save_env_param) for dnn_layer in executor['layer_for_inference']: - model_file_handler.cp(dnn_layer['save_file_name'], - model_path + '/dnn_plugin/' + dnn_layer['save_file_name']) + model_file_handler.cp(dnn_layer['save_file_name'], + model_path + '/dnn_plugin/' + dnn_layer['save_file_name']) fleet._role_maker._barrier_worker() cost_printer.done() @@ -222,8 +277,8 @@ class CtrPaddleTrainer(kagle_trainer.Trainer): if pass_index > 0: self._train_pass.save_train_progress(day, pass_index, xbox_base_key, model_path, is_checkpoint=False) fleet._role_maker._barrier_worker() - return stdout_str - + return stdout_str + def run_executor(self, executor_config, dataset, stdout_str): """R """ @@ -238,12 +293,12 @@ class CtrPaddleTrainer(kagle_trainer.Trainer): begin = time.time() program = model._build_param['model']['train_program'] self._exe.train_from_dataset(program, dataset, scope, - thread=executor_config['train_thread_num'], debug=self.global_config['debug']) + thread=executor_config['train_thread_num'], debug=self.global_config['debug']) end = time.time() local_cost = (end - begin) / 60.0 - avg_cost = kagle_util.worker_numric_avg(local_cost) - min_cost = kagle_util.worker_numric_min(local_cost) - max_cost = kagle_util.worker_numric_max(local_cost) + avg_cost = worker_numric_avg(local_cost) + min_cost = worker_numric_min(local_cost) + max_cost = worker_numric_max(local_cost) kagle_util.rank0_print("avg train time %s mins, min %s mins, max %s mins" % (avg_cost, min_cost, max_cost)) self._exector_context[executor_name]['cost'] = max_cost @@ -264,13 +319,14 @@ class CtrPaddleTrainer(kagle_trainer.Trainer): stdout_str = "" self._train_pass = kagle_util.TimeTrainPass(self.global_config) if not self.global_config['cold_start']: - cost_printer = kagle_util.CostPrinter(kagle_util.print_cost, - {'master': True, 'log_format': 'load model cost %s sec', 'stdout': stdout_str}) + cost_printer = kagle_util.CostPrinter(kagle_util.print_cost, + {'master': True, 'log_format': 'load model cost %s sec', + 'stdout': stdout_str}) self.print_log("going to load model %s" % self._train_pass._checkpoint_model_path, {'master': True}) - #if config.need_reqi_changeslot and config.reqi_dnn_plugin_day >= self._train_pass.date() + # if config.need_reqi_changeslot and config.reqi_dnn_plugin_day >= self._train_pass.date() # and config.reqi_dnn_plugin_pass >= self._pass_id: # fleet.load_one_table(0, self._train_pass._checkpoint_model_path) - #else: + # else: fleet.init_server(self._train_pass._checkpoint_model_path, mode=0) cost_printer.done() if self.global_config['save_first_base']: @@ -279,7 +335,7 @@ class CtrPaddleTrainer(kagle_trainer.Trainer): self._train_pass._base_key = int(time.time()) stdout_str += self.save_xbox_model(self._train_pass.date(), 0, self._train_pass._base_key, "") context['status'] = 'begin_day' - + def begin_day(self, context): """R """ @@ -293,7 +349,7 @@ class CtrPaddleTrainer(kagle_trainer.Trainer): context['status'] = 'end_day' else: context['status'] = 'train_pass' - + def end_day(self, context): """R """ @@ -303,8 +359,8 @@ class CtrPaddleTrainer(kagle_trainer.Trainer): context['status'] = 'begin_day' kagle_util.rank0_print("shrink table") - cost_printer = kagle_util.CostPrinter(kagle_util.print_cost, - {'master': True, 'log_format': 'shrink table done, cost %s sec'}) + cost_printer = kagle_util.CostPrinter(kagle_util.print_cost, + {'master': True, 'log_format': 'shrink table done, cost %s sec'}) fleet.shrink_sparse_table() for executor in self._exector_context: self._exector_context[executor]['model'].shrink({ @@ -333,7 +389,8 @@ class CtrPaddleTrainer(kagle_trainer.Trainer): train_begin_time = time.time() cost_printer = kagle_util.CostPrinter(kagle_util.print_cost, \ - {'master': True, 'log_format': 'load into memory done, cost %s sec', 'stdout': stdout_str}) + {'master': True, 'log_format': 'load into memory done, cost %s sec', + 'stdout': stdout_str}) current_dataset = {} for name in self._dataset: current_dataset[name] = self._dataset[name].load_dataset({ @@ -342,11 +399,11 @@ class CtrPaddleTrainer(kagle_trainer.Trainer): }) fleet._role_maker._barrier_worker() cost_printer.done() - + kagle_util.rank0_print("going to global shuffle") - cost_printer = kagle_util.CostPrinter(kagle_util.print_cost, { + cost_printer = kagle_util.CostPrinter(kagle_util.print_cost, { 'master': True, 'stdout': stdout_str, - 'log_format': 'global shuffle done, cost %s sec'}) + 'log_format': 'global shuffle done, cost %s sec'}) for name in current_dataset: current_dataset[name].global_shuffle(fleet, self.global_config['dataset']['shuffle_thread']) cost_printer.done() @@ -354,34 +411,34 @@ class CtrPaddleTrainer(kagle_trainer.Trainer): fleet._role_maker._barrier_worker() if self.global_config['prefetch_data']: - next_pass_time = (self._train_pass._current_train_time + - datetime.timedelta(minutes=self._train_pass._interval_per_pass)).strftime("%Y%m%d%H%M") + next_pass_time = (self._train_pass._current_train_time + + datetime.timedelta(minutes=self._train_pass._interval_per_pass)).strftime("%Y%m%d%H%M") for name in self._dataset: self._dataset[name].preload_dataset({ 'node_num': fleet.worker_num(), 'node_idx': fleet.worker_index(), 'begin_time': next_pass_time, 'time_window_min': self._train_pass._interval_per_pass }) - + fleet._role_maker._barrier_worker() pure_train_begin = time.time() for executor in self.global_config['executor']: self.run_executor(executor, current_dataset[executor['dataset_name']], stdout_str) cost_printer = kagle_util.CostPrinter(kagle_util.print_cost, \ - {'master': True, 'log_format': 'release_memory cost %s sec'}) + {'master': True, 'log_format': 'release_memory cost %s sec'}) for name in current_dataset: current_dataset[name].release_memory() pure_train_cost = time.time() - pure_train_begin - + if self._train_pass.is_checkpoint_pass(pass_id): self.save_model(day, pass_id, base_key) train_end_time = time.time() train_cost = train_end_time - train_begin_time - other_cost = train_cost - pure_train_cost + other_cost = train_cost - pure_train_cost log_str = "finished train day %s pass %s time cost:%s sec job time cost:" % (day, pass_id, train_cost) for executor in self._exector_context: log_str += '[' + executor + ':' + str(self._exector_context[executor]['cost']) + ']' - log_str += '[other_cost:' + str(other_cost) + ']' + log_str += '[other_cost:' + str(other_cost) + ']' kagle_util.rank0_print(log_str) stdout_str += kagle_util.now_time_str() + log_str sys.stdout.write(stdout_str) diff --git a/kagle/trainer/kagle_trainer.py b/kagle/trainer/kagle_trainer.py index 62e65a7709cbc4818a7ae1722a8a49d447c5a09b..331cf3481536cd01a53f5abd914a567be5922f60 100755 --- a/kagle/trainer/kagle_trainer.py +++ b/kagle/trainer/kagle_trainer.py @@ -2,13 +2,14 @@ Define A Trainer Base """ import abc -import sys import time + class Trainer(object): """R """ __metaclass__ = abc.ABCMeta + def __init__(self, config): """R """ diff --git a/kagle/utils/__init__.py b/kagle/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/kagle/kagle_fs.py b/kagle/utils/kagle_fs.py similarity index 99% rename from kagle/kagle_fs.py rename to kagle/utils/kagle_fs.py index 533523547554c7fe99a9965b7562038d48c92b98..f8d6bb98d36ff036001b6bdeba81d7406e9313e0 100755 --- a/kagle/kagle_fs.py +++ b/kagle/utils/kagle_fs.py @@ -5,6 +5,7 @@ import os import time from paddle.fluid.incubate.fleet.utils.hdfs import HDFSClient + def is_afs_path(path): """R """ @@ -162,4 +163,3 @@ class FileHandler(object): if org_is_afs and not dest_is_afs: return self._hdfs_client.download(org_path, dest_path) print("Not Suppor hdfs cp currently") - diff --git a/kagle/kagle_table.py b/kagle/utils/kagle_table.py similarity index 99% rename from kagle/kagle_table.py rename to kagle/utils/kagle_table.py index f18dd0f4ce19cd6f161ee7350c53866d8e2f612d..6d3ccb3ae85a45e9f538d7055921bc8dcda0ad6a 100755 --- a/kagle/kagle_table.py +++ b/kagle/utils/kagle_table.py @@ -4,6 +4,7 @@ Construct ParamTable Meta import copy import yaml + class TableMeta(object): """ Simple ParamTable Meta, Contain table_id diff --git a/kagle/kagle_util.py b/kagle/utils/kagle_util.py similarity index 78% rename from kagle/kagle_util.py rename to kagle/utils/kagle_util.py index b64f7076b491c07916cd1fb163803ef2f4105674..5173a793ec8970e9fb70acf431d0698666ca59c1 100755 --- a/kagle/kagle_util.py +++ b/kagle/utils/kagle_util.py @@ -2,12 +2,10 @@ Util lib """ import os -import sys import time import datetime -import numpy as np -import kagle.kagle_fs as kagle_fs -from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet +import kagle.utils.kagle_fs as kagle_fs + def get_env_value(env_name): """ @@ -28,7 +26,7 @@ def get_absolute_path(path, params): """ if path.startswith('afs:') or path.startswith('hdfs:'): sub_path = path.split('fs:')[1] - if ':' in sub_path: #such as afs://xxx:prot/xxxx + if ':' in sub_path: # such as afs://xxx:prot/xxxx return path elif 'fs_name' in params: return params['fs_name'] + sub_path @@ -46,96 +44,45 @@ def make_datetime(date_str, fmt=None): datetime """ if fmt is None: - if len(date_str) == 8: #%Y%m%d + if len(date_str) == 8: # %Y%m%d return datetime.datetime.strptime(date_str, '%Y%m%d') - if len(date_str) == 12: #%Y%m%d%H%M + if len(date_str) == 12: # %Y%m%d%H%M return datetime.datetime.strptime(date_str, '%Y%m%d%H%M') return datetime.datetime.strptime(date_str, fmt) -def wroker_numric_opt(value, env, opt): - """ - numric count opt for workers - Args: - value: value for count - env: mpi/gloo - opt: count operator, SUM/MAX/MIN/AVG - Return: - count result - """ - local_value = np.array([value]) - global_value = np.copy(local_value) * 0 - fleet._role_maker.all_reduce_worker(local_value, global_value, opt) - return global_value[0] - -def worker_numric_sum(value, env="mpi"): - """R - """ - return wroker_numric_opt(value, env, "sum") - - -def worker_numric_avg(value, env="mpi"): - """R - """ - return worker_numric_sum(value, env) / fleet.worker_num() - - -def worker_numric_min(value, env="mpi"): - """R - """ - return wroker_numric_opt(value, env, "min") - - -def worker_numric_max(value, env="mpi"): - """R - """ - return wroker_numric_opt(value, env, "max") - - def rank0_print(log_str): """R """ print_log(log_str, {'master': True}) -def print_log(log_str, params): - """R - """ - if params['master']: - if fleet.worker_index() == 0: - print(log_str) - sys.stdout.flush() - else: - print(log_str) - if 'stdout' in params: - params['stdout'] += str(datetime.datetime.now()) + log_str - - def print_cost(cost, params): """R """ log_str = params['log_format'] % cost - print_log(log_str, params) + print_log(log_str, params) return log_str - + class CostPrinter(object): """ For count cost time && print cost log """ + def __init__(self, callback, callback_params): """R """ self.reset(callback, callback_params) pass - + def __del__(self): """R """ if not self._done: self.done() pass - + def reset(self, callback, callback_params): """R """ @@ -144,12 +91,12 @@ class CostPrinter(object): self._callback_params = callback_params self._begin_time = time.time() pass - + def done(self): """R """ cost = time.time() - self._begin_time - log_str = self._callback(cost, self._callback_params) #cost(s) + log_str = self._callback(cost, self._callback_params) # cost(s) self._done = True return cost, log_str @@ -158,13 +105,14 @@ class PathGenerator(object): """ generate path with template & runtime variables """ + def __init__(self, config): """R """ - self._templates = {} + self._templates = {} self.add_path_template(config) pass - + def add_path_template(self, config): """R """ @@ -190,6 +138,7 @@ class TimeTrainPass(object): timely pass define pass time_interval && start_time && end_time """ + def __init__(self, global_config): """R """ @@ -199,20 +148,20 @@ class TimeTrainPass(object): day_fields = day_str.split('+') self._begin_day = make_datetime(day_fields[0].strip()) if len(day_fields) == 1 or len(day_fields[1]) == 0: - #100 years, meaning to continuous running - self._end_day = self._begin_day + datetime.timedelta(days=36500) - else: + # 100 years, meaning to continuous running + self._end_day = self._begin_day + datetime.timedelta(days=36500) + else: # example: 2020212+10 run_day = int(day_fields[1].strip()) - self._end_day =self._begin_day + datetime.timedelta(days=run_day) - else: + self._end_day = self._begin_day + datetime.timedelta(days=run_day) + else: # example: {20191001..20191031} days = os.popen("echo -n " + self._config['days']).read().split(" ") self._begin_day = make_datetime(days[0]) self._end_day = make_datetime(days[len(days) - 1]) self._checkpoint_interval = self._config['checkpoint_interval'] self._dump_inference_interval = self._config['dump_inference_interval'] - self._interval_per_pass = self._config['train_time_interval'] #train N min data per pass + self._interval_per_pass = self._config['train_time_interval'] # train N min data per pass self._pass_id = 0 self._inference_pass_id = 0 @@ -223,29 +172,29 @@ class TimeTrainPass(object): self._pass_donefile_handler = kagle_fs.FileHandler(global_config['io']['afs']) else: self._pass_donefile_handler = kagle_fs.FileHandler(global_config['io']['local_fs']) - + last_done = self._pass_donefile_handler.cat(self._train_pass_donefile).strip().split('\n')[-1] done_fileds = last_done.split('\t') if len(done_fileds) > 4: self._base_key = done_fileds[1] self._checkpoint_model_path = done_fileds[2] self._checkpoint_pass_id = int(done_fileds[3]) - self._inference_pass_id = int(done_fileds[4]) + self._inference_pass_id = int(done_fileds[4]) self.init_pass_by_id(done_fileds[0], self._checkpoint_pass_id) def max_pass_num_day(self): """R """ return 24 * 60 / self._interval_per_pass - + def save_train_progress(self, day, pass_id, base_key, model_path, is_checkpoint): """R """ if is_checkpoint: self._checkpoint_pass_id = pass_id self._checkpoint_model_path = model_path - done_content = "%s\t%s\t%s\t%s\t%d\n" % (day, base_key, - self._checkpoint_model_path, self._checkpoint_pass_id, pass_id) + done_content = "%s\t%s\t%s\t%s\t%d\n" % (day, base_key, + self._checkpoint_model_path, self._checkpoint_pass_id, pass_id) self._pass_donefile_handler.write(done_content, self._train_pass_donefile, 'a') pass @@ -256,7 +205,7 @@ class TimeTrainPass(object): date_str: example "20200110" pass_id(int): pass_id of date """ - date_time = make_datetime(date_str) + date_time = make_datetime(date_str) if pass_id < 1: pass_id = 0 if (date_time - self._begin_day).total_seconds() > 0: @@ -264,7 +213,7 @@ class TimeTrainPass(object): self._pass_id = pass_id mins = self._interval_per_pass * (pass_id - 1) self._current_train_time = date_time + datetime.timedelta(minutes=mins) - + def init_pass_by_time(self, datetime_str): """ init pass context with datetime @@ -279,7 +228,7 @@ class TimeTrainPass(object): """R """ return self._pass_id - + def next(self): """R """ @@ -307,7 +256,7 @@ class TimeTrainPass(object): if pass_id % self._checkpoint_interval == 0: return True return False - + def need_dump_inference(self, pass_id): """R """