Unverified · Commit 29bbeb07 authored by Thunderbrook, committed by GitHub

unit double (#32902)

* unit double

* unit double
Parent 59997d53
...
@@ -287,6 +287,15 @@ if(WITH_DISTRIBUTE)
       graph_to_program_pass variable_helper timer monitor)
   endif()
 elseif(WITH_PSLIB)
+  set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
+  if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0)
+      set(DISTRIBUTE_COMPILE_FLAGS
+              "${DISTRIBUTE_COMPILE_FLAGS} -faligned-new")
+  endif()
+  set_source_files_properties(executor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
+  set_source_files_properties(device_worker.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
+  set_source_files_properties(hetercpu_worker.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
+  set_source_files_properties(heterxpu_trainer.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
   cc_library(executor SRCS executor.cc multi_trainer.cc pipeline_trainer.cc dataset_factory.cc
       dist_multi_trainer.cc trainer_factory.cc trainer.cc data_feed_factory.cc
       heterxpu_trainer.cc
...
@@ -123,7 +123,7 @@ class DownpourServer(Server):
         support_accessor_class = [
             'DownpourFeatureValueAccessor', 'DownpourCtrAccessor',
             'DownpourSparseValueAccessor', 'DownpourCtrDoubleAccessor',
-            'DownpourUnitAccessor'
+            'DownpourUnitAccessor', 'DownpourDoubleUnitAccessor'
         ]
         if strategy.get('sparse_accessor_class') is not None:
             accessor_class = strategy.get('sparse_accessor_class')
@@ -254,7 +254,7 @@ class DownpourServer(Server):
             table2.param = 2
             table2.converter = converter
             table2.deconverter = deconverter
-        elif accessor_class == 'DownpourUnitAccessor':
+        elif accessor_class == 'DownpourUnitAccessor' or accessor_class == 'DownpourDoubleUnitAccessor':
             self.add_sparse_table_common_config(table, strategy)
             self.add_sparse_optimizer(table.accessor.embed_sgd_param,
                                       strategy, "embed_")
@@ -380,7 +380,7 @@ class DownpourServer(Server):
         table.accessor.fea_dim = fea_dim

     def add_sparse_optimizer(self, sgd, strategy, prefix):
-        optimizer_name = strategy.get(prefix + "sparse_optimizer", "adam")
+        optimizer_name = strategy.get(prefix + "sparse_optimizer", "adagrad")
         sgd.name = optimizer_name
         if optimizer_name == "naive":
             sgd.naive.learning_rate = \
@@ -394,6 +394,19 @@ class DownpourServer(Server):
                 strategy.get(prefix + 'sparse_learning_rate', 0.05)
             sgd.adagrad.initial_range = \
                 strategy.get(prefix + 'sparse_initial_range', 1e-4)
+            if prefix == "embed_":
+                sgd.adagrad.initial_range = 0
+            sgd.adagrad.initial_g2sum = strategy.get(
+                prefix + 'sparse_initial_g2sum', 3)
+            bounds = strategy.get(prefix + 'sparse_weight_bounds', [-10, 10])
+            sgd.adagrad.weight_bounds.extend(bounds)
+        elif optimizer_name == "std_adagrad":
+            sgd.adagrad.learning_rate = \
+                strategy.get(prefix + 'sparse_learning_rate', 0.05)
+            sgd.adagrad.initial_range = \
+                strategy.get(prefix + 'sparse_initial_range', 1e-4)
+            if prefix == "embed_":
+                sgd.adagrad.initial_range = 0
             sgd.adagrad.initial_g2sum = strategy.get(
                 prefix + 'sparse_initial_g2sum', 3)
             bounds = strategy.get(prefix + 'sparse_weight_bounds', [-10, 10])
...
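As a reading aid for the hunk above: the adagrad and std_adagrad branches resolve their hyperparameters from the strategy dict with identical defaults, and both force initial_range to 0 for the embed_ component. The standalone helper below is hypothetical (not in the PR) and only mirrors that lookup order.

# Hypothetical helper mirroring the defaults used by add_sparse_optimizer
# above; for illustration only.
def resolve_adagrad_params(strategy, prefix):
    params = {
        "learning_rate": strategy.get(prefix + "sparse_learning_rate", 0.05),
        "initial_range": strategy.get(prefix + "sparse_initial_range", 1e-4),
        "initial_g2sum": strategy.get(prefix + "sparse_initial_g2sum", 3),
        "weight_bounds": strategy.get(prefix + "sparse_weight_bounds",
                                      [-10, 10]),
    }
    if prefix == "embed_":
        # The embedding component always starts from a zero initial range.
        params["initial_range"] = 0
    return params

# Example: resolve_adagrad_params({}, "embed_")["initial_range"] == 0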
@@ -489,6 +489,7 @@ class DistributedAdam(DistributedOptimizerImplBase):
                 # user do not have to set it in config_fleet
                 if accessor == "DownpourFeatureValueAccessor" \
                         or accessor == "DownpourCtrAccessor" \
+                        or accessor == "DownpourDoubleUnitAccessor" \
                         or accessor == "DownpourUnitAccessor":
                     if st.get("sparse_embedx_dim") is not None \
                             and st["sparse_embedx_dim"] != emb_to_size[key] - 3:
@@ -769,7 +770,7 @@ class DistributedAdam(DistributedOptimizerImplBase):
         if server._server.downpour_server_param.downpour_table_param[
                 0].accessor.accessor_class in [
                     "DownpourCtrAccessor", "DownpourCtrDoubleAccessor",
-                    "DownpourUnitAccessor"
+                    "DownpourUnitAccessor", "DownpourDoubleUnitAccessor"
                 ]:
             opt_info["dump_slot"] = True
         elif server._server.downpour_server_param.downpour_table_param[
...
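The last hunk widens the set of accessor classes that turn on slot dumping. The membership test is equivalent to the set-based check below (a hypothetical rewrite, for readability only).

# Hypothetical set-based rewrite of the membership test above.
DUMP_SLOT_ACCESSORS = {
    "DownpourCtrAccessor",
    "DownpourCtrDoubleAccessor",
    "DownpourUnitAccessor",
    "DownpourDoubleUnitAccessor",
}

def needs_dump_slot(accessor_class):
    # True for every accessor class the diff lists for dump_slot.
    return accessor_class in DUMP_SLOT_ACCESSORS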