"Input(Tensor or LoDTensor): The parameter to be accumulated.");
AddInput("param","(Tensor), The parameter to be accumulated.");
AddInput("in_sum_1",
"Input(Tensor or LoDTensor): A tensor used to store the parameter "
"(Tensor), A tensor used to store the parameter "
"sums with the same shape as input(param).");
AddInput("in_sum_2",
"Input(Tensor or LoDTensor): A auxiliary tensor to help "
"(Tensor), A auxiliary tensor to help "
"accumulating sums of parameter values with the same shape as "
"input(param). It is used to avoid loss of precision due to too "
"many sums.");
AddInput("in_sum_3",
"Input(Tensor or LoDTensor): A auxiliary tensor to help "
"(Tensor), A auxiliary tensor to help "
"accumulating sums of parameter values with the same shape as "
"input(param).");
AddInput("in_num_accumulates",
"Input(Tensor): The accumulating times of current window with "
"shape [1].");
AddInput("in_old_num_accumulates",
"Input(Tensor): The accumulating times of previous window with "
"(Tensor<int64_t>), The accumulating times of current window with "
"shape [1].");
AddInput(
"in_old_num_accumulates",
"(Tensor<int64_t>), The accumulating times of previous window with "
"shape [1].");
AddInput("in_num_updates",
"Input(Tensor): The total number of batches used by trainning "
"(Tensor<int64_t>), The total number of batches used by trainning "
"before this batch with shape [1].");
AddOutput("out_sum_1",
"Output(Tensor or LoDTensor): A tensor used to store the "
"(Tensor), A tensor used to store the "
"parameter sums with the same shape as input(param).");
AddOutput("out_sum_2",
"Output(Tensor or LoDTensor): A auxiliary tensor to help "
"(Tensor), A auxiliary tensor to help "
"accumulating sums of parameter values with the same shape as "
"input(param). It is used to avoid loss of precision due to too "
"many sums.");
AddOutput("out_sum_3",
"Output(Tensor or LoDTensor): A auxiliary tensor to help "
"(Tensor), A auxiliary tensor to help "
"accumulating sums of parameter values with the same shape as "
"input(param).");
AddOutput("out_num_accumulates",
"Output(Tensor): The accumulating times of current window with "
"shape [1].");
AddOutput("out_old_num_accumulates",
"Output(Tensor): The accumulating times of previous window with "
"shape [1].");
AddOutput("out_num_updates",
"Output(Tensor): The total number of batches used by trainning "
"before this batch with shape [1].");
AddOutput(
"out_num_accumulates",
"(Tensor<int64_t>), The accumulating times of current window with "
"shape [1].");
AddOutput(
"out_old_num_accumulates",
"(Tensor<int64_t>) The accumulating times of previous window with "
"shape [1].");
AddOutput(
"out_num_updates",
"(Tensor<int64_t>), The total number of batches used by trainning "
"before this batch with shape [1].");
AddAttr<float>("average_window",
"The rate of average window size relative to num_updates.");
AddAttr<int64_t>("max_average_window","Maximum size of average window.");
AddAttr<int64_t>("min_average_window","Minimu size of average window.");
"(float, default 0) "
"The rate of average window size relative to num_updates.")
.SetDefault(0);
AddAttr<int64_t>("max_average_window",
"(int64_t) "
"Maximum size of average window. It suggests that the "
"number of mini-batches "
"in one pass is appropriate value to set.");
AddAttr<int64_t>("min_average_window",
"(int64_t, default 10000L) "
"Minimu size of average window.")
.SetDefault(10000L);
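// A minimal sketch of how the three window attributes above are assumed to
// interact (illustrative condition, not the kernel's actual code): the
// current window is treated as full, and accumulation restarts, once
//   num_accumulates >= min_average_window &&
//   num_accumulates >= std::min<int64_t>(max_average_window,
//                                        num_updates * average_window)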
AddComment(R"DOC(
AverageAccumulates Operator.
Accumulate the sum of the parameter within a sliding window. The size of the sliding window is
determined by 'average_window', 'max_average_window' and 'min_average_window'.
Memory is shared by Input(in_sum_1) and Output(out_sum_1), which acts as an accumulator 'sum_1'.
'sum_2', 'sum_3', 'num_accumulates', 'old_num_accumulates' and 'num_updates' are shared in the same way as 'sum_1'.
All the accumulators are initialized to zero before training.
For each mini-batch in training, the accumulators are updated in the following steps: