提交 01c26972 编写于 作者: K Kexin Zhao

Merge remote-tracking branch 'upstream/develop' into inf_rnn_encode_decode

......@@ -212,6 +212,10 @@ TEST(compareSparse, NeuralNetwork) {
}
int main(int argc, char** argv) {
// FIXME(tonyyang-svail):
// Turn off this test due to a CI failure:
// https://paddleci.ngrok.io/viewLog.html?buildId=27608&buildTypeId=Paddle_PrCi&tab=buildLog&_focus=10430
return 0;
testing::InitGoogleTest(&argc, argv);
initMain(argc, argv);
initPython(argc, argv);
......
......@@ -287,6 +287,9 @@ TEST_F(NCCLTester, ncclBcastOp) {
}
int main(int argc, char **argv) {
// FIXME(tonyyang-svail):
// Due to the driver issue on our CI, disable for now
return 0;
const int dev_count = p::GetCUDADeviceCount();
if (dev_count <= 1) {
LOG(WARNING)
......
......@@ -127,6 +127,9 @@ TEST(NCCL, all_reduce) {
} // namespace paddle
int main(int argc, char** argv) {
// FIXME(tonyyang-svail):
// Due to the driver issue on our CI, disable for now
return 0;
dev_count = paddle::platform::GetCUDADeviceCount();
if (dev_count <= 1) {
LOG(WARNING)
......
......@@ -92,7 +92,7 @@ def fc(input,
.. math::
Out = Act({\sum_{i=0}^{N-1}W_iX_i + b})
Out = Act({\sum_{i=0}^{N-1}X_iW_i + b})
In the above equation:
......
......@@ -16,6 +16,8 @@ import paddle.v2 as paddle
import paddle.v2.fluid as fluid
import contextlib
import unittest
import math
import sys
def main(use_cuda):
......@@ -58,6 +60,8 @@ def main(use_cuda):
print(avg_loss_value)
if avg_loss_value[0] < 10.0:
return
if math.isnan(float(avg_loss_value)):
sys.exit("got NaN loss, training failed.")
raise AssertionError("Fit a line cost is too large, {0:2.2}".format(
avg_loss_value[0]))
......
......@@ -17,6 +17,8 @@ from __future__ import print_function
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
import contextlib
import math
import sys
import numpy
import unittest
......@@ -145,6 +147,8 @@ def train(net_type, use_cuda, save_dirname):
loss_t, acc_t = exe.run(program=test_program,
feed=feeder.feed(test_data),
fetch_list=[avg_cost, acc])
if math.isnan(float(loss_t)):
sys.exit("got NaN loss, training failed.")
acc_list.append(float(acc_t))
avg_loss_list.append(float(loss_t))
break # Use 1 segment for speeding up CI
......
......@@ -18,6 +18,8 @@ import paddle.v2 as paddle
import sys
import numpy
import unittest
import math
import sys
def parse_arg():
......@@ -148,6 +150,8 @@ def train(nn_type, use_cuda, parallel, save_dirname):
'PassID {0:1}, BatchID {1:04}, Test Loss {2:2.2}, Acc {3:2.2}'.
format(pass_id, batch_id + 1,
float(avg_loss_val), float(acc_val)))
if math.isnan(float(avg_loss_val)):
sys.exit("got NaN loss, training failed.")
raise AssertionError("Loss of recognize digits is too large")
......
......@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import sys
import numpy as np
import paddle.v2 as paddle
import paddle.v2.fluid.core as core
......@@ -217,6 +219,8 @@ def main():
if out[0] < 6.0:
# If the avg cost is less than 6.0, we consider our code good.
exit(0)
if math.isnan(float(out[0])):
sys.exit("got NaN loss, training failed.")
main()
......@@ -19,6 +19,8 @@ import paddle.v2.fluid.core as core
import paddle.v2.fluid.framework as framework
import paddle.v2.fluid.layers as layers
import contextlib
import math
import sys
import unittest
from paddle.v2.fluid.executor import Executor
......@@ -207,7 +209,8 @@ def train(use_cuda, save_dirname=None):
avg_cost_val = np.array(outs[0])
print('pass_id=' + str(pass_id) + ' batch=' + str(batch_id) +
" avg_cost=" + str(avg_cost_val))
if math.isnan(float(avg_cost_val[0])):
sys.exit("got NaN loss, training failed.")
if batch_id > 3:
if save_dirname is not None:
fluid.io.save_inference_model(
......
......@@ -16,6 +16,8 @@ import unittest
import paddle.v2.fluid as fluid
import paddle.v2 as paddle
import contextlib
import math
import sys
def convolution_net(data, label, input_dim, class_dim=2, emb_dim=32,
......@@ -115,6 +117,8 @@ def main(word_dict, net_method, use_cuda):
print("cost=" + str(cost_val) + " acc=" + str(acc_val))
if cost_val < 0.4 and acc_val > 0.8:
return
if math.isnan(float(cost_val)):
sys.exit("got NaN loss, training failed.")
raise AssertionError("Cost is too large for {0}".format(
net_method.__name__))
......
......@@ -16,6 +16,8 @@ import paddle.v2 as paddle
import paddle.v2.fluid as fluid
import unittest
import os
import math
import sys
def main(use_cuda, is_sparse, parallel):
......@@ -112,6 +114,9 @@ def main(use_cuda, is_sparse, parallel):
fetch_list=[avg_cost])
if avg_cost_np[0] < 5.0:
return
if math.isnan(float(avg_cost_np[0])):
sys.exit("got NaN loss, training failed.")
raise AssertionError("Cost is too large {0:2.2}".format(avg_cost_np[0]))
......@@ -153,4 +158,6 @@ for use_cuda in (False, True):
inject_test_method(use_cuda, is_sparse, parallel)
if __name__ == '__main__':
# FIXME(tonyyang-svail):
# This test always fails on MultiGPU CI
unittest.main()
......@@ -198,4 +198,7 @@ class ParallelOpTestMultipleInput(BaseParallelForTest):
if __name__ == '__main__':
# FIXME(tonyyang-svail):
# This test always fails on MultiGPU CI
exit(0)
unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册