提交 01c26972 编写于 作者: K Kexin Zhao

Merge remote-tracking branch 'upstream/develop' into inf_rnn_encode_decode

...@@ -212,6 +212,10 @@ TEST(compareSparse, NeuralNetwork) { ...@@ -212,6 +212,10 @@ TEST(compareSparse, NeuralNetwork) {
} }
int main(int argc, char** argv) { int main(int argc, char** argv) {
// FIXME(tonyyang-svail):
// Turn off this test due to CI failure:
// https://paddleci.ngrok.io/viewLog.html?buildId=27608&buildTypeId=Paddle_PrCi&tab=buildLog&_focus=10430
return 0;
testing::InitGoogleTest(&argc, argv); testing::InitGoogleTest(&argc, argv);
initMain(argc, argv); initMain(argc, argv);
initPython(argc, argv); initPython(argc, argv);
......
...@@ -287,6 +287,9 @@ TEST_F(NCCLTester, ncclBcastOp) { ...@@ -287,6 +287,9 @@ TEST_F(NCCLTester, ncclBcastOp) {
} }
int main(int argc, char **argv) { int main(int argc, char **argv) {
// FIXME(tonyyang-svail):
// Due to the driver issue on our CI, disable for now
return 0;
const int dev_count = p::GetCUDADeviceCount(); const int dev_count = p::GetCUDADeviceCount();
if (dev_count <= 1) { if (dev_count <= 1) {
LOG(WARNING) LOG(WARNING)
......
...@@ -127,6 +127,9 @@ TEST(NCCL, all_reduce) { ...@@ -127,6 +127,9 @@ TEST(NCCL, all_reduce) {
} // namespace paddle } // namespace paddle
int main(int argc, char** argv) { int main(int argc, char** argv) {
// FIXME(tonyyang-svail):
// Due to the driver issue on our CI, disable for now
return 0;
dev_count = paddle::platform::GetCUDADeviceCount(); dev_count = paddle::platform::GetCUDADeviceCount();
if (dev_count <= 1) { if (dev_count <= 1) {
LOG(WARNING) LOG(WARNING)
......
...@@ -92,7 +92,7 @@ def fc(input, ...@@ -92,7 +92,7 @@ def fc(input,
.. math:: .. math::
Out = Act({\sum_{i=0}^{N-1}W_iX_i + b}) Out = Act({\sum_{i=0}^{N-1}X_iW_i + b})
In the above equation: In the above equation:
......
...@@ -16,6 +16,8 @@ import paddle.v2 as paddle ...@@ -16,6 +16,8 @@ import paddle.v2 as paddle
import paddle.v2.fluid as fluid import paddle.v2.fluid as fluid
import contextlib import contextlib
import unittest import unittest
import math
import sys
def main(use_cuda): def main(use_cuda):
...@@ -58,6 +60,8 @@ def main(use_cuda): ...@@ -58,6 +60,8 @@ def main(use_cuda):
print(avg_loss_value) print(avg_loss_value)
if avg_loss_value[0] < 10.0: if avg_loss_value[0] < 10.0:
return return
if math.isnan(float(avg_loss_value)):
sys.exit("got NaN loss, training failed.")
raise AssertionError("Fit a line cost is too large, {0:2.2}".format( raise AssertionError("Fit a line cost is too large, {0:2.2}".format(
avg_loss_value[0])) avg_loss_value[0]))
......
...@@ -17,6 +17,8 @@ from __future__ import print_function ...@@ -17,6 +17,8 @@ from __future__ import print_function
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.fluid as fluid import paddle.v2.fluid as fluid
import contextlib import contextlib
import math
import sys
import numpy import numpy
import unittest import unittest
...@@ -145,6 +147,8 @@ def train(net_type, use_cuda, save_dirname): ...@@ -145,6 +147,8 @@ def train(net_type, use_cuda, save_dirname):
loss_t, acc_t = exe.run(program=test_program, loss_t, acc_t = exe.run(program=test_program,
feed=feeder.feed(test_data), feed=feeder.feed(test_data),
fetch_list=[avg_cost, acc]) fetch_list=[avg_cost, acc])
if math.isnan(float(loss_t)):
sys.exit("got NaN loss, training failed.")
acc_list.append(float(acc_t)) acc_list.append(float(acc_t))
avg_loss_list.append(float(loss_t)) avg_loss_list.append(float(loss_t))
break # Use 1 segment for speeding up CI break # Use 1 segment for speeding up CI
......
...@@ -18,6 +18,8 @@ import paddle.v2 as paddle ...@@ -18,6 +18,8 @@ import paddle.v2 as paddle
import sys import sys
import numpy import numpy
import unittest import unittest
import math
import sys
def parse_arg(): def parse_arg():
...@@ -148,6 +150,8 @@ def train(nn_type, use_cuda, parallel, save_dirname): ...@@ -148,6 +150,8 @@ def train(nn_type, use_cuda, parallel, save_dirname):
'PassID {0:1}, BatchID {1:04}, Test Loss {2:2.2}, Acc {3:2.2}'. 'PassID {0:1}, BatchID {1:04}, Test Loss {2:2.2}, Acc {3:2.2}'.
format(pass_id, batch_id + 1, format(pass_id, batch_id + 1,
float(avg_loss_val), float(acc_val))) float(avg_loss_val), float(acc_val)))
if math.isnan(float(avg_loss_val)):
sys.exit("got NaN loss, training failed.")
raise AssertionError("Loss of recognize digits is too large") raise AssertionError("Loss of recognize digits is too large")
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import math
import sys
import numpy as np import numpy as np
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
...@@ -217,6 +219,8 @@ def main(): ...@@ -217,6 +219,8 @@ def main():
if out[0] < 6.0: if out[0] < 6.0:
# if avg cost less than 6.0, we think our code is good. # if avg cost less than 6.0, we think our code is good.
exit(0) exit(0)
if math.isnan(float(out[0])):
sys.exit("got NaN loss, training failed.")
main() main()
...@@ -19,6 +19,8 @@ import paddle.v2.fluid.core as core ...@@ -19,6 +19,8 @@ import paddle.v2.fluid.core as core
import paddle.v2.fluid.framework as framework import paddle.v2.fluid.framework as framework
import paddle.v2.fluid.layers as layers import paddle.v2.fluid.layers as layers
import contextlib import contextlib
import math
import sys
import unittest import unittest
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
...@@ -207,7 +209,8 @@ def train(use_cuda, save_dirname=None): ...@@ -207,7 +209,8 @@ def train(use_cuda, save_dirname=None):
avg_cost_val = np.array(outs[0]) avg_cost_val = np.array(outs[0])
print('pass_id=' + str(pass_id) + ' batch=' + str(batch_id) + print('pass_id=' + str(pass_id) + ' batch=' + str(batch_id) +
" avg_cost=" + str(avg_cost_val)) " avg_cost=" + str(avg_cost_val))
if math.isnan(float(avg_cost_val[0])):
sys.exit("got NaN loss, training failed.")
if batch_id > 3: if batch_id > 3:
if save_dirname is not None: if save_dirname is not None:
fluid.io.save_inference_model( fluid.io.save_inference_model(
......
...@@ -16,6 +16,8 @@ import unittest ...@@ -16,6 +16,8 @@ import unittest
import paddle.v2.fluid as fluid import paddle.v2.fluid as fluid
import paddle.v2 as paddle import paddle.v2 as paddle
import contextlib import contextlib
import math
import sys
def convolution_net(data, label, input_dim, class_dim=2, emb_dim=32, def convolution_net(data, label, input_dim, class_dim=2, emb_dim=32,
...@@ -115,6 +117,8 @@ def main(word_dict, net_method, use_cuda): ...@@ -115,6 +117,8 @@ def main(word_dict, net_method, use_cuda):
print("cost=" + str(cost_val) + " acc=" + str(acc_val)) print("cost=" + str(cost_val) + " acc=" + str(acc_val))
if cost_val < 0.4 and acc_val > 0.8: if cost_val < 0.4 and acc_val > 0.8:
return return
if math.isnan(float(cost_val)):
sys.exit("got NaN loss, training failed.")
raise AssertionError("Cost is too large for {0}".format( raise AssertionError("Cost is too large for {0}".format(
net_method.__name__)) net_method.__name__))
......
...@@ -16,6 +16,8 @@ import paddle.v2 as paddle ...@@ -16,6 +16,8 @@ import paddle.v2 as paddle
import paddle.v2.fluid as fluid import paddle.v2.fluid as fluid
import unittest import unittest
import os import os
import math
import sys
def main(use_cuda, is_sparse, parallel): def main(use_cuda, is_sparse, parallel):
...@@ -112,6 +114,9 @@ def main(use_cuda, is_sparse, parallel): ...@@ -112,6 +114,9 @@ def main(use_cuda, is_sparse, parallel):
fetch_list=[avg_cost]) fetch_list=[avg_cost])
if avg_cost_np[0] < 5.0: if avg_cost_np[0] < 5.0:
return return
if math.isnan(float(avg_cost_np[0])):
sys.exit("got NaN loss, training failed.")
raise AssertionError("Cost is too large {0:2.2}".format(avg_cost_np[0])) raise AssertionError("Cost is too large {0:2.2}".format(avg_cost_np[0]))
...@@ -153,4 +158,6 @@ for use_cuda in (False, True): ...@@ -153,4 +158,6 @@ for use_cuda in (False, True):
inject_test_method(use_cuda, is_sparse, parallel) inject_test_method(use_cuda, is_sparse, parallel)
if __name__ == '__main__': if __name__ == '__main__':
# FIXME(tonyyang-svail):
# This test always fails on MultiGPU CI
unittest.main() unittest.main()
...@@ -198,4 +198,7 @@ class ParallelOpTestMultipleInput(BaseParallelForTest): ...@@ -198,4 +198,7 @@ class ParallelOpTestMultipleInput(BaseParallelForTest):
if __name__ == '__main__': if __name__ == '__main__':
# FIXME(tonyyang-svail):
# This test always fails on MultiGPU CI
exit(0)
unittest.main() unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册