Commit 1e8474b9 authored by Dong Zhihong

"delete python ops testcase"

Parent 0990c87b
@@ -13,8 +13,11 @@
    limitations under the License. */
 #include "paddle/operators/nccl_op.h"
-#include "glog/logging.h"
-#include "gtest/gtest.h"
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+#include <thrust/device_vector.h>
+#include <memory>
+#include <vector>
 #include "paddle/framework/block_desc.h"
 #include "paddle/framework/op_desc.h"
@@ -24,10 +27,13 @@
 #include "paddle/platform/device_context.h"
 #include "paddle/platform/enforce.h"
 #include "paddle/platform/gpu_info.h"
-#include <thrust/device_vector.h>
-#include <memory>
-#include <vector>
+#include "paddle/platform/place.h"
+USE_CPU_ONLY_OP(ncclInit);
+USE_GPU_ONLY_OP(ncclAllReduce);
+USE_GPU_ONLY_OP(ncclReduce);
+USE_GPU_ONLY_OP(ncclBcastSend);
+USE_GPU_ONLY_OP(ncclBcastRecv);
 static std::vector<int> gpu_list;
@@ -55,28 +61,28 @@ void AddOp(const std::string &type, const f::VariableNameMap &inputs,
   op->SetAttrMap(attrs);
 }
-TEST(NCCL, ncclInit) {
+// ncclInitOp with desc
+TEST(NCCL, ncclInitOp) {
   f::ProgramDescBind program;
   f::BlockDescBind *block = program.Block(0);
-  f::OpDescBind *op = block->AppendOp();
-  paddle::platform::Communicator comm;
-  op->SetType("ncclInit");
-  op->SetOutput("Communicator", )
-  AddOp("ncclInit", {}, {{"Communicator", {comm}}}, {{"gpus", {gpu_list}}},
-        block);
+  f::OpDescBind *op1 = block->AppendOp();
+  op1->SetType("ncclInit");
+  op1->SetOutput("Communicator", {"x1"});
+  op1->SetAttr("gpus", {gpu_list});
+  f::Scope g_scope;
+  paddle::platform::DeviceContext *ctx =
+      new paddle::platform::CPUDeviceContext(paddle::platform::CPUPlace());
+  auto *var = g_scope.Var("x1");
+  var->GetMutable<paddle::platform::Communicator>();
+  auto op = f::OpRegistry::CreateOp(*op1);
+  VLOG(1) << "invoke NCCLInitOp.";
+  op->Run(g_scope, *ctx);
+  VLOG(1) << "NCCLInitOp finished.";
 }
+// TEST(NCCL, ncclAllReduce) {
+//   f::ProgramDescBind program;
+//   f::BlockDescBind *block = program.Block(0);
+//   paddle::platform::Communicator comm;
+//   AddOp("ncclInit", {}, {{"Communicator", {comm}}, {"gpus", {gpu_list}}},
+//         block);
+// }
 int main(int argc, char **argv) {
   static int dev_count = paddle::platform::GetCUDADeviceCount();
   if (dev_count <= 1) {
...
import unittest, os
import numpy as np
import paddle.v2 as paddle
from paddle.v2.framework.op import Operator
import paddle.v2.framework.core as core
from op_test import OpTest, create_op, set_input

# gpu_list = os.environ["NV_LIST"]
gpu_list = "0,1,2,3"

if not core.is_compile_gpu() or not gpu_list:
    exit(0)
def allreduce(tensors, gpus):
    # Reference all-reduce (sum): every entry ends up holding the sum of all.
    num_device = len(gpus)
    assert len(tensors) == num_device, "not match of tensor and device"
    Out = tensors
    # Accumulate every tensor into Out[0] ...
    for i in range(1, len(tensors)):
        Out[0] += Out[i]
    # ... then copy the total back to every position.
    for i in range(1, len(tensors)):
        Out[i] = Out[0]
    return Out
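As a quick sanity check of the helper's semantics, a minimal numpy-only sketch (the ranks, shapes, and gpu ids here are illustrative; it reuses the allreduce defined above):

import numpy as np

# Four simulated ranks, each holding one tensor; after an all-reduce (sum)
# every rank should hold the elementwise sum of all four inputs.
inputs = [np.random.random((32, 32)) for _ in range(4)]
expected = inputs[0] + inputs[1] + inputs[2] + inputs[3]
outputs = allreduce([x.copy() for x in inputs], gpus=[0, 1, 2, 3])
assert all(np.allclose(out, expected) for out in outputs)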
class TestNCCLAllReduce(unittest.TestCase):
    def setUp(self):
        self.op_type = "ncclAllReduce"
        self.gpus = [int(g) for g in gpu_list.split(",")]

        self.g_scope = core.Scope()
        self.g_ctx = core.DeviceContext.create(core.CPUPlace())
        self.scopes = []
        self.ops = []
        self.places = []

        self.input_data = []
        for i in range(len(self.gpus)):
            self.input_data.append(np.random.random((32, 32)))
        self.output_data = allreduce(self.input_data, self.gpus)

        # Create the communicator once in the global scope, then run ncclInit.
        nccl_init = Operator("ncclInit", Out="Communicator", gpus=self.gpus)
        nccl_init.run(self.g_scope, self.g_ctx)

        for i in range(len(self.gpus)):
            # insert kid scope
            scope = self.g_scope.new_scope()
            place = core.GPUPlace(self.gpus[i])

            inputs = {"X": self.input_data[i]}
            outputs = {"Out": self.output_data[i]}
            attrs = {"gpus": self.gpus}

            op = create_op(scope, self.op_type, inputs, outputs, attrs)
            set_input(scope, op, inputs, place)

            self.scopes.append(scope)
            self.ops.append(op)
            self.places.append(place)
    def test_output(self):
        idx = 0
        for scope, place, op in zip(self.scopes, self.places, self.ops):
            ctx = core.DeviceContext.create(place)
            op.run(scope, ctx)

            for out_name, out_dup in Operator.get_op_outputs(op.type()):
                actual = np.array(scope.find_var(out_name).get_tensor())
                expect = self.output_data[idx]

                idx += 1
                self.assertTrue(np.allclose(actual, expect), "has diff")
# if __name__ == "__main__":
#     unittest.main()

# usage: export NV_LIST=0,1,2,3 python *.py
# os.environ["NV_LIST"] = ["0,1,2,3"]

if __name__ == "__main__":
    unittest.main()