Commit 14fe72f3 authored by Yi Huaijie

fix pylint warnings
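The files below appear as before/after copies of the affected parallel tests (the larger ones are shown as unified-diff hunks instead). The warnings addressed include wrong-import-order and unused-import (import blocks reordered, unused pytest imports dropped), redefined-builtin (the parameter name input renamed to input_), invalid-name (dataGen and GenValue renamed to data_gen and gen_value), unused-variable (unused results assigned to _), superfluous parentheses around assert conditions, and trivially-true comparisons. A condensed illustration of the recurring pattern:

# before                                   after
#   def construct(self, input, indices):   def construct(self, input_, indices):
#   assert (ret == 0)                       assert ret == 0
#   assert True == True                     assert True
#   dataGen = DataGenerator()               data_gen = DataGenerator()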

# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import os
import pytest
import mindspore as ms
import mindspore.communication.management as distributedTool
import mindspore.context as context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
device_num = 2
device_id = int(os.getenv('DEVICE_ID'))
rank_id = 0
def setup_module():
global device_num
global rank_id
np.random.seed(0)
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
context.set_context(device_id=device_id)
distributedTool.init()
device_num = distributedTool.get_group_size()
rank_id = distributedTool.get_rank()
context.set_auto_parallel_context(device_num=device_num,
global_rank=rank_id)
def teardown_module():
distributedTool.release()
class Onehot(Cell):
def __init__(self, axis=-1, depth=1, on_value=1.0, off_value=0.0, strategy=None):
super(Onehot, self).__init__()
trans_stra = None
if strategy:
trans_stra = (strategy[0],)
self.onehot = P.OneHot().set_strategy(strategy=strategy)
self.depth = depth
self.on_value = Tensor(on_value, ms.float32)
self.off_value = Tensor(off_value, ms.float32)
self.transpose = P.Transpose().set_strategy(strategy=trans_stra)
self.sub = P.Sub().set_strategy(strategy=((1, 1), (1, 1)))
def construct(self, input, indices):
x = self.onehot(indices, self.depth, self.on_value, self.off_value)
x = self.transpose(x, (1, 0))
x = self.sub(input, x)
return x
class DataGenerator():
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def generate_data(self, shape):
data = np.random.rand(*shape)
return data
def input_data(self, shape):
data = (self.generate_data(shape) * 2).astype(np.float32)
stra = [1] * len(shape)
stra[0] = device_num
datas = self.get_parallel_blocks(data, stra)
return Tensor(data), Tensor(datas[rank_id])
def label_data(self, shape, classes):
data = (self.generate_data(shape) * (classes - 1)).astype(np.int32)
stra = [1] * len(shape)
stra[0] = device_num
datas = self.get_parallel_blocks(data, stra)
return Tensor(data), Tensor(datas[rank_id])
class OneHotFactory:
def __init__(self, batch_size, classes, on_value=1.0, off_value=0.0, axis=None, strategy=None):
dataGen = DataGenerator()
self.input_full, self.input_part = dataGen.input_data((classes, batch_size))
self.label_full, self.label_part = dataGen.label_data((batch_size,), classes)
self.depth = classes
self.on_value = on_value
self.off_value = off_value
self.axis = axis
self.strategy = strategy
def forward_mindspore_single_impl(self):
net = Onehot(axis=self.axis,
depth=self.depth,
on_value=self.on_value,
off_value=self.off_value)
out = net(self.input_full, self.label_full)
return out
def forward_mindspore_parallel_impl(self):
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net = Onehot(axis=self.axis,
depth=self.depth,
on_value=self.on_value,
off_value=self.off_value, strategy=self.strategy)
out = net.compile_and_run(self.input_full, self.label_full)
return out
def forward_cmp(self):
out_mindspore_single = self.forward_mindspore_single_impl().asnumpy()
context.reset_auto_parallel_context()
out_mindspore_parallel = self.forward_mindspore_parallel_impl().asnumpy()
context.reset_auto_parallel_context()
assert np.allclose(out_mindspore_single, out_mindspore_parallel, 0.0001, 0.0001)
def test_reid_onehot_forward_int32_128_depth1024_model_parallel():
fact = OneHotFactory(batch_size=128,
classes=1024,
on_value=1.000000,
off_value=0.000000,
axis=-1,
strategy=((1, device_num), (), ()))
fact.forward_cmp()
def test_reid_onehot_forward_int32_1024_depth128_model_parallel():
fact = OneHotFactory(batch_size=1024,
classes=128,
on_value=1.000000,
off_value=0.000000,
axis=-1,
strategy=((1, device_num), (), ()))
fact.forward_cmp()
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import os
import numpy as np
import mindspore as ms
import mindspore.communication.management as distributedTool
import mindspore.context as context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
device_num = 2
device_id = int(os.getenv('DEVICE_ID'))
rank_id = 0
def setup_module():
global device_num
global rank_id
np.random.seed(0)
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
context.set_context(device_id=device_id)
distributedTool.init()
device_num = distributedTool.get_group_size()
rank_id = distributedTool.get_rank()
context.set_auto_parallel_context(device_num=device_num,
global_rank=rank_id)
def teardown_module():
distributedTool.release()
class Onehot(Cell):
def __init__(self, axis=-1, depth=1, on_value=1.0, off_value=0.0, strategy=None):
super(Onehot, self).__init__()
trans_stra = None
if strategy:
trans_stra = (strategy[0],)
self.onehot = P.OneHot().set_strategy(strategy=strategy)
self.depth = depth
self.on_value = Tensor(on_value, ms.float32)
self.off_value = Tensor(off_value, ms.float32)
self.transpose = P.Transpose().set_strategy(strategy=trans_stra)
self.sub = P.Sub().set_strategy(strategy=((1, 1), (1, 1)))
self.axis = axis
def construct(self, input_, indices):
x = self.onehot(indices, self.depth, self.on_value, self.off_value)
x = self.transpose(x, (1, 0))
x = self.sub(input_, x)
return x
class DataGenerator():
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def generate_data(self, shape):
data = np.random.rand(*shape)
return data
def input_data(self, shape):
data = (self.generate_data(shape) * 2).astype(np.float32)
stra = [1] * len(shape)
stra[0] = device_num
datas = self.get_parallel_blocks(data, stra)
return Tensor(data), Tensor(datas[rank_id])
def label_data(self, shape, classes):
data = (self.generate_data(shape) * (classes - 1)).astype(np.int32)
stra = [1] * len(shape)
stra[0] = device_num
datas = self.get_parallel_blocks(data, stra)
return Tensor(data), Tensor(datas[rank_id])
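# A minimal illustration of how get_parallel_blocks() shards a tensor
# (hypothetical 4x4 input; not used by the tests themselves): with strategy
# [2, 2] the array is cut into four row-major blocks and device rank_id takes
# blocks[rank_id].
_example_data = np.arange(16).reshape(4, 4)
_example_blocks = DataGenerator().get_parallel_blocks(_example_data, [2, 2])
assert np.array_equal(_example_blocks[0], _example_data[:2, :2])  # block for rank 0
assert np.array_equal(_example_blocks[3], _example_data[2:, 2:])  # block for rank 3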
class OneHotFactory:
def __init__(self, batch_size, classes, on_value=1.0, off_value=0.0, axis=None, strategy=None):
data_gen = DataGenerator()
self.input_full, self.input_part = data_gen.input_data((classes, batch_size))
self.label_full, self.label_part = data_gen.label_data((batch_size,), classes)
self.depth = classes
self.on_value = on_value
self.off_value = off_value
self.axis = axis
self.strategy = strategy
def forward_mindspore_single_impl(self):
net = Onehot(axis=self.axis,
depth=self.depth,
on_value=self.on_value,
off_value=self.off_value)
out = net(self.input_full, self.label_full)
return out
def forward_mindspore_parallel_impl(self):
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net = Onehot(axis=self.axis,
depth=self.depth,
on_value=self.on_value,
off_value=self.off_value, strategy=self.strategy)
out = net.compile_and_run(self.input_full, self.label_full)
return out
def forward_cmp(self):
out_mindspore_single = self.forward_mindspore_single_impl().asnumpy()
context.reset_auto_parallel_context()
out_mindspore_parallel = self.forward_mindspore_parallel_impl().asnumpy()
context.reset_auto_parallel_context()
assert np.allclose(out_mindspore_single, out_mindspore_parallel, 0.0001, 0.0001)
def test_reid_onehot_forward_int32_128_depth1024_model_parallel():
fact = OneHotFactory(batch_size=128,
classes=1024,
on_value=1.000000,
off_value=0.000000,
axis=-1,
strategy=((1, device_num), (), ()))
fact.forward_cmp()
def test_reid_onehot_forward_int32_1024_depth128_model_parallel():
fact = OneHotFactory(batch_size=1024,
classes=128,
on_value=1.000000,
off_value=0.000000,
axis=-1,
strategy=((1, device_num), (), ()))
fact.forward_cmp()
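# What forward_cmp() above exercises: the same Onehot cell is run once without
# strategies (stand-alone graph) and once under semi_auto_parallel with
# strategy ((1, device_num), (), ()), and the two full outputs must agree
# within rtol/atol 1e-4. The (1, device_num) tuple appears to shard the
# depth/class dimension of the one-hot output across devices, while the empty
# tuples cover the scalar on_value/off_value inputs.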
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import os
import pytest
@pytest.mark.level0
@pytest.mark.platform_x86_ascend_training
@pytest.mark.platform_arm_ascend_training
@pytest.mark.env_single
def test_expand_loss():
sh_path = os.path.split(os.path.realpath(__file__))[0]
ret = os.system(f"sh {sh_path}/run_auto_parallel_loss_expand.sh")
assert (ret == 0)
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import os
import pytest
@pytest.mark.level0
@pytest.mark.platform_x86_ascend_training
@pytest.mark.platform_arm_ascend_training
@pytest.mark.env_single
def test_expand_loss():
sh_path = os.path.split(os.path.realpath(__file__))[0]
ret = os.system(f"sh {sh_path}/run_auto_parallel_loss_expand.sh")
assert ret == 0
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import os
import pytest
def test_expand_loss():
ret = os.system("sh run_onehot_model_parallel.sh")
assert (ret == 0)
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import os
def test_expand_loss():
ret = os.system("sh run_onehot_model_parallel.sh")
assert ret == 0
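# These launcher tests only shell out to the distributed scripts
# (run_auto_parallel_loss_expand.sh / run_onehot_model_parallel.sh), which are
# expected to start the multi-device run themselves; the Python side merely
# asserts that the script exited with status 0.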
@@ -13,8 +13,8 @@
# limitations under the License.
# ============================================================================
import numpy as np
import os
import numpy as np
import pytest
import mindspore.common.dtype as mstype
@@ -37,31 +37,29 @@ init()
context.set_auto_parallel_context(mirror_mean=True, parallel_mode=ParallelMode.AUTO_PARALLEL)
def weight_variable(shape, factor=0.1):
def weight_variable():
return One()
def _conv3x3(in_channels, out_channels, stride=1, padding=0, pad_mode='same'):
init_value = weight_variable((out_channels, in_channels, 3, 3))
init_value = weight_variable()
return nn.Conv2d(in_channels, out_channels,
kernel_size=3, stride=stride, padding=padding, pad_mode=pad_mode, weight_init=init_value)
def _conv1x1(in_channels, out_channels, stride=1, padding=0, pad_mode='same'):
init_value = weight_variable((out_channels, in_channels, 1, 1))
init_value = weight_variable()
return nn.Conv2d(in_channels, out_channels,
kernel_size=1, stride=stride, padding=padding, pad_mode=pad_mode, weight_init=init_value)
def _conv7x7(in_channels, out_channels, stride=1, padding=0, pad_mode='same'):
init_value = weight_variable((out_channels, in_channels, 7, 7))
init_value = weight_variable()
return nn.Conv2d(in_channels, out_channels,
kernel_size=7, stride=stride, padding=padding, pad_mode=pad_mode, weight_init=init_value)
def _fused_bn(channels, momentum=0.9):
init_weight = weight_variable((channels,))
init_bias = weight_variable((channels,))
return nn.BatchNorm2d(channels, momentum=momentum)
@@ -210,8 +208,8 @@ class ResNet(nn.Cell):
self.mean = P.ReduceMean(keep_dims=True)
self.end_point = nn.Dense(2048, num_classes, has_bias=True,
weight_init=weight_variable((num_classes, 2048)),
bias_init=weight_variable((num_classes,)))
weight_init=weight_variable(),
bias_init=weight_variable())
self.squeeze = P.Squeeze()
self.cast = P.Cast()
@@ -345,9 +343,8 @@ class Dataset():
raise StopIteration
self.index += 1
if self.input_num == 2:
return self.predict, self.label
else:
return self.predict,
return (self.predict, self.label)
return (self.predict,)
def reset(self):
self.index = 0
@@ -364,7 +361,7 @@ class ModelCallback(Callback):
super(ModelCallback, self).__init__()
self.loss_list = []
def epoch_end(self, run_context, *args):
def epoch_end(self, run_context):
cb_params = run_context.original_args()
result = cb_params.net_outputs
self.loss_list.append(result.asnumpy().mean())
@@ -376,9 +373,9 @@ class ModelCallback(Callback):
def test_train_feed(num_classes=8192):
set_algo_parameters(elementwise_op_strategy_follow=True)
parallel_callback = ModelCallback()
dataGen = DataGenerator()
input_full, input_part = dataGen.input_data((32 * 2, 3, 224, 224))
label_full, label_part = dataGen.label_data((32 * 2,))
data_gen = DataGenerator()
_, input_part = data_gen.input_data((32 * 2, 3, 224, 224))
_, label_part = data_gen.label_data((32 * 2,))
dataset = Dataset(input_part, label_part)
net = resnet50(num_classes)
loss = SoftmaxCrossEntropyExpand(sparse=True)
@@ -396,9 +393,9 @@ def test_train_feed(num_classes=8192):
def test_train_feed2(num_classes=1001):
set_algo_parameters(elementwise_op_strategy_follow=True)
parallel_callback = ModelCallback()
dataGen = DataGenerator()
input_full, input_part = dataGen.input_data((32 * 2, 3, 224, 224))
label_full, label_part = dataGen.label_data((32 * 2,))
data_gen = DataGenerator()
_, input_part = data_gen.input_data((32 * 2, 3, 224, 224))
_, label_part = data_gen.label_data((32 * 2,))
dataset = Dataset(input_part, label_part)
net = resnet50(num_classes)
loss = SoftmaxCrossEntropyExpand(sparse=True)
......
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
sys.path.append("../../..")
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
sys.path.append("../../..")
@@ -25,7 +25,6 @@ from mindspore.nn import Dense
from mindspore.nn import Momentum
from mindspore.nn import ReLU
from mindspore.nn import TrainOneStepCell, WithLossCell
from mindspore.ops.operations import Split
from mindspore.ops.operations.comm_ops import AllReduce, AllGather, _AlltoAll, ReduceOp, ReduceScatter
from mindspore.ops.operations.comm_ops import Broadcast
......
@@ -16,8 +16,8 @@
@File : test_data_parallel_lenet.py
@Desc : test data parallel lenet
"""
import numpy as np
import os
import numpy as np
import mindspore.context as context
import mindspore.nn as nn
@@ -80,7 +80,6 @@ def test_lenet5_train_step_training_pynative():
context.reset_auto_parallel_context()
context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL,
device_num=8, mirror_mean=True)
size = 3
predict = Tensor(np.ones([1, 1, 32, 32]).astype(np.float32) * 0.01)
label = Tensor(np.zeros([1, 10]).astype(np.float32))
DatasetLenet(predict, label, 2)
......
@@ -19,7 +19,7 @@ from mindspore.parallel._utils import _reset_op_id
from mindspore.parallel.algo_parameter_config import reset_algo_parameters
def setup_module(module):
def setup_module():
auto_parallel_context().set_enable_all_reduce_fusion(enable_all_reduce_fusion=True)
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False)
reset_cost_model_context()
......
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import os
import pytest
import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class AddRelu(Cell):
def __init__(self, strategy0=None, strategy1=None):
super(AddRelu, self).__init__()
self.add = P.TensorAdd(strategy=strategy0)
self.relu = P.ReLU(strategy=strategy1)
def construct(self, x, z):
out = self.add(x, z)
return self.relu(out)
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, x, y, output_grad):
return grad_all_with_sens(self.network)(x, y, output_grad)
class AddReluFactory:
def __init__(self, input_shape, strategy0, strategy1):
prefix = ""
size = 1
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(1000, size)
self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(
np.float32)
self.input_np2 = 1.0
self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1,
input_shape).astype(np.float32)
self.strategy0 = strategy0
self.strategy1 = strategy1
need_dev_num = 1
need_dev_num_ = 1
for s in strategy0[1]:
need_dev_num = need_dev_num * s
for s in strategy1[1]:
need_dev_num_ = need_dev_num_ * s
self.x_id = device_id % need_dev_num
self.y_id = device_id % need_dev_num
self.out_id = device_id % need_dev_num_
def forward_mindspore_impl(self):
net = AddRelu()
x = Tensor(self.input_np1)
y = Tensor(self.input_np2, ms.float32)
out = net(x, y)
return out.asnumpy()
def forward_mindspore_parallel_impl(self):
net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
x = Tensor(self.input_np1)
y = Tensor(self.input_np2, ms.float32)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(self.input_np2, ms.float32)
out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
return out.asnumpy()
def grad_mindspore_impl(self):
output_grad = Tensor(self.output_grad_np)
x = Tensor(self.input_np1)
y = Tensor(self.input_np2, ms.float32)
net = AddRelu()
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, output_grad)
return input_grad
def grad_mindspore_parallel_impl(self):
output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy1[1])
output_grad = Tensor(output_grads[self.out_id])
x = Tensor(self.input_np1)
y = Tensor(self.input_np2, ms.float32)
net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1)
grad_net = Grad(net)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_auto_parallel()
grad_net.set_train()
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(self.input_np2, ms.float32)
input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad],
parallel_inputs_run=[x1, y1, output_grad])
return input_grad
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def forward_cmp(self):
out_mindspore = self.forward_mindspore_impl()
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy1[1])
assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001)
def grad_cmp(self):
input_grad_mindspore = self.grad_mindspore_impl()
input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
assert np.allclose(input_grad_mindspore1, input_grad_mindspore_parallel1, 0.0001, 0.0001)
@pytest.mark.reid_forward
def test_reid_add_relu_input_256_64():
stra0 = (0, (2, 2), ())
stra1 = (0, (2, 2))
fact = AddReluFactory(input_shape=(256, 64), strategy0=stra0, strategy1=stra1)
fact.forward_cmp()
@pytest.mark.reid_grad
def test_reid_grad_add_relu_input_256_64():
stra0 = (0, (2, 2), ())
stra1 = (0, (2, 2))
fact = AddReluFactory(input_shape=(256, 64), strategy0=stra0, strategy1=stra1)
fact.grad_cmp()
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import pytest
import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class AddRelu(Cell):
def __init__(self, strategy0=None, strategy1=None):
super(AddRelu, self).__init__()
self.add = P.TensorAdd(strategy=strategy0)
self.relu = P.ReLU(strategy=strategy1)
def construct(self, x, z):
out = self.add(x, z)
return self.relu(out)
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, x, y, output_grad):
return grad_all_with_sens(self.network)(x, y, output_grad)
class AddReluFactory:
def __init__(self, input_shape, strategy0, strategy1):
prefix = ""
size = 1
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(1000, size)
self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(
np.float32)
self.input_np2 = 1.0
self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1,
input_shape).astype(np.float32)
self.strategy0 = strategy0
self.strategy1 = strategy1
need_dev_num = 1
need_dev_num_ = 1
for s in strategy0[1]:
need_dev_num = need_dev_num * s
for s in strategy1[1]:
need_dev_num_ = need_dev_num_ * s
self.x_id = device_id % need_dev_num
self.y_id = device_id % need_dev_num
self.out_id = device_id % need_dev_num_
def forward_mindspore_impl(self):
net = AddRelu()
x = Tensor(self.input_np1)
y = Tensor(self.input_np2, ms.float32)
out = net(x, y)
return out.asnumpy()
def forward_mindspore_parallel_impl(self):
net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
x = Tensor(self.input_np1)
y = Tensor(self.input_np2, ms.float32)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(self.input_np2, ms.float32)
out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
return out.asnumpy()
def grad_mindspore_impl(self):
output_grad = Tensor(self.output_grad_np)
x = Tensor(self.input_np1)
y = Tensor(self.input_np2, ms.float32)
net = AddRelu()
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, output_grad)
return input_grad
def grad_mindspore_parallel_impl(self):
output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy1[1])
output_grad = Tensor(output_grads[self.out_id])
x = Tensor(self.input_np1)
y = Tensor(self.input_np2, ms.float32)
net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1)
grad_net = Grad(net)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_auto_parallel()
grad_net.set_train()
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(self.input_np2, ms.float32)
input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad],
parallel_inputs_run=[x1, y1, output_grad])
return input_grad
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def forward_cmp(self):
out_mindspore = self.forward_mindspore_impl()
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy1[1])
assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001)
def grad_cmp(self):
input_grad_mindspore = self.grad_mindspore_impl()
input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
_ = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
_ = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
assert np.allclose(input_grad_mindspore1, input_grad_mindspore_parallel1, 0.0001, 0.0001)
@pytest.mark.reid_forward
def test_reid_add_relu_input_256_64():
stra0 = (0, (2, 2), ())
stra1 = (0, (2, 2))
fact = AddReluFactory(input_shape=(256, 64), strategy0=stra0, strategy1=stra1)
fact.forward_cmp()
@pytest.mark.reid_grad
def test_reid_grad_add_relu_input_256_64():
stra0 = (0, (2, 2), ())
stra1 = (0, (2, 2))
fact = AddReluFactory(input_shape=(256, 64), strategy0=stra0, strategy1=stra1)
fact.grad_cmp()
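# Note on the pattern used in this file: the parallel cells are compiled
# against the full-shape tensors (parallel_inputs_compile=[x, y]) but executed
# with this device's shards (parallel_inputs_run=[x1, y1]). forward_cmp() then
# slices the single-device result with get_parallel_blocks() and checks that
# the block belonging to this rank matches the parallel output within
# rtol=1e-4 / atol=1e-3, while grad_cmp() compares the gradient of the scalar
# second input between the two runs with rtol=atol=1e-4.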
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import os
import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.nn import Dropout
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class Net(Cell):
def __init__(self, keep_prob, seed0, seed1, strategy=None):
super(Net, self).__init__()
self.drop = Dropout(keep_prob, seed0, seed1, dtype=ms.float32, strategy=strategy)
def construct(self, input):
x = self.drop(input)
return x
# pylint: disable=comparison-with-itself
class DropoutFactory:
def __init__(self, input_shape, keep_prob, seed0, seed1, strategy0=None):
size = 1
prefix = ""
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(10, size)
self.input_np = np.reshape(np.arange(0, size) % number_range, input_shape).astype(np.float32)
self.keep_prob = keep_prob
self.seed0 = seed0
self.seed1 = seed1
self.strategy0 = strategy0
need_dev_num = 1
for s in strategy0[1]:
need_dev_num = need_dev_num * s
self.x_id = device_id % need_dev_num
self.out_id = device_id % need_dev_num
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def d4_tensor_compare(self, input, out_me):
[a, b, c, d] = input.shape
for i in range(a):
for j in range(b):
for k in range(c):
for e in range(d):
if out_me[i, j, k, e] == 0:
assert True == True
else:
assert np.allclose(out_me[i, j, k, e], input[i, j, k, e] * (1 / 0.4), 0.0001, 0.0001)
def forward_mindspore_parallel_impl(self):
x = Tensor(self.input_np)
inputs_x = self.get_parallel_blocks(self.input_np, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
net = Net(0.4, 0, 0, strategy=self.strategy0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
out = net(x, parallel_inputs_compile=[x], parallel_inputs_run=[x1])
return out.asnumpy()
def forward_cmp(self):
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
input_blocks = self.get_parallel_blocks(self.input_np, self.strategy0[1])
self.d4_tensor_compare(input_blocks[self.out_id], out_mindspore_parallel)
def test_reid_dropout_forward_seed_F32_64_512_8_8():
fact = DropoutFactory(input_shape=(64, 512, 8, 8), keep_prob=0.4, seed0=0, seed1=0, strategy0=(0, (4, 1, 1, 1)))
fact.forward_cmp()
def test_reid_dropout_forward_seed_F32_64_512_8_8_repeat():
fact = DropoutFactory(input_shape=(64, 512, 8, 8), keep_prob=0.4, seed0=0, seed1=0, strategy0=(0, (2, 1, 1, 1)))
fact.forward_cmp()
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.nn import Dropout
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class Net(Cell):
def __init__(self, keep_prob, seed0, seed1, strategy=None):
super(Net, self).__init__()
self.drop = Dropout(keep_prob, seed0, seed1, dtype=ms.float32, strategy=strategy)
def construct(self, input_):
x = self.drop(input_)
return x
# pylint: disable=comparison-with-itself
class DropoutFactory:
def __init__(self, input_shape, keep_prob, seed0, seed1, strategy0=None):
size = 1
prefix = ""
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(10, size)
self.input_np = np.reshape(np.arange(0, size) % number_range, input_shape).astype(np.float32)
self.keep_prob = keep_prob
self.seed0 = seed0
self.seed1 = seed1
self.strategy0 = strategy0
need_dev_num = 1
for s in strategy0[1]:
need_dev_num = need_dev_num * s
self.x_id = device_id % need_dev_num
self.out_id = device_id % need_dev_num
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def d4_tensor_compare(self, input_, out_me):
[a, b, c, d] = input_.shape
for i in range(a):
for j in range(b):
for k in range(c):
for e in range(d):
if out_me[i, j, k, e] == 0:
assert True
else:
assert np.allclose(out_me[i, j, k, e], input_[i, j, k, e] * (1 / 0.4), 0.0001, 0.0001)
def forward_mindspore_parallel_impl(self):
x = Tensor(self.input_np)
inputs_x = self.get_parallel_blocks(self.input_np, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
net = Net(0.4, 0, 0, strategy=self.strategy0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
out = net(x, parallel_inputs_compile=[x], parallel_inputs_run=[x1])
return out.asnumpy()
def forward_cmp(self):
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
input_blocks = self.get_parallel_blocks(self.input_np, self.strategy0[1])
self.d4_tensor_compare(input_blocks[self.out_id], out_mindspore_parallel)
def test_reid_dropout_forward_seed_F32_64_512_8_8():
fact = DropoutFactory(input_shape=(64, 512, 8, 8), keep_prob=0.4, seed0=0, seed1=0, strategy0=(0, (4, 1, 1, 1)))
fact.forward_cmp()
def test_reid_dropout_forward_seed_F32_64_512_8_8_repeat():
fact = DropoutFactory(input_shape=(64, 512, 8, 8), keep_prob=0.4, seed0=0, seed1=0, strategy0=(0, (2, 1, 1, 1)))
fact.forward_cmp()
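# The check in d4_tensor_compare() relies on inverted dropout: with
# keep_prob = 0.4 every surviving element is scaled by 1 / 0.4 and every
# dropped element becomes 0. A small self-contained sketch of that property
# (with an assumed keep mask, for illustration only):
_p = 0.4
_x = np.arange(1.0, 5.0, dtype=np.float32).reshape(2, 2)
_mask = np.array([[1.0, 0.0], [0.0, 1.0]], dtype=np.float32)  # assumed keep mask
_out = _x * _mask / _p
assert np.allclose(_out[_mask == 1], _x[_mask == 1] / _p)
assert np.all(_out[_mask == 0] == 0)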
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import os
import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class MatmulSingle(Cell):
def __init__(self, transpose_a=False, transpose_b=False):
super(MatmulSingle, self).__init__()
self.matmul = P.MatMul(transpose_a, transpose_b)
self.pow = P.Pow()
self.reduce_sum = P.ReduceSum()
def construct(self, x, y):
out = self.matmul(x, y)
out = self.pow(out, 2.0)
out = self.reduce_sum(out, None)
return out
class MatmulAllgather(Cell):
def __init__(self, group, transpose_a=False, transpose_b=False):
super(MatmulAllgather, self).__init__()
self.allgather = P.AllGather(group=group)
self.matmul = P.MatMul(transpose_a, transpose_b)
self.pow = P.Pow()
self.reduce_sum = P.ReduceSum()
self.allreduce = P.AllReduce(group=group)
def construct(self, x, y):
x = self.allgather(x)
out = self.matmul(x, y)
out = self.pow(out, 2.0)
out = self.reduce_sum(out, None)
out = self.allreduce(out)
return out
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, x, y, sens):
return grad_all_with_sens(self.network)(x, y, sens)
class MatmulAllgatherFactory:
def __init__(self, inputx_shape, inputy_shape, x_stra, y_stra):
self.inputx = self.GenValue(inputx_shape, 10)
self.inputy = self.GenValue(inputy_shape, 20)
self.x_stra = x_stra
self.y_stra = y_stra
stra_size = 1
for s in x_stra:
stra_size = stra_size * s
self.stra_size = stra_size
def GenValue(self, input_shape, delta):
size = 1
for s in input_shape:
size = size * s
number_range = min(100, size)
input_np = np.reshape(np.arange(0, size) % number_range - delta, input_shape).astype(np.float32)
return input_np
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def grad_mindspore_impl_single(self):
x = Tensor(self.inputx)
y = Tensor(self.inputy)
sens = Tensor(1.0, dtype=ms.float32)
net = MatmulSingle()
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, sens)
return input_grad
def grad_mindspore_impl_reduce(self):
inputxs = self.get_parallel_blocks(self.inputx, self.x_stra)
inputys = self.get_parallel_blocks(self.inputy, self.y_stra)
x = Tensor(inputxs[device_id % self.stra_size])
y = Tensor(inputys[device_id % self.stra_size])
repeat_num = device_num / self.stra_size
v = self.stra_size * repeat_num * repeat_num * repeat_num
sens = Tensor(1.0 / v, dtype=ms.float32)
net = MatmulAllgather("hccl_world_group")
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, sens)
return input_grad
def grad_cmp(self):
single_results = self.grad_mindspore_impl_single()
reduce_results = self.grad_mindspore_impl_reduce()
single_result0 = self.get_parallel_blocks(single_results[0].asnumpy(), self.x_stra)[device_id % self.stra_size]
reduce_result0 = reduce_results[0].asnumpy()
single_result1 = self.get_parallel_blocks(single_results[1].asnumpy(), self.y_stra)[device_id % self.stra_size]
reduce_result1 = reduce_results[1].asnumpy()
assert np.allclose(single_result0, reduce_result0, 0.0001, 0.0001)
assert np.allclose(single_result1, reduce_result1, 0.0001, 0.0001)
def test_reduce_grad():
inputx_shape = (64, 32)
inputy_shape = (32, 64)
fact = MatmulAllgatherFactory(inputx_shape, inputy_shape, (4, 1), (1, 4))
fact.grad_cmp()
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class MatmulSingle(Cell):
def __init__(self, transpose_a=False, transpose_b=False):
super(MatmulSingle, self).__init__()
self.matmul = P.MatMul(transpose_a, transpose_b)
self.pow = P.Pow()
self.reduce_sum = P.ReduceSum()
def construct(self, x, y):
out = self.matmul(x, y)
out = self.pow(out, 2.0)
out = self.reduce_sum(out, None)
return out
class MatmulAllgather(Cell):
def __init__(self, group, transpose_a=False, transpose_b=False):
super(MatmulAllgather, self).__init__()
self.allgather = P.AllGather(group=group)
self.matmul = P.MatMul(transpose_a, transpose_b)
self.pow = P.Pow()
self.reduce_sum = P.ReduceSum()
self.allreduce = P.AllReduce(group=group)
def construct(self, x, y):
x = self.allgather(x)
out = self.matmul(x, y)
out = self.pow(out, 2.0)
out = self.reduce_sum(out, None)
out = self.allreduce(out)
return out
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, x, y, sens):
return grad_all_with_sens(self.network)(x, y, sens)
class MatmulAllgatherFactory:
def __init__(self, inputx_shape, inputy_shape, x_stra, y_stra):
self.inputx = self.gen_value(inputx_shape, 10)
self.inputy = self.gen_value(inputy_shape, 20)
self.x_stra = x_stra
self.y_stra = y_stra
stra_size = 1
for s in x_stra:
stra_size = stra_size * s
self.stra_size = stra_size
def gen_value(self, input_shape, delta):
size = 1
for s in input_shape:
size = size * s
number_range = min(100, size)
input_np = np.reshape(np.arange(0, size) % number_range - delta, input_shape).astype(np.float32)
return input_np
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def grad_mindspore_impl_single(self):
x = Tensor(self.inputx)
y = Tensor(self.inputy)
sens = Tensor(1.0, dtype=ms.float32)
net = MatmulSingle()
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, sens)
return input_grad
def grad_mindspore_impl_reduce(self):
inputxs = self.get_parallel_blocks(self.inputx, self.x_stra)
inputys = self.get_parallel_blocks(self.inputy, self.y_stra)
x = Tensor(inputxs[device_id % self.stra_size])
y = Tensor(inputys[device_id % self.stra_size])
repeat_num = device_num / self.stra_size
v = self.stra_size * repeat_num * repeat_num * repeat_num
sens = Tensor(1.0 / v, dtype=ms.float32)
net = MatmulAllgather("hccl_world_group")
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, sens)
return input_grad
def grad_cmp(self):
single_results = self.grad_mindspore_impl_single()
reduce_results = self.grad_mindspore_impl_reduce()
single_result0 = self.get_parallel_blocks(single_results[0].asnumpy(), self.x_stra)[device_id % self.stra_size]
reduce_result0 = reduce_results[0].asnumpy()
single_result1 = self.get_parallel_blocks(single_results[1].asnumpy(), self.y_stra)[device_id % self.stra_size]
reduce_result1 = reduce_results[1].asnumpy()
assert np.allclose(single_result0, reduce_result0, 0.0001, 0.0001)
assert np.allclose(single_result1, reduce_result1, 0.0001, 0.0001)
def test_reduce_grad():
inputx_shape = (64, 32)
inputy_shape = (32, 64)
fact = MatmulAllgatherFactory(inputx_shape, inputy_shape, (4, 1), (1, 4))
fact.grad_cmp()
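# Summary of the comparison above: MatmulSingle computes
# sum(matmul(x, y) ** 2) on the full tensors, while MatmulAllgather runs on
# this device's row-shard of x (strategy (4, 1)) and column-shard of y
# (strategy (1, 4)), gathering x across the group before the matmul and
# all-reducing the scalar loss afterwards. grad_cmp() shards the single-device
# gradients with get_parallel_blocks() and requires each device's block to
# match the parallel gradients within rtol=atol=1e-4; sens for the parallel
# run is rescaled to 1 / (stra_size * repeat_num ** 3) as written above.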
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import os
import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class MatmulSingle(Cell):
def __init__(self, transpose_a=False, transpose_b=False):
super(MatmulSingle, self).__init__()
self.matmul1 = P.MatMul(transpose_a, transpose_b)
self.matmul2 = P.MatMul(transpose_a, transpose_b)
self.pow = P.Pow()
self.reduce_sum = P.ReduceSum()
def construct(self, x, y, z):
out = self.matmul1(x, y)
out = self.matmul2(out, z)
out = self.pow(out, 2.0)
out = self.reduce_sum(out, None)
return out
class MatmulReduce(Cell):
def __init__(self, group, transpose_a=False, transpose_b=False):
super(MatmulReduce, self).__init__()
self.matmul1 = P.MatMul(transpose_a, transpose_b)
self.allreduce1 = P.AllReduce(group=group)
self.matmul2 = P.MatMul(transpose_a, transpose_b)
self.pow = P.Pow()
self.reduce_sum = P.ReduceSum()
self.allreduce2 = P.AllReduce(group=group)
def construct(self, x, y, z):
out = self.matmul1(x, y)
out = self.allreduce1(out)
out = self.matmul2(out, z)
out = self.pow(out, 2.0)
out = self.reduce_sum(out, None)
out = self.allreduce2(out)
return out
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, x, y, z, sens):
return grad_all_with_sens(self.network)(x, y, z, sens)
class MatmulReduceFactory:
def __init__(self, inputx_shape, inputy_shape, inputz_shape, x_stra, y_stra, z_stra):
self.inputx = self.GenValue(inputx_shape, 10)
self.inputy = self.GenValue(inputy_shape, 20)
self.inputz = self.GenValue(inputz_shape, 30)
self.x_stra = x_stra
self.y_stra = y_stra
self.z_stra = z_stra
stra_size = 1
for s in x_stra:
stra_size = stra_size * s
self.stra_size = stra_size
def GenValue(self, input_shape, delta):
size = 1
for s in input_shape:
size = size * s
number_range = min(100, size)
input_np = np.reshape(np.arange(0, size) % number_range - delta, input_shape).astype(np.float32)
return input_np
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def grad_mindspore_impl_single(self):
x = Tensor(self.inputx)
y = Tensor(self.inputy)
z = Tensor(self.inputz)
sens = Tensor(1.0, dtype=ms.float32)
net = MatmulSingle()
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, z, sens)
return input_grad
def grad_mindspore_impl_reduce(self):
inputxs = self.get_parallel_blocks(self.inputx, self.x_stra)
inputys = self.get_parallel_blocks(self.inputy, self.y_stra)
inputzs = self.get_parallel_blocks(self.inputz, self.z_stra)
x = Tensor(inputxs[device_id % self.stra_size])
y = Tensor(inputys[device_id % self.stra_size])
z = Tensor(inputzs[device_id % self.stra_size])
repeat_num = device_num / self.stra_size
v = self.stra_size * repeat_num * repeat_num * repeat_num
sens = Tensor(1.0 / v, dtype=ms.float32)
net = MatmulReduce("hccl_world_group")
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, z, sens)
return input_grad
def grad_cmp(self):
single_results = self.grad_mindspore_impl_single()
reduce_results = self.grad_mindspore_impl_reduce()
single_result0 = self.get_parallel_blocks(single_results[0].asnumpy(), self.x_stra)[device_id % self.stra_size]
reduce_result0 = reduce_results[0].asnumpy()
single_result1 = self.get_parallel_blocks(single_results[1].asnumpy(), self.y_stra)[device_id % self.stra_size]
reduce_result1 = reduce_results[1].asnumpy()
single_result2 = self.get_parallel_blocks(single_results[2].asnumpy(), self.z_stra)[device_id % self.stra_size]
reduce_result2 = reduce_results[2].asnumpy()
assert np.allclose(single_result0, reduce_result0, 0.0001, 0.0001)
assert np.allclose(single_result1, reduce_result1, 0.0001, 0.0001)
assert np.allclose(single_result2, reduce_result2, 0.0001, 0.0001)
def test_reduce_grad():
inputx_shape = (32, 64)
inputy_shape = (64, 64)
inputz_shape = (64, 32)
fact = MatmulReduceFactory(inputx_shape, inputy_shape, inputz_shape, (1, 4), (4, 1), (1, 4))
fact.grad_cmp()
def test_reduce_grad_repeat():
inputx_shape = (32, 64)
inputy_shape = (64, 64)
inputz_shape = (64, 32)
fact = MatmulReduceFactory(inputx_shape, inputy_shape, inputz_shape, (1, 2), (2, 1), (1, 2))
fact.grad_cmp()
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class MatmulSingle(Cell):
def __init__(self, transpose_a=False, transpose_b=False):
super(MatmulSingle, self).__init__()
self.matmul1 = P.MatMul(transpose_a, transpose_b)
self.matmul2 = P.MatMul(transpose_a, transpose_b)
self.pow = P.Pow()
self.reduce_sum = P.ReduceSum()
def construct(self, x, y, z):
out = self.matmul1(x, y)
out = self.matmul2(out, z)
out = self.pow(out, 2.0)
out = self.reduce_sum(out, None)
return out
class MatmulReduce(Cell):
def __init__(self, group, transpose_a=False, transpose_b=False):
super(MatmulReduce, self).__init__()
self.matmul1 = P.MatMul(transpose_a, transpose_b)
self.allreduce1 = P.AllReduce(group=group)
self.matmul2 = P.MatMul(transpose_a, transpose_b)
self.pow = P.Pow()
self.reduce_sum = P.ReduceSum()
self.allreduce2 = P.AllReduce(group=group)
def construct(self, x, y, z):
out = self.matmul1(x, y)
out = self.allreduce1(out)
out = self.matmul2(out, z)
out = self.pow(out, 2.0)
out = self.reduce_sum(out, None)
out = self.allreduce2(out)
return out
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, x, y, z, sens):
return grad_all_with_sens(self.network)(x, y, z, sens)
class MatmulReduceFactory:
def __init__(self, inputx_shape, inputy_shape, inputz_shape, x_stra, y_stra, z_stra):
self.inputx = self.gen_value(inputx_shape, 10)
self.inputy = self.gen_value(inputy_shape, 20)
self.inputz = self.gen_value(inputz_shape, 30)
self.x_stra = x_stra
self.y_stra = y_stra
self.z_stra = z_stra
stra_size = 1
for s in x_stra:
stra_size = stra_size * s
self.stra_size = stra_size
def gen_value(self, input_shape, delta):
size = 1
for s in input_shape:
size = size * s
number_range = min(100, size)
input_np = np.reshape(np.arange(0, size) % number_range - delta, input_shape).astype(np.float32)
return input_np
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def grad_mindspore_impl_single(self):
x = Tensor(self.inputx)
y = Tensor(self.inputy)
z = Tensor(self.inputz)
sens = Tensor(1.0, dtype=ms.float32)
net = MatmulSingle()
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, z, sens)
return input_grad
def grad_mindspore_impl_reduce(self):
inputxs = self.get_parallel_blocks(self.inputx, self.x_stra)
inputys = self.get_parallel_blocks(self.inputy, self.y_stra)
inputzs = self.get_parallel_blocks(self.inputz, self.z_stra)
x = Tensor(inputxs[device_id % self.stra_size])
y = Tensor(inputys[device_id % self.stra_size])
z = Tensor(inputzs[device_id % self.stra_size])
repeat_num = device_num / self.stra_size
v = self.stra_size * repeat_num * repeat_num * repeat_num
sens = Tensor(1.0 / v, dtype=ms.float32)
net = MatmulReduce("hccl_world_group")
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, z, sens)
return input_grad
def grad_cmp(self):
single_results = self.grad_mindspore_impl_single()
reduce_results = self.grad_mindspore_impl_reduce()
single_result0 = self.get_parallel_blocks(single_results[0].asnumpy(), self.x_stra)[device_id % self.stra_size]
reduce_result0 = reduce_results[0].asnumpy()
single_result1 = self.get_parallel_blocks(single_results[1].asnumpy(), self.y_stra)[device_id % self.stra_size]
reduce_result1 = reduce_results[1].asnumpy()
single_result2 = self.get_parallel_blocks(single_results[2].asnumpy(), self.z_stra)[device_id % self.stra_size]
reduce_result2 = reduce_results[2].asnumpy()
assert np.allclose(single_result0, reduce_result0, 0.0001, 0.0001)
assert np.allclose(single_result1, reduce_result1, 0.0001, 0.0001)
assert np.allclose(single_result2, reduce_result2, 0.0001, 0.0001)
def test_reduce_grad():
inputx_shape = (32, 64)
inputy_shape = (64, 64)
inputz_shape = (64, 32)
fact = MatmulReduceFactory(inputx_shape, inputy_shape, inputz_shape, (1, 4), (4, 1), (1, 4))
fact.grad_cmp()
def test_reduce_grad_repeat():
inputx_shape = (32, 64)
inputy_shape = (64, 64)
inputz_shape = (64, 32)
fact = MatmulReduceFactory(inputx_shape, inputy_shape, inputz_shape, (1, 2), (2, 1), (1, 2))
fact.grad_cmp()
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import os
import pytest
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class L2normalize(Cell):
def __init__(self, axis=0, epsilon=1e-4, strategy0=None, strategy1=None):
super(L2normalize, self).__init__()
self.add = P.TensorAdd(strategy=strategy0)
self.l2norm = P.L2Normalize(axis, epsilon, strategy1)
def construct(self, x, y):
out = self.add(x, y)
out = self.l2norm(out)
return out
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, x, y, output_grad):
return grad_all_with_sens(self.network)(x, y, output_grad)
class L2normalizeFactory:
def __init__(self, input_shape, axis, strategy0, strategy1):
prefix = ""
size = 1
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(1000, size)
self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(
np.float32)
self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype(
np.float32)
target_shape = input_shape
self.target_shape = target_shape
target_size = 1
for s in target_shape:
target_size = target_size * s
number_range = min(1000, target_size)
self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2,
target_shape).astype(np.float32)
self.axis = axis
self.epsilon = 1e-4
self.strategy0 = strategy0
self.strategy1 = strategy1
out_strategy = strategy1[1]
self.out_strategy = out_strategy
need_dev_num0 = 1
need_dev_num1 = 1
for s in strategy0[1]:
need_dev_num0 = need_dev_num0 * s
for s in out_strategy:
need_dev_num1 = need_dev_num1 * s
self.x_id = device_id % need_dev_num0
self.y_id = device_id % need_dev_num0
self.out_id = device_id % need_dev_num1
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def forward_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
net = L2normalize(self.axis, self.epsilon)
out = net(x, y)
return out.asnumpy()
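# The parallel run compiles against the full-shape tensors (parallel_inputs_compile)
# but executes with this rank's slices (parallel_inputs_run), cut according to strategy0.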
def forward_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
net = L2normalize(self.axis, self.epsilon, strategy0=self.strategy0, strategy1=self.strategy1)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
return out.asnumpy()
def grad_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grad = Tensor(self.output_grad_np)
net = L2normalize(self.axis, self.epsilon)
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, output_grad)
return input_grad
def grad_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grad = Tensor(self.output_grad_np)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
output_grad1 = Tensor(outgrads[self.out_id])
net = L2normalize(self.axis, self.epsilon, strategy0=self.strategy0, strategy1=self.strategy1)
grad_net = Grad(net)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_auto_parallel()
grad_net.set_train()
input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1],
parallel_inputs_run=[x1, y1, output_grad1])
return input_grad
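# forward_cmp slices the single-device output with out_strategy and checks that this rank's
# block matches the parallel output; grad_cmp applies the same check to both input gradients.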
def forward_cmp(self):
out_mindspore = self.forward_mindspore_impl()
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy)
assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001)
def grad_cmp(self):
input_grad_mindspore = self.grad_mindspore_impl()
input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)
def test_reid_l2normalize_input_128_512():
input_shape = (128, 512)
axis = 0
fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4)))
fact.forward_cmp()
def test_reid_l2normalize_grad_input_128_512():
input_shape = (128, 512)
axis = 0
fact = L2normalizeFactory(input_shape, axis, (0, (4, 1), (4, 1)), strategy1=(0, (1, 4)))
fact.grad_cmp()
def test_reid_l2normalize_input_128_512_repeat():
input_shape = (128, 512)
axis = 0
fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (1, 2), (1, 2)), strategy1=(0, (1, 2)))
fact.forward_cmp()
def test_reid_l2normalize_grad_input_128_512_repeat():
input_shape = (128, 512)
axis = 0
fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (1, 2), (1, 2)), strategy1=(0, (1, 2)))
fact.grad_cmp()
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class L2normalize(Cell):
def __init__(self, axis=0, epsilon=1e-4, strategy0=None, strategy1=None):
super(L2normalize, self).__init__()
self.add = P.TensorAdd(strategy=strategy0)
self.l2norm = P.L2Normalize(axis, epsilon, strategy1)
def construct(self, x, y):
out = self.add(x, y)
out = self.l2norm(out)
return out
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, x, y, output_grad):
return grad_all_with_sens(self.network)(x, y, output_grad)
class L2normalizeFactory:
def __init__(self, input_shape, axis, strategy0, strategy1):
prefix = ""
size = 1
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(1000, size)
self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(
np.float32)
self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype(
np.float32)
target_shape = input_shape
self.target_shape = target_shape
target_size = 1
for s in target_shape:
target_size = target_size * s
number_range = min(1000, target_size)
self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2,
target_shape).astype(np.float32)
self.axis = axis
self.epsilon = 1e-4
self.strategy0 = strategy0
self.strategy1 = strategy1
out_strategy = strategy1[1]
self.out_strategy = out_strategy
need_dev_num0 = 1
need_dev_num1 = 1
for s in strategy0[1]:
need_dev_num0 = need_dev_num0 * s
for s in out_strategy:
need_dev_num1 = need_dev_num1 * s
self.x_id = device_id % need_dev_num0
self.y_id = device_id % need_dev_num0
self.out_id = device_id % need_dev_num1
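# Recursively split the full array along each axis according to the strategy; the resulting
# list of blocks is ordered so that a device can pick out its own slice by index.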
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def forward_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
net = L2normalize(self.axis, self.epsilon)
out = net(x, y)
return out.asnumpy()
def forward_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
net = L2normalize(self.axis, self.epsilon, strategy0=self.strategy0, strategy1=self.strategy1)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
return out.asnumpy()
def grad_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grad = Tensor(self.output_grad_np)
net = L2normalize(self.axis, self.epsilon)
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, output_grad)
return input_grad
def grad_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grad = Tensor(self.output_grad_np)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
output_grad1 = Tensor(outgrads[self.out_id])
net = L2normalize(self.axis, self.epsilon, strategy0=self.strategy0, strategy1=self.strategy1)
grad_net = Grad(net)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_auto_parallel()
grad_net.set_train()
input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1],
parallel_inputs_run=[x1, y1, output_grad1])
return input_grad
def forward_cmp(self):
out_mindspore = self.forward_mindspore_impl()
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy)
assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001)
def grad_cmp(self):
input_grad_mindspore = self.grad_mindspore_impl()
input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)
def test_reid_l2normalize_input_128_512():
input_shape = (128, 512)
axis = 0
fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4)))
fact.forward_cmp()
def test_reid_l2normalize_grad_input_128_512():
input_shape = (128, 512)
axis = 0
fact = L2normalizeFactory(input_shape, axis, (0, (4, 1), (4, 1)), strategy1=(0, (1, 4)))
fact.grad_cmp()
def test_reid_l2normalize_input_128_512_repeat():
input_shape = (128, 512)
axis = 0
fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (1, 2), (1, 2)), strategy1=(0, (1, 2)))
fact.forward_cmp()
def test_reid_l2normalize_grad_input_128_512_repeat():
input_shape = (128, 512)
axis = 0
fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (1, 2), (1, 2)), strategy1=(0, (1, 2)))
fact.grad_cmp()
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import os
import pytest
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class AddRelu(Cell):
def __init__(self, strategy0=None, strategy1=None):
super(AddRelu, self).__init__()
self.add = P.TensorAdd(strategy=strategy0)
self.relu = P.ReLU(strategy=strategy1)
def construct(self, x, y):
out = self.add(x, y)
out = self.relu(out)
return out
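# NetWithLoss appends SoftmaxCrossEntropyWithLogits to the AddRelu network and returns
# only the loss value (the first element of the loss output).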
class NetWithLoss(Cell):
def __init__(self, network, strategy2=None):
super(NetWithLoss, self).__init__()
self.loss = P.SoftmaxCrossEntropyWithLogits(strategy=strategy2)
self.network = network
def construct(self, x, y, b):
predict = self.network(x, y)
return self.loss(predict, b)[0]
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, x, y, b):
return grad_all(self.network)(x, y, b)
class AddReluFactory:
def __init__(self, input_shape, strategy0, strategy1, strategy2):
prefix = ""
size = 1
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(1000, size)
self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(
np.float32)
self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype(
np.float32)
target_shape = input_shape
self.target_shape = target_shape
target_size = 1
for s in target_shape:
target_size = target_size * s
number_range = min(10, target_size)
self.output_grad_np = np.reshape((np.arange(0, target_size) % number_range) * 0.1, target_shape).astype(
np.float32)
self.strategy0 = strategy0
self.strategy1 = strategy1
self.strategy2 = strategy2
out_strategy = strategy1[1]
self.out_strategy = out_strategy
need_dev_num0 = 1
need_dev_num1 = 1
for s in strategy0[1]:
need_dev_num0 = need_dev_num0 * s
for s in out_strategy:
need_dev_num1 = need_dev_num1 * s
self.x_id = device_id % need_dev_num0
self.y_id = device_id % need_dev_num0
self.out_id = device_id % need_dev_num1
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
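# The gradients are computed three times in a row; grad_cmp later compares each run against
# the corresponding parallel run, presumably to catch non-deterministic results across executions.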
def grad_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grad = Tensor(self.output_grad_np)
net = AddRelu()
net_with_loss = NetWithLoss(net)
grad_net = Grad(net_with_loss)
grad_net.set_train()
input_grads = []
for i in range(0, 3):
input_grad = grad_net(x, y, output_grad)
input_grads.append(input_grad)
return input_grads
def grad_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grad = Tensor(self.output_grad_np)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
output_grad1 = Tensor(outgrads[self.out_id])
net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1)
net_with_loss = NetWithLoss(net, strategy2=self.strategy2)
grad_net = Grad(net_with_loss)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_auto_parallel()
grad_net.set_train()
input_grads = []
for i in range(0, 3):
input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad],
parallel_inputs_run=[x1, y1, output_grad1])
input_grads.append(input_grad)
return input_grads
def grad_cmp(self):
input_grad_mindspores = self.grad_mindspore_impl()
input_grad_mindspore_parallels = self.grad_mindspore_parallel_impl()
for i in range(0, len(input_grad_mindspores)):
input_grad_mindspore = input_grad_mindspores[i]
input_grad_mindspore_parallel = input_grad_mindspore_parallels[i]
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_single0.npy",
input_grad_blocks_0[self.x_id])
np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_single1.npy",
input_grad_blocks_1[self.y_id])
np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_parallel0.npy",
input_grad_mindspore_parallel0)
np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_parallel1.npy",
input_grad_mindspore_parallel1)
assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)
def test_reid_l2normalize_grad_input_128_512():
input_shape = (128, 512)
fact = AddReluFactory(input_shape, strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (4, 1)),
strategy2=(0, (4, 1), (4, 1)))
fact.grad_cmp()
def test_reid_l2normalize_grad_input_128_512_stridesplit():
input_shape = (128, 512)
fact = AddReluFactory(input_shape, strategy0=(0, (1, 1), (1, 1)), strategy1=(0, (4, 1)),
strategy2=(0, (4, 1), (4, 1)))
fact.grad_cmp()
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class AddRelu(Cell):
def __init__(self, strategy0=None, strategy1=None):
super(AddRelu, self).__init__()
self.add = P.TensorAdd(strategy=strategy0)
self.relu = P.ReLU(strategy=strategy1)
def construct(self, x, y):
out = self.add(x, y)
out = self.relu(out)
return out
class NetWithLoss(Cell):
def __init__(self, network, strategy2=None):
super(NetWithLoss, self).__init__()
self.loss = P.SoftmaxCrossEntropyWithLogits(strategy=strategy2)
self.network = network
def construct(self, x, y, b):
predict = self.network(x, y)
return self.loss(predict, b)[0]
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, x, y, b):
return grad_all(self.network)(x, y, b)
class AddReluFactory:
def __init__(self, input_shape, strategy0, strategy1, strategy2):
prefix = ""
size = 1
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(1000, size)
self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(
np.float32)
self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype(
np.float32)
target_shape = input_shape
self.target_shape = target_shape
target_size = 1
for s in target_shape:
target_size = target_size * s
number_range = min(10, target_size)
self.output_grad_np = np.reshape((np.arange(0, target_size) % number_range) * 0.1, target_shape).astype(
np.float32)
self.strategy0 = strategy0
self.strategy1 = strategy1
self.strategy2 = strategy2
out_strategy = strategy1[1]
self.out_strategy = out_strategy
need_dev_num0 = 1
need_dev_num1 = 1
for s in strategy0[1]:
need_dev_num0 = need_dev_num0 * s
for s in out_strategy:
need_dev_num1 = need_dev_num1 * s
self.x_id = device_id % need_dev_num0
self.y_id = device_id % need_dev_num0
self.out_id = device_id % need_dev_num1
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def grad_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grad = Tensor(self.output_grad_np)
net = AddRelu()
net_with_loss = NetWithLoss(net)
grad_net = Grad(net_with_loss)
grad_net.set_train()
input_grads = []
for i in range(0, 3):
input_grad = grad_net(x, y, output_grad)
input_grads.append(input_grad)
return input_grads
def grad_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grad = Tensor(self.output_grad_np)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
output_grad1 = Tensor(outgrads[self.out_id])
net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1)
net_with_loss = NetWithLoss(net, strategy2=self.strategy2)
grad_net = Grad(net_with_loss)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_auto_parallel()
grad_net.set_train()
input_grads = []
for i in range(0, 3):
input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad],
parallel_inputs_run=[x1, y1, output_grad1])
input_grads.append(input_grad)
return input_grads
def grad_cmp(self):
input_grad_mindspores = self.grad_mindspore_impl()
input_grad_mindspore_parallels = self.grad_mindspore_parallel_impl()
for i in range(0, len(input_grad_mindspores)):
input_grad_mindspore = input_grad_mindspores[i]
input_grad_mindspore_parallel = input_grad_mindspore_parallels[i]
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
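# In addition to the allclose assertions, each run's single-device and parallel gradients are
# saved under ./output/ (one .npy per run, rank, and tensor) so they can be inspected offline.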
np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_single0.npy",
input_grad_blocks_0[self.x_id])
np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_single1.npy",
input_grad_blocks_1[self.y_id])
np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_parallel0.npy",
input_grad_mindspore_parallel0)
np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_parallel1.npy",
input_grad_mindspore_parallel1)
assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)
def test_reid_l2normalize_grad_input_128_512():
input_shape = (128, 512)
fact = AddReluFactory(input_shape, strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (4, 1)),
strategy2=(0, (4, 1), (4, 1)))
fact.grad_cmp()
def test_reid_l2normalize_grad_input_128_512_stridesplit():
input_shape = (128, 512)
fact = AddReluFactory(input_shape, strategy0=(0, (1, 1), (1, 1)), strategy1=(0, (4, 1)),
strategy2=(0, (4, 1), (4, 1)))
fact.grad_cmp()
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import os
import pytest
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, input1, input2, output_grad):
return grad_all_with_sens(self.network)(input1, input2, output_grad)
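# Max adds its two inputs and reduces the sum with ReduceMax along self.axis;
# keep_dims controls whether the reduced axis is retained in the output.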
class Max(Cell):
def __init__(self, axis, keep_dims, strategy0=None, strategy1=None):
super(Max, self).__init__()
self.add = P.TensorAdd(strategy=strategy0)
self.reduce_max = P.ReduceMax(keep_dims=keep_dims).set_strategy(strategy=strategy1)
self.axis = axis
def construct(self, input1, input2):
out = self.add(input1, input2)
return self.reduce_max(out, self.axis)
class MaxFactory:
def __init__(self, input_shape, axis, keep_dims, strategy0, strategy1):
self.strategy0 = strategy0
self.strategy1 = strategy1
self.axis = axis
self.keep_dims = keep_dims
input_size = 1
prefix = ""
for s in input_shape:
prefix = prefix + str(s) + "_"
input_size = input_size * s
number_range = min(1000, input_size)
self.input_np1 = np.reshape(np.arange(0, input_size) % number_range - number_range / 2, input_shape).astype(
np.float32)
self.input_np2 = self.input_np1.copy()
self.out_grad_np = None
out_shape = list(input_shape)
out_shape.pop(axis)
out_size = input_size // input_shape[axis]
number_range_ = min(1000, out_size)
self.out_grad_np = np.reshape(np.arange(0, out_size) % number_range_ - number_range_ / 2, out_shape).astype(
np.float32)
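# The tests run with keep_dims=False, so the reduced axis vanishes from the output
# and is removed from the output strategy used to slice the expected result.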
out_strategy = list(strategy1[1])
out_strategy.pop(axis)
self.out_strategy = out_strategy
need_dev_num = 1
need_dev_num_ = 1
for s in strategy0[1]:
need_dev_num = need_dev_num * s
for s in out_strategy:
need_dev_num_ = need_dev_num_ * s
self.x_id = device_id % need_dev_num
self.y_id = device_id % need_dev_num
self.out_id = device_id % need_dev_num_
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def forward_mindspore_impl(self):
input1 = Tensor(self.input_np1)
input2 = Tensor(self.input_np2)
net = Max(axis=self.axis, keep_dims=self.keep_dims)
out = net(input1, input2)
return out.asnumpy()
def forward_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
xs = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
ys = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
x1 = Tensor(xs[self.x_id])
y1 = Tensor(ys[self.y_id])
net = Max(axis=self.axis, keep_dims=self.keep_dims, strategy0=self.strategy0, strategy1=self.strategy1)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
return out.asnumpy()
def grad_mindspore_impl(self):
input1 = Tensor(self.input_np1)
input2 = Tensor(self.input_np2)
out_grad = Tensor(self.out_grad_np)
net = Max(axis=self.axis, keep_dims=self.keep_dims)
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(input1, input2, out_grad)
return input_grad
def grad_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grads = self.get_parallel_blocks(self.out_grad_np, self.out_strategy)
out_grad = Tensor(output_grads[self.out_id])
xs = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
ys = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
x1 = Tensor(xs[self.x_id])
y1 = Tensor(ys[self.y_id])
net = Max(axis=self.axis, keep_dims=self.keep_dims, strategy0=self.strategy0, strategy1=self.strategy1)
grad_net = Grad(net)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_auto_parallel()
grad_net.set_train()
input_grad = grad_net(x, y, out_grad, parallel_inputs_compile=[x, y, out_grad],
parallel_inputs_run=[x1, y1, out_grad])
return input_grad
def forward_cmp(self):
out_mindspore = self.forward_mindspore_impl()
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
print(out_mindspore)
print(out_mindspore_parallel)
out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy)
assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001)
def grad_cmp(self):
input_grad_mindspore = self.grad_mindspore_impl()
input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)
def test_reid_max_forward_input_256_64():
fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (4, 1), (4, 1)),
strategy1=(0, (4, 1)))
fact.forward_cmp()
def test_reid_max_grad_input_256_64():
fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (4, 1), (4, 1)),
strategy1=(0, (4, 1)))
fact.grad_cmp()
def test_reid_max_forward_input_128_64_32_32():
fact = MaxFactory(input_shape=(128, 64, 32, 32), axis=3, keep_dims=False, strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)),
strategy1=(0, (2, 1, 2, 1)))
fact.forward_cmp()
def test_reid_max_grad_input_128_64_32_32():
fact = MaxFactory(input_shape=(128, 64, 32, 32), axis=3, keep_dims=False, strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)),
strategy1=(0, (2, 1, 2, 1)))
fact.grad_cmp()
def test_reid_max_forward_input_256_64_repeat():
fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (2, 1), (2, 1)),
strategy1=(0, (2, 1)))
fact.forward_cmp()
def test_reid_max_grad_input_256_64_repeat():
fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (2, 1), (2, 1)),
strategy1=(0, (2, 1)))
fact.grad_cmp()
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, input1, input2, output_grad):
return grad_all_with_sens(self.network)(input1, input2, output_grad)
class Max(Cell):
def __init__(self, axis, keep_dims, strategy0=None, strategy1=None):
super(Max, self).__init__()
self.add = P.TensorAdd(strategy=strategy0)
self.reduce_max = P.ReduceMax(keep_dims=keep_dims).set_strategy(strategy=strategy1)
self.axis = axis
def construct(self, input1, input2):
out = self.add(input1, input2)
return self.reduce_max(out, self.axis)
class MaxFactory:
def __init__(self, input_shape, axis, keep_dims, strategy0, strategy1):
self.strategy0 = strategy0
self.strategy1 = strategy1
self.axis = axis
self.keep_dims = keep_dims
input_size = 1
prefix = ""
for s in input_shape:
prefix = prefix + str(s) + "_"
input_size = input_size * s
number_range = min(1000, input_size)
self.input_np1 = np.reshape(np.arange(0, input_size) % number_range - number_range / 2, input_shape).astype(
np.float32)
self.input_np2 = self.input_np1.copy()
self.out_grad_np = None
out_shape = list(input_shape)
out_shape.pop(axis)
out_size = input_size // input_shape[axis]
number_range_ = min(1000, out_size)
self.out_grad_np = np.reshape(np.arange(0, out_size) % number_range_ - number_range_ / 2, out_shape).astype(
np.float32)
out_strategy = list(strategy1[1])
out_strategy.pop(axis)
self.out_strategy = out_strategy
need_dev_num = 1
need_dev_num_ = 1
for s in strategy0[1]:
need_dev_num = need_dev_num * s
for s in out_strategy:
need_dev_num_ = need_dev_num_ * s
self.x_id = device_id % need_dev_num
self.y_id = device_id % need_dev_num
self.out_id = device_id % need_dev_num_
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def forward_mindspore_impl(self):
input1 = Tensor(self.input_np1)
input2 = Tensor(self.input_np2)
net = Max(axis=self.axis, keep_dims=self.keep_dims)
out = net(input1, input2)
return out.asnumpy()
def forward_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
xs = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
ys = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
x1 = Tensor(xs[self.x_id])
y1 = Tensor(ys[self.y_id])
net = Max(axis=self.axis, keep_dims=self.keep_dims, strategy0=self.strategy0, strategy1=self.strategy1)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
return out.asnumpy()
def grad_mindspore_impl(self):
input1 = Tensor(self.input_np1)
input2 = Tensor(self.input_np2)
out_grad = Tensor(self.out_grad_np)
net = Max(axis=self.axis, keep_dims=self.keep_dims)
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(input1, input2, out_grad)
return input_grad
def grad_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grads = self.get_parallel_blocks(self.out_grad_np, self.out_strategy)
out_grad = Tensor(output_grads[self.out_id])
xs = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
ys = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
x1 = Tensor(xs[self.x_id])
y1 = Tensor(ys[self.y_id])
net = Max(axis=self.axis, keep_dims=self.keep_dims, strategy0=self.strategy0, strategy1=self.strategy1)
grad_net = Grad(net)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_auto_parallel()
grad_net.set_train()
input_grad = grad_net(x, y, out_grad, parallel_inputs_compile=[x, y, out_grad],
parallel_inputs_run=[x1, y1, out_grad])
return input_grad
def forward_cmp(self):
out_mindspore = self.forward_mindspore_impl()
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
print(out_mindspore)
print(out_mindspore_parallel)
out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy)
assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001)
def grad_cmp(self):
input_grad_mindspore = self.grad_mindspore_impl()
input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)
def test_reid_max_forward_input_256_64():
fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (4, 1), (4, 1)),
strategy1=(0, (4, 1)))
fact.forward_cmp()
def test_reid_max_grad_input_256_64():
fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (4, 1), (4, 1)),
strategy1=(0, (4, 1)))
fact.grad_cmp()
def test_reid_max_forward_input_128_64_32_32():
fact = MaxFactory(input_shape=(128, 64, 32, 32), axis=3, keep_dims=False, strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)),
strategy1=(0, (2, 1, 2, 1)))
fact.forward_cmp()
def test_reid_max_grad_input_128_64_32_32():
fact = MaxFactory(input_shape=(128, 64, 32, 32), axis=3, keep_dims=False, strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)),
strategy1=(0, (2, 1, 2, 1)))
fact.grad_cmp()
def test_reid_max_forward_input_256_64_repeat():
fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (2, 1), (2, 1)),
strategy1=(0, (2, 1)))
fact.forward_cmp()
def test_reid_max_grad_input_256_64_repeat():
fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (2, 1), (2, 1)),
strategy1=(0, (2, 1)))
fact.grad_cmp()
@@ -54,7 +54,7 @@ class Grad(nn.Cell):
return C.grad_all(self.network)(x, y)
-def compile(net, x, y):
+def compile_net(net, x, y):
net.set_auto_parallel()
_executor.compile(net, x, y)
@@ -69,7 +69,7 @@ def test_add_relu_stride_slice():
x = Tensor(np.ones([128, 32]), dtype=ms.float32)
y = Tensor(np.ones([128, 32]), dtype=ms.float32)
-compile(net, x, y)
+compile_net(net, x, y)
def test_add_relu_all_gather():
@@ -82,4 +82,4 @@ def test_add_relu_all_gather():
x = Tensor(np.ones([128, 32]), dtype=ms.float32)
y = Tensor(np.ones([128, 32]), dtype=ms.float32)
-compile(net, x, y)
+compile_net(net, x, y)
@@ -20,7 +20,6 @@ from mindspore import Tensor
from mindspore import context
from mindspore.common.api import _executor
from mindspore.ops import composite as C
from mindspore.ops import operations as P
from tests.ut.python.ops.test_math_ops import VirtualLoss
......