Commit 3f38b1a0 authored by z00478463

for comments

for debug

for DEBUG

for DEBUG

for DEBUG

for DEBUG

for better performance

for pylint

for te chip

for pylint

for pylint nth
Parent bdb00c56
@@ -23,7 +23,7 @@ config = ed({
"loss_scale": 128,
"momentum": 0.9,
"weight_decay": 5e-4,
"epoch_size": 50,
"epoch_size": 45,
"buffer_size": 1000,
"image_height": 224,
"image_width": 224,
......
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
eval.
"""
import os
import argparse
from dataset_imagenet import create_dataset
from config import config
from mindspore import context
from mindspore.model_zoo.resnet import resnet50
from mindspore.train.model import Model
from mindspore.train.serialization import load_checkpoint, load_param_into_net
from crossentropy import CrossEntropy
parser = argparse.ArgumentParser(description='Image classification')
parser.add_argument('--run_distribute', type=bool, default=False, help='Run distribute')
parser.add_argument('--device_num', type=int, default=1, help='Device num.')
parser.add_argument('--do_train', type=bool, default=False, help='Do train or not.')
parser.add_argument('--do_eval', type=bool, default=True, help='Do eval or not.')
parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path')
parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
args_opt = parser.parse_args()
device_id = int(os.getenv('DEVICE_ID'))
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False)
context.set_context(device_id=device_id)
if __name__ == '__main__':
net = resnet50(class_num=config.class_num)
if not config.label_smooth:
config.label_smooth_factor = 0.0
loss = CrossEntropy(smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
if args_opt.do_eval:
dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=False, batch_size=config.batch_size)
step_size = dataset.get_dataset_size()
if args_opt.checkpoint_path:
param_dict = load_checkpoint(args_opt.checkpoint_path)
load_param_into_net(net, param_dict)
net.set_train(False)
model = Model(net, loss_fn=loss, metrics={'acc'})
res = model.eval(dataset)
print("result:", res, "ckpt=", args_opt.checkpoint_path)
@@ -21,6 +21,7 @@ from mindspore.common.tensor import Tensor
from mindspore.nn.optim.optimizer import Optimizer
from mindspore.ops import functional as F, composite as C, operations as P
from mindspore.parallel._utils import _get_device_num, _get_mirror_mean
+from model.grad_reducer_thor import DistributedGradReducerThor
momentum_opt = C.MultitypeFuncGraph("momentum_opt")
......
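
For context, momentum_opt is a MultitypeFuncGraph: a dispatch table whose overloads are registered per input signature. A sketch of the registration pattern, modeled on MindSpore's stock momentum optimizer rather than on the elided code of this hunk:

@momentum_opt.register("Function", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor")
def _tensor_run_opt(opt, momentum, learning_rate, gradient, weight, moment):
    # Apply the fused ApplyMomentum op when every input is a Tensor.
    success = True
    success = F.depend(success, opt(weight, moment, learning_rate, gradient, momentum))
    return success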
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
if [ $# != 2 ]
then
echo "Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH]"
exit 1
fi
get_real_path(){
if [ "${1:0:1}" == "/" ]; then
echo "$1"
else
echo "$(realpath -m $PWD/$1)"
fi
}
PATH1=$(get_real_path $1)
PATH2=$(get_real_path $2)
if [ ! -d $PATH1 ]
then
echo "error: DATASET_PATH=$1 is not a directory"
exit 1
fi
if [ ! -f $PATH2 ]
then
echo "error: CHECKPOINT_PATH=$2 is not a file"
exit 1
fi
ulimit -u unlimited
export DEVICE_NUM=1
export DEVICE_ID=0
export RANK_SIZE=$DEVICE_NUM
export RANK_ID=0
if [ -d "infer" ];
then
rm -rf ./infer
fi
mkdir ./infer
cp *.py ./infer
cp *.sh ./infer
cd ./infer || exit
env > env.log
echo "start infering for device $DEVICE_ID"
python eval.py --do_eval=True --dataset_path=$PATH1 --checkpoint_path=$PATH2 &> log &
cd ..
@@ -109,7 +109,7 @@ if __name__ == '__main__':
step_size = dataset.get_dataset_size()
loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
-lr = Tensor(get_model_lr(0, 0.05, 6, 70, 5004))
+lr = Tensor(get_model_lr(0, 0.045, 6, 70, 5004))
opt = THOR(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum,
filter(lambda x: 'matrix_A' in x.name, net.get_parameters()),
filter(lambda x: 'matrix_G' in x.name, net.get_parameters()),
......
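
The call above implies the signature get_model_lr(global_step, lr_init, decay, total_epochs, steps_per_epoch); this commit lowers lr_init from 0.05 to 0.045. A sketch of a matching polynomial-decay schedule, assuming the conventional THOR setup (the actual generator lives elsewhere in this repo and may differ):

import numpy as np

def get_model_lr(global_step, lr_init, decay, total_epochs, steps_per_epoch):
    # lr_i = lr_init * (1 - epoch_i / total_epochs) ** decay, one value per step
    lr_each_step = []
    total_steps = steps_per_epoch * total_epochs
    for i in range(int(total_steps)):
        epoch = (i + 1) / steps_per_epoch
        lr_each_step.append(lr_init * (1.0 - epoch / total_epochs) ** decay)
    return np.array(lr_each_step[global_step:], dtype=np.float32)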
@@ -486,41 +486,41 @@ def cus_cube_matmul_cast(tik_instance, input_x1, trans_a, input_x2, trans_b,
input_x2_cast_ub[count * repeate_times_max * vectorfp32_size],
input_x2_ub[count * repeate_times_max * vectorfp32_size], repeate_num,
1, 1, 4, 8)
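# input_x2_cast_ub -> input_x2_L1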
input_x2_L1 = tik_instance.Tensor("float16", [no_tile, ko_tile_inner, c0, c0],
name="input_x2_L1", scope=tik.scope_cbuf)
tik_instance.data_move(input_x2_L1, input_x2_cast_ub, 0, 1,
no_tile * ko_tile_inner * c0 * c0 * fp16_size // blocksize, 0, 0)
# input_x1 -> input_x1_L1
input_x1_L1 = tik_instance.Tensor(input_x1.dtype, [ko_tile_inner, mo_tile, c0, c0],
name="input_x1_L1", scope=tik.scope_cbuf)
tik_instance.data_move(input_x1_L1,
input_x1[k_idx,
core_m * mo_tile, 0, 0],
0, ko_tile_inner, mo_tile * c0 * c0 * fp16_size // blocksize,
(mo - mo_tile) * c0 * c0 * fp16_size // blocksize, 0)
# input_x2_L1 -> input_x2_L0B
input_x2_L0B = tik_instance.Tensor("float16", [ko_tile_inner, no_tile, c0, c0],
name="input_x2_L0B", scope=tik.scope_cb)
with tik_instance.for_range(0, ko_tile_inner) as cc2:
tik_instance.load2dv1(input_x2_L0B[cc2, 0, 0, 0], input_x2_L1[0, cc2, 0, 0], 0, no_tile,
ko_tile_inner,
0, True)
# input_x1_L1 -> input_x1_L0A
input_x1_L0A = tik_instance.Tensor(input_x1.dtype, [mo_tile, ko_tile_inner, c0, c0],
name="input_x1_L0A", scope=tik.scope_ca)
with tik_instance.for_range(0, mo_tile) as cc1:
tik_instance.load2dv1(input_x1_L0A[cc1, 0, 0, 0], input_x1_L1[0, cc1, 0, 0], 0, ko_tile_inner,
mo_tile, 0, False)
with tik_instance.if_scope(thread_idx_k == 0):
tik_instance.mmad(res_L0C, input_x1_L0A, input_x2_L0B, mo_tile * c0,
ko_tile_inner * c0, no_tile * c0, 0)
with tik_instance.else_scope():
tik_instance.mmad(res_L0C, input_x1_L0A, input_x2_L0B, mo_tile * c0,
ko_tile_inner * c0, no_tile * c0, 1)
res_ub = tik_instance.Tensor(input_x1.dtype, [no_tile, mo_tile, c0, c0],
name="resMatmul_ub", scope=tik.scope_ubuf)
tik_instance.data_move(res_ub, res_L0C, 0, 1, no_tile * mo_tile, 0, 0, 1)
tik_instance.data_move(res[(core_n * loop_n_num + cc_n) * no_tile, core_m * mo_tile, 0, 0],
res_ub, 0, no_tile,
mo_tile * c0 * c0 * fp16_size // blocksize, 0,
(mo - mo_tile) * c0 * c0 * fp16_size // blocksize)
input_x2_L1 = tik_instance.Tensor("float16", [no_tile, ko_tile_inner, c0, c0],
name="input_x2_L1", scope=tik.scope_cbuf)
tik_instance.data_move(input_x2_L1, input_x2_cast_ub, 0, 1,
no_tile * ko_tile_inner * c0 * c0 * fp16_size // blocksize, 0, 0)
# input_x1 -> input_x1_L1
input_x1_L1 = tik_instance.Tensor(input_x1.dtype, [ko_tile_inner, mo_tile, c0, c0],
name="input_x1_L1", scope=tik.scope_cbuf)
tik_instance.data_move(input_x1_L1,
input_x1[k_idx,
core_m * mo_tile, 0, 0],
0, ko_tile_inner, mo_tile * c0 * c0 * fp16_size // blocksize,
(mo - mo_tile) * c0 * c0 * fp16_size // blocksize, 0)
# input_x2_L1 -> input_x2_L0B
input_x2_L0B = tik_instance.Tensor("float16", [ko_tile_inner, no_tile, c0, c0],
name="input_x2_L0B", scope=tik.scope_cb)
with tik_instance.for_range(0, ko_tile_inner) as cc2:
tik_instance.load2dv1(input_x2_L0B[cc2, 0, 0, 0], input_x2_L1[0, cc2, 0, 0], 0, no_tile,
ko_tile_inner,
0, True)
# input_x1_L1 -> input_x1_L0A
input_x1_L0A = tik_instance.Tensor(input_x1.dtype, [mo_tile, ko_tile_inner, c0, c0],
name="input_x1_L0A", scope=tik.scope_ca)
with tik_instance.for_range(0, mo_tile) as cc1:
tik_instance.load2dv1(input_x1_L0A[cc1, 0, 0, 0], input_x1_L1[0, cc1, 0, 0], 0, ko_tile_inner,
mo_tile, 0, False)
with tik_instance.if_scope(thread_idx_k == 0):
tik_instance.mmad(res_L0C, input_x1_L0A, input_x2_L0B, mo_tile * c0,
ko_tile_inner * c0, no_tile * c0, 0)
with tik_instance.else_scope():
tik_instance.mmad(res_L0C, input_x1_L0A, input_x2_L0B, mo_tile * c0,
ko_tile_inner * c0, no_tile * c0, 1)
res_ub = tik_instance.Tensor(input_x1.dtype, [no_tile, mo_tile, c0, c0],
name="resMatmul_ub", scope=tik.scope_ubuf)
tik_instance.data_move(res_ub, res_L0C, 0, 1, no_tile * mo_tile, 0, 0, 1)
tik_instance.data_move(res[(core_n * loop_n_num + cc_n) * no_tile, core_m * mo_tile, 0, 0],
res_ub, 0, no_tile,
mo_tile * c0 * c0 * fp16_size // blocksize, 0,
(mo - mo_tile) * c0 * c0 * fp16_size // blocksize)
@@ -13,10 +13,11 @@
# limitations under the License.
# ============================================================================
"""thor_ops"""
+import mindspore as ms
from mindspore.ops import prim_attr_register, PrimitiveWithInfer
from mindspore.ops.composite import multitype_ops as C
-import mindspore as ms
__all__ = ["CusBatchMatMul",
"CusCholeskyTrsm",
"CusFusedAbsMax1",
@@ -33,12 +34,31 @@ __all__ = ["CusBatchMatMul",
class CusBatchMatMul(PrimitiveWithInfer):
"""CusBatchMatMul definition"""
"""
Multiplies matrix `a` by matrix `b` in batch.
The rank of input tensors must be `3`.
Inputs:
- **input_x** (Tensor) - The first tensor to be multiplied. The shape of the tensor is :math:`(N, D, D)`.
- **input_y** (Tensor) - The second tensor to be multiplied. The shape of the tensor is :math:`(N, D, D)`.
Outputs:
Tensor, the shape of the output tensor is :math:`(N, D, D)`.
Examples:
>>> input_x = Tensor(np.ones(shape=[2, 128, 128]), mindspore.float32)
>>> input_y = Tensor(np.ones(shape=[2, 128, 128]), mindspore.float32)
>>> cus_batch_matmul = P.CusBatchMatMul()
>>> output = cus_batch_matmul(input_x, input_y)
"""
@prim_attr_register
def __init__(self):
"""init CusBatchMatMul"""
self.init_prim_io_names(inputs=['x1', 'x2'], outputs=['y'])
from mindspore.ops._op_impl._custom_op.batch_matmul_impl import CusBatchMatMul
def get_bprop(self):
def bprop(x1, x2, out, dout):
return (C.zeros_like(x1), C.zeros_like(x2))
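
The zero-valued bprop above deliberately blocks gradient flow through the op. For shapes, the forward behavior described in the docstring matches a plain NumPy batch matmul (a numerical sketch, not the Ascend kernel):

import numpy as np

x1 = np.ones((2, 128, 128), np.float32)
x2 = np.ones((2, 128, 128), np.float32)
out = np.matmul(x1, x2)  # (N, D, D): one (D, D) matmul per batch element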
@@ -54,12 +74,30 @@ class CusBatchMatMul(PrimitiveWithInfer):
class CusCholeskyTrsm(PrimitiveWithInfer):
"""CusCholeskyTrsm definition"""
"""
Computes the Cholesky factorization A = L * L^T and returns (L^T)^-1.
Only the diagonal blocks of the input matrix, each of dimension 128, are processed.
The rank of input tensors must be `2`.
Inputs:
- **input_x** (Tensor) - The first tensor to be multiplied. The shape of the tensor is :math:`(N, N)`.
Outputs:
Tensor, the shape of the output tensor is :math:`(N // Split_dim, Split_dim, Split_dim)`.
Examples:
>>> input_x = Tensor(np.ones(shape=[256, 256]), mindspore.float32)
>>> cus_choleskytrsm = P.CusCholeskyTrsm()
>>> output = cus_choleskytrsm(input_x)
"""
@prim_attr_register
def __init__(self):
"""init CusCholeskyTrsm"""
self.init_prim_io_names(inputs=['x1'], outputs=['y'])
from mindspore.ops._op_impl._custom_op.cholesky_trsm_impl import CusCholeskyTrsm
def infer_shape(self, data1_shape):
ll = []
m, _ = data1_shape
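
A NumPy reference for the behavior the docstring describes: factor each 128 x 128 diagonal block as A_i = L_i @ L_i.T and return (L_i.T)^-1. A sketch assuming symmetric positive-definite blocks; split_dim follows the docstring:

import numpy as np

def cholesky_trsm_reference(a, split_dim=128):
    # Process only the diagonal (split_dim, split_dim) blocks of the input.
    blocks = []
    for i in range(0, a.shape[0], split_dim):
        block = a[i:i + split_dim, i:i + split_dim]
        l = np.linalg.cholesky(block)      # A_i = L_i @ L_i.T
        blocks.append(np.linalg.inv(l.T))  # (L_i.T)^-1
    return np.stack(blocks)                # (N // split_dim, split_dim, split_dim)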
@@ -75,13 +113,28 @@ class CusCholeskyTrsm(PrimitiveWithInfer):
class CusFusedAbsMax1(PrimitiveWithInfer):
"""CusFusedAbsMax1 definition"""
"""
Computes the absolute max of the input tensor.
The rank of input tensors must be `4` or `2`.
Inputs:
- **input_x** (Tensor) - The input tensor. The shape of the tensor is :math:`(N0, M0, N1, M1)`
or :math:`(32, 64)`.
Outputs:
Tensor, the shape of the output tensor is :math:`(32, 64)` or :math:`(1, )`.
Examples:
>>> input_x = Tensor(np.ones(shape=[1, 3]), mindspore.float32)
>>> cus_fused_abs_max1 = P.CusFusedAbsMax1()
>>> output = cus_fused_abs_max1(input_x)
"""
@prim_attr_register
def __init__(self, origin_shape=[-1, -1]):
"""init CusFusedAbsMax1"""
self.init_prim_io_names(inputs=['x1'], outputs=['y'])
self.origin_shape = origin_shape
from mindspore.ops._op_impl._custom_op.fused_abs_max1_impl import CusFusedAbsMax1
def get_bprop(self):
def bprop(x, out, dout):
return (C.zeros_like(x),)
@@ -102,6 +155,21 @@ class CusFusedAbsMax1(PrimitiveWithInfer):
class CusImg2Col(PrimitiveWithInfer):
"""CusImg2Col definition"""
"""
Applies img2col to the feature map, and the result is reorganized in NC1HWC0 format.
Args:
- **strides** (listInt) - the strides of the sliding window.
- **ksizes** (listInt) - the kernel sizes of the sliding window.
Inputs:
- **input_x** (Tensor) - The shape of the tensor is :math:`(N, C, H, W)`.
Outputs:
Tensor, the shape of the output tensor is :math:`(N * H_O * W_O, C1 * K_W * K_H * C0)`.
Examples:
>>> input_x = Tensor(np.ones(shape=[32, 3, 224, 224]), mindspore.float16)
>>> cusimg2col = P.CusImg2Col()
>>> output = cusimg2col(input_x)
"""
@prim_attr_register
def __init__(self, ksizes, strides, dilates=(1, 1, 1, 1), mode="NC1HWC0"):
@@ -111,7 +179,7 @@ class CusImg2Col(PrimitiveWithInfer):
self.strides = strides
self.dilates = dilates
self.mode = mode
from mindspore.ops._op_impl._custom_op.img2col_impl import CusImg2Col
def get_bprop(self):
def bprop(x, out, dout):
return (C.zeros_like(x),)
@@ -136,12 +204,30 @@ class CusImg2Col(PrimitiveWithInfer):
class CusMatMulCubeDenseLeft(PrimitiveWithInfer):
"""CusMatMulCube definition"""
"""
Multiplies matrix `a` by matrix `b`.
The rank of input_x1 must be `4`; it is the fractal representation of a dense matrix.
The rank of input_x2 must be `2`.
Inputs:
- **input_x1** (Tensor) - The first tensor to be multiplied.
The shape of the tensor is :math:`(N0, M0, N1, M1)`.
- **input_x2** (Tensor) - The second tensor to be multiplied. The shape of the tensor is :math:`(M, C)`.
Outputs:
Tensor, the shape of the output tensor is :math:`(N, C)`.
Examples:
>>> input_x = Tensor(np.ones(shape=[16, 16, 16, 16]), mindspore.float16)
>>> input_y = Tensor(np.ones(shape=[256, 256]), mindspore.float16)
>>> matmulcubedenseleft = P.CusMatMulCubeDenseLeft()
>>> output = matmulcubedenseleft(input_x, input_y)
"""
@prim_attr_register
def __init__(self):
"""init CusMatMulCubeDenseLeft"""
self.init_prim_io_names(inputs=['x1', 'x2'], outputs=['y'])
from mindspore.ops._op_impl._custom_op.matmul_cube_dense_left_impl import CusMatMulCubeDenseLeft
def get_bprop(self):
def bprop(x1, x2, out, dout):
return (C.zeros_like(x1), C.zeros_like(x2))
@@ -157,12 +243,32 @@ class CusMatMulCubeDenseLeft(PrimitiveWithInfer):
class CusMatMulCubeFraczRightMul(PrimitiveWithInfer):
"""CusMatMulCubeFraczRightMul definition"""
"""
Multiplies matrix `a` by matrix `b` and multiplies the result by scalar `c`.
The rank of the input_x1 tensor must be `2`.
The rank of the input_x2 tensor must be `4`.
Inputs:
- **input_x1** (Tensor) - The first tensor to be multiplied. The shape of the tensor is :math:`(N, C)`.
- **input_x2** (Tensor) - The second tensor to be multiplied.
The shape of the tensor is :math:`(C1, M1, C0, M0)`.
- **input_x3** (Tensor) - The third tensor to be multiplied. The shape of the tensor is :math:`(1, )`.
Outputs:
Tensor, the shape of the output tensor is :math:`(N, M)`.
Examples:
>>> input_x1 = Tensor(np.ones(shape=[256, 256]), mindspore.float16)
>>> input_x2 = Tensor(np.ones(shape=[16, 16, 16, 16]), mindspore.float16)
>>> input_x3 = Tensor(np.ones(shape=[1, ]), mindspore.float16)
>>> cusmatmulfraczrightmul = P.CusMatMulCubeFraczRightMul()
>>> output = cusmatmulfraczrightmul(input_x1, input_x2, input_x3)
"""
@prim_attr_register
def __init__(self):
"""init CusMatMulCubeFraczRightMul"""
self.init_prim_io_names(inputs=['x1', 'x2', 'x3'], outputs=['y'])
from mindspore.ops._op_impl._custom_op.matmul_cube_fracz_right_mul_impl import CusMatMulCubeFraczRightMul
def get_bprop(self):
def bprop(x1, x2, x3, out, dout):
return (C.zeros_like(x1), C.zeros_like(x2), C.zeros_like(x3))
@@ -178,6 +284,30 @@ class CusMatMulCubeFraczRightMul(PrimitiveWithInfer):
class CusMatMulCube(PrimitiveWithInfer):
"""CusMatMulCube definition"""
"""
Multiplies matrix `a` by matrix `b`.
The rank of input tensors must be `2`.
Args:
transpose_a (bool): If True, `a` is transposed before multiplication. Default: False.
transpose_b (bool): If True, `b` is transposed before multiplication. Default: False.
Inputs:
- **input_x** (Tensor) - The first tensor to be multiplied. The shape of the tensor is :math:`(N, C)`. If
`transpose_a` is True, its shape should be :math:`(N, C)` after transposing.
- **input_y** (Tensor) - The second tensor to be multiplied. The shape of the tensor is :math:`(C, M)`. If
`transpose_b` is True, its shape should be :math:`(C, M)` after transpose.
Outputs:
Tensor, the shape of the output tensor is :math:`(N, M)`.
Examples:
>>> input_x = Tensor(np.ones(shape=[256, 256]), mindspore.float16)
>>> input_y = Tensor(np.ones(shape=[256, 256]), mindspore.float16)
>>> cusmatmulcube = P.CusMatMulCube()
>>> output = cusmatmulcube(input_x, input_y)
"""
@prim_attr_register
def __init__(self, transpose_a=False, transpose_b=False):
@@ -185,7 +315,7 @@ class CusMatMulCube(PrimitiveWithInfer):
self.init_prim_io_names(inputs=['x1', 'x2'], outputs=['y'])
self.transpose_a = transpose_a
self.transpose_b = transpose_b
from mindspore.ops._op_impl._custom_op.matmul_cube_impl import CusMatMulCube
def get_bprop(self):
def bprop(x1, x2, out, dout):
return (C.zeros_like(x1), C.zeros_like(x2))
@@ -213,12 +343,27 @@ class CusMatMulCube(PrimitiveWithInfer):
class CusMatrixCombine(PrimitiveWithInfer):
"""CusMatrixCombine definition"""
"""
Moves each matrix in the batch onto the corresponding diagonal block of the result matrix.
The rank of input tensors must be `3`.
Inputs:
- **input_x** (Tensor) - The shape of the tensor is :math:`(N, D, D)`.
Outputs:
Tensor, the shape of the output tensor is :math:`(N * D, N * D)`.
Examples:
>>> input_x = Tensor(np.ones(shape=[2, 128, 128]), mindspore.float32)
>>> cusmatrixcombine = P.CusMatrixCombine()
>>> output = cusmatrixcombine(input_x)
"""
@prim_attr_register
def __init__(self):
"""init CusMatrixCombine"""
self.init_prim_io_names(inputs=['x'], outputs=['y'])
from mindspore.ops._op_impl._custom_op.matrix_combine_impl import CusMatrixCombine
def get_bprop(self):
def bprop(x, out, dout):
return (C.zeros_like(x),)
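
A NumPy reference for the block-diagonal assembly described above (a sketch of the documented behavior, not the kernel):

import numpy as np

def matrix_combine_reference(x):
    # Place each (D, D) batch matrix on the diagonal of an (N*D, N*D) zero matrix.
    n, d, _ = x.shape
    out = np.zeros((n * d, n * d), dtype=x.dtype)
    for i in range(n):
        out[i * d:(i + 1) * d, i * d:(i + 1) * d] = x[i]
    return out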
@@ -237,12 +382,28 @@ class CusMatrixCombine(PrimitiveWithInfer):
class CusTranspose02314(PrimitiveWithInfer):
"""CusTranspose02314 definition"""
"""
Permutes the input tensor with perm (0, 2, 3, 1, 4).
The rank of input tensors must be `5` with format NC1HWC0.
Inputs:
- **input_x** (Tensor) - The shape of the tensor is :math:`(N, C1, H, W, C0)`.
Outputs:
Tensor, the shape of the output tensor is :math:`(N, H, W, C1, C0)`.
Examples:
>>> input_x = Tensor(np.ones(shape=[32, 1, 224, 224, 16]), mindspore.float16)
>>> custranspose02314 = P.CusTranspose02314()
>>> output = custranspose02314(input_x)
"""
@prim_attr_register
def __init__(self):
"""init CusTranspose02314"""
self.init_prim_io_names(inputs=['x1'], outputs=['y'])
from mindspore.ops._op_impl._custom_op.transpose02314_impl import CusTranspose02314
def get_bprop(self):
def bprop(x, out, dout):
return (C.zeros_like(x),)
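
The documented permutation corresponds to a single NumPy transpose; a shape sketch of the docstring's example:

import numpy as np

x = np.ones((32, 1, 224, 224, 16), np.float16)
out = x.transpose(0, 2, 3, 1, 4)  # (N, C1, H, W, C0) -> (N, H, W, C1, C0)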
@@ -263,12 +424,32 @@ class CusTranspose02314(PrimitiveWithInfer):
class CusMatMulCubeDenseRight(PrimitiveWithInfer):
"""CusMatMulCubeDenseRight definition"""
"""
Multiplies matrix `a` by matrix `b`.
The rank of input_x1 tensor must be `2`.
The rank of input_x2 tensor must be `4`.
Inputs:
- **input_x** (Tensor) - The first tensor to be multiplied. The shape of the tensor is :math:`(N, C)`.
- **input_y** (Tensor) - The second tensor to be multiplied.
The shape of the tensor is :math:`(C1, M1, M0, C0)`.
Outputs:
Tensor, the shape of the output tensor is :math:`(N, M)`.
Examples:
>>> input_x = Tensor(np.ones(shape=[256, 256]), mindspore.float16)
>>> input_y = Tensor(np.ones(shape=[16, 16, 16, 16]), mindspore.float16)
>>> cusmatmulcubedenseright = P.CusMatMulCubeDenseRight()
>>> output = cusmatmulcubedenseright(input_x, input_y)
"""
@prim_attr_register
def __init__(self):
"""init CusMatMulCubeDenseRight"""
self.init_prim_io_names(inputs=['x1', 'x2', 'x3'], outputs=['y'])
from mindspore.ops._op_impl._custom_op.matmul_cube_dense_right_impl import CusMatMulCubeDenseRight
def get_bprop(self):
def bprop(x1, x2, x3, out, dout):
return (C.zeros_like(x1), C.zeros_like(x2), C.zeros_like(x3))
@@ -284,12 +465,32 @@ class CusMatMulCubeDenseRight(PrimitiveWithInfer):
class CusMatMulCubeFraczLeftCast(PrimitiveWithInfer):
"""CusMatMulCubeFraczLeftCast definition"""
"""
Multiplies matrix `a` by matrix `b`.
The rank of input_x1 tensor must be `4`.
The rank of the input_x2 tensor must be `2`.
Inputs:
- **input_x1** (Tensor) - The first tensor to be multiplied.
The shape of the tensor is :math:`(C1, N1, N0, C0)`.
- **input_x2** (Tensor) - The second tensor to be multiplied. The shape of the tensor is :math:`(C, M)`.
Outputs:
Tensor, the shape of the output tensor is :math:`(N, M)`.
Examples:
>>> input_x = Tensor(np.ones(shape=[16, 16, 16, 16]), mindspore.float16)
>>> input_y = Tensor(np.ones(shape=[256, 256]), mindspore.float16)
>>> cusmatmulcubefraczleftcast = P.CusMatMulCubeFraczLeftCast()
>>> output = cusmatmulcubefraczleftcast(input_x, input_y)
"""
@prim_attr_register
def __init__(self):
"""init CusMatMulCubeFraczLeftCast"""
self.init_prim_io_names(inputs=['x1', 'x2'], outputs=['y'])
from mindspore.ops._op_impl._custom_op.matmul_cube_fracz_left_cast_impl import CusMatMulCubeFraczLeftCast
def get_bprop(self):
def bprop(x1, x2, out, dout):
return (C.zeros_like(x1), C.zeros_like(x2))
......